### Import Libraries

In [1]:
import configparser as configparser
import pandas as pd
import pymongo
import json
from decimal import Decimal

## Parsing INI File

### Function: parse the INI file

In [2]:
def parse_ini(section: str, ini_path: str = "imdb_database.ini") -> dict:
    """
    Read one section of an INI file into a plain dictionary.

    :param section: name of the section to read from the INI file
    :param ini_path: path of the INI file; defaults to "imdb_database.ini"
        so existing single-argument calls keep working
    :return: dictionary mapping option names to their (string) values; an
        empty dictionary when the file cannot be read or has no such section
    """
    parser = configparser.ConfigParser()
    # ConfigParser.read() skips files it cannot open instead of raising, so a
    # missing file simply results in the empty-dict return below.
    parser.read(ini_path)
    if not parser.has_section(section):
        return {}
    return dict(parser.items(section))

In [3]:
# Read the "mongodb" section of the INI file; the bare name on the last line
# displays the resulting dictionary as the cell output.
mongo_config = parse_ini("mongodb")
mongo_config

{'host': 'localhost', 'database': 'imdb', 'port': '27017'}

## Processing "extra-data.json"

### Reading the extra-data.json file and loading every JSON object in it into a list

In [4]:
def _iter_json_objects(text: str):
    """
    Yield every JSON document found in a whitespace-separated stream.

    Each document is decoded where it starts, so the result does not depend
    on how the objects are laid out across lines or on whether the text ends
    with a newline.

    :param text: full text containing zero or more concatenated JSON documents
    :return: generator of the decoded documents, in file order
    :raises json.JSONDecodeError: if a document in the stream is malformed
    """
    decoder = json.JSONDecoder()
    position, end = 0, len(text)
    while position < end:
        # raw_decode() does not accept leading whitespace, so step over the
        # separators between documents ourselves.
        if text[position].isspace():
            position += 1
            continue
        document, position = decoder.raw_decode(text, position)
        yield document


# Decode explicitly as UTF-8 so non-ASCII values (e.g. distributor names) do
# not depend on the platform's default encoding.
with open('extra-data.json', 'r', encoding='utf-8') as file:
    extra_data = list(_iter_json_objects(file.read()))

# Show only a small sample; the full list is far too large to display.
extra_data[:2]

[{'box_office_currencyLabel': {'xml:lang': 'en',
   'type': 'literal',
   'value': 'United States dollar'},
  'titleLabel': {'type': 'literal', 'value': 'A Good Day to Die Hard'},
  'IMDb_ID': {'type': 'literal', 'value': 'tt1606378'},
  'cost': {'datatype': 'http://www.w3.org/2001/XMLSchema#decimal',
   'type': 'literal',
   'value': '92000000'},
  'distributorLabel': {'xml:lang': 'en',
   'type': 'literal',
   'value': 'InterCom'},
  'box_office': {'datatype': 'http://www.w3.org/2001/XMLSchema#decimal',
   'type': 'literal',
   'value': '304654182'}},
 {'box_office_currencyLabel': {'xml:lang': 'en',
   'type': 'literal',
   'value': 'United States dollar'},
  'titleLabel': {'type': 'literal', 'value': "De rouille et d'os"},
  'IMDb_ID': {'type': 'literal', 'value': 'tt2053425'},
  'cost': {'datatype': 'http://www.w3.org/2001/XMLSchema#decimal',
   'type': 'literal',
   'value': '16000000'},
  'distributorLabel': {'xml:lang': 'en',
   'type': 'literal',
   'value': 'InterCom'},
  'box

In [5]:
# Number of JSON objects loaded from extra-data.json.
len(extra_data)

230825

### Counting documents with "IMDb_ID" field

In [6]:
# Count the records that carry an "IMDb_ID" key. Only those can be matched to
# a movie document later; nothing else from the record is needed here.
count = sum(1 for record in extra_data if 'IMDb_ID' in record)
count

181968

### Creating UpdateOne Requests for MongoDB

In [7]:
def _to_int(raw_value):
    """
    Convert a numeric string to an int, truncating any fractional part.

    :param raw_value: string such as "92000000" or "1.95E7"
    :return: the integer value, or None when the text is not a finite number
    """
    try:
        return int(raw_value)
    except ValueError:
        pass
    try:
        # Fallback for decimal / exponent notation that int() rejects.
        return int(float(raw_value))
    except (ValueError, OverflowError):
        # ValueError: not a number (or NaN); OverflowError: infinity.
        return None


def _title_id(imdb_id):
    """
    Extract the numeric part of an IMDb title id such as "tt1606378".

    :param imdb_id: IMDb identifier string
    :return: the integer after the "tt" prefix, or None when the identifier
        does not have that prefix or its remainder is not an integer
    """
    if not imdb_id.startswith('tt'):
        return None
    try:
        return int(imdb_id[2:])
    except ValueError:
        return None


# Build one UpdateOne request per record that has an IMDb id and at least one
# usable field. Loop variables are deliberately not called `filter` or `id`,
# so the builtins stay usable when other cells are re-run.
requests = []
for record in extra_data:
    if 'IMDb_ID' not in record:
        continue
    movie_id = _title_id(record['IMDb_ID']['value'])
    if movie_id is None:
        continue

    # NOTE(review): the box-office currency label is also applied to `cost`;
    # no separate cost-currency field is read here -- confirm this is intended.
    currency = "United States dollar"
    if 'box_office_currencyLabel' in record:
        currency = record['box_office_currencyLabel']['value']

    update = {}
    for money_key in ('box_office', 'cost'):
        if money_key in record:
            amount = _to_int(record[money_key]['value'])
            if amount is not None:
                update[money_key] = {'value': amount, 'currency': currency}
    for source_key, target_key in (('distributorLabel', 'distributor'),
                                   ('MPAA_film_ratingLabel', 'rating_label')):
        if source_key in record:
            update[target_key] = record[source_key]['value']

    # Skip records that contribute nothing to set.
    if update:
        requests.append(pymongo.UpdateOne(filter={'_id': movie_id}, update={'$set': update}))

# Show only a small sample of the queued operations.
requests[:5]

[UpdateOne({'_id': 1606378}, {'$set': {'box_office': {'value': 304654182, 'currency': 'United States dollar'}, 'cost': {'value': 92000000, 'currency': 'United States dollar'}, 'distributor': 'InterCom'}}, False, None, None, None),
 UpdateOne({'_id': 2053425}, {'$set': {'box_office': {'value': 19500000, 'currency': 'United States dollar'}, 'cost': {'value': 16000000, 'currency': 'United States dollar'}, 'distributor': 'InterCom'}}, False, None, None, None),
 UpdateOne({'_id': 58461}, {'$set': {'box_office': {'value': 3500000, 'currency': 'United States dollar'}, 'cost': {'value': 200000, 'currency': 'United States dollar'}, 'distributor': 'Mokép'}}, False, None, None, None),
 UpdateOne({'_id': 58461}, {'$set': {'box_office': {'value': 3500000, 'currency': 'United States dollar'}, 'cost': {'value': 200000, 'currency': 'United States dollar'}, 'distributor': 'Mokép'}}, False, None, None, None),
 UpdateOne({'_id': 467406}, {'$set': {'box_office': {'value': 231400000, 'currency': 'United St

In [12]:
# Number of UpdateOne operations queued for the bulk write.
len(requests)

49039

## Updating JSON data to IMDB MongoDB

### Connecting to IMDB MongoDB

In [8]:
# Assemble the MongoDB URI from the parsed INI values, open a client, and
# select the configured database. The bare name on the last line displays
# the database handle as the cell output.
connection_string = f"mongodb://{mongo_config['host']}:{mongo_config['port']}"
mongo_client = pymongo.MongoClient(connection_string)
imdb = mongo_client[mongo_config['database']]
imdb

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'imdb')

In [9]:
# Handle to the 'Movies' collection that the update requests are applied to.
movie_collection = imdb['Movies']

### Bulk writing Update operations to IMDB

In [10]:
# Send every queued UpdateOne operation to the collection in a single bulk call.
output = movie_collection.bulk_write(requests)

In [11]:
# Raw server summary of the bulk write (matched / modified counts and any errors).
output.bulk_api_result

{'writeErrors': [],
 'writeConcernErrors': [],
 'nInserted': 0,
 'nUpserted': 0,
 'nMatched': 48879,
 'nModified': 48437,
 'nRemoved': 0,
 'upserted': []}