In [1]:
import json
from pymongo.mongo_client import MongoClient
from pymongo import UpdateOne
from dateutil import parser
import os

In [2]:
def clear_collection(uri, db, collection_name):
    client = MongoClient(uri)
    try:
        client.admin.command('ping')
        print("Pinged your deployment. You successfully connected to MongoDB!")
    except Exception as e:
        print('Exception', e)

    db = client[db]
    collection = db[collection_name]
    
    return db.drop_collection(collection)['ok']

In [3]:
def load_json(collection, documents):
    count = collection.count_documents({})

    bulk_operations = []
    for document in documents:
        custom_id = document['URL']                     # 'URL' field as custom ID
        del document['URL']

        try:
            if document['Question Date'] == '0000-00-00T00:00:00Z':
                document['Question Date'] = None
            else:
                document['Question Date'] = parser.isoparse(document['Question Date'].split('T')[0])
        except:
            document['Question Date'] = None

        try:
            if document['Answer Date'] == '0000-00-00T00:00:00Z':
                document['Answer Date'] = None
            else:
                document['Answer Date'] = parser.isoparse(document['Answer Date'].split('T')[0])
        except:
            document['Answer Date'] = None
        
        bulk_operations.append(
            UpdateOne(
                {'_id': custom_id},  
                {'$set': document},  
                upsert=True                             # insert if it doesn't exist
            )
        )

    if bulk_operations:
        result = collection.bulk_write(bulk_operations)
        print(f"Bulk write completed. Matched: {result.matched_count}, Upserted: {result.upserted_count}")

    return collection.count_documents({}) - count       # number of new documents inserted


def insert_json(path, collection):
    documents = []
    with open(path, 'r') as f:
        for line in f:
            json_object = json.loads(line)
            documents.append(json_object)

    print(load_json(collection, documents))


def insert_json_dir(path, collection):
    for file in os.listdir(path):
        insert_json(path+'/'+file, collection)

def get_analytics_collection(client):
    db_name = 'medical_app'
    collection_name = 'analytics'
    db = client[db_name]
    collection = db[collection_name]
    return collection

In [5]:
uri = "mongodb://127.0.0.1:27017/?directConnection=true&serverSelectionTimeoutMS=2000&appName=mongosh+2.2.6"
#print(clear_collection(uri, 'medical_app', 'analytics'))

#path = './dica33/json/analytics'
path = './medicitalia/json/analytics'


client = MongoClient(uri)
try:
    client.admin.command('ping')
except Exception as e:
    print(e)


collection = get_analytics_collection(client)
insert_json_dir(path, collection)

Bulk write completed. Matched: 0, Upserted: 16434
78897
Bulk write completed. Matched: 0, Upserted: 16427
95324
Bulk write completed. Matched: 0, Upserted: 16541
111865
Bulk write completed. Matched: 0, Upserted: 16609
128474
Bulk write completed. Matched: 0, Upserted: 16426
144900
Bulk write completed. Matched: 0, Upserted: 16438
161338
Bulk write completed. Matched: 0, Upserted: 16408
177746
Bulk write completed. Matched: 0, Upserted: 16324
194070
Bulk write completed. Matched: 0, Upserted: 16426
210496
Bulk write completed. Matched: 0, Upserted: 16295
226791
Bulk write completed. Matched: 0, Upserted: 16520
243311
Bulk write completed. Matched: 0, Upserted: 16463
259774
Bulk write completed. Matched: 0, Upserted: 8245
268019
