In [1]:
import json
from pymongo.mongo_client import MongoClient
from pymongo import UpdateOne
import os

In [2]:
def clear_collection(uri, db, collection_name):
    """Drop one collection from a MongoDB deployment.

    Args:
        uri: MongoDB connection string used to create a short-lived client.
        db: Name of the database that owns the collection.
        collection_name: Name of the collection to drop.

    Returns:
        The ``'ok'`` field of the reply returned by ``drop_collection``.

    The client opened here is always closed before returning, including when
    the drop itself raises.
    """
    client = MongoClient(uri)
    try:
        # Connectivity probe: a failure is reported but deliberately not
        # fatal, so the drop below is still attempted.
        try:
            client.admin.command('ping')
            print("Pinged your deployment. You successfully connected to MongoDB!")
        except Exception as e:
            print('Exception', e)

        database = client[db]
        return database.drop_collection(collection_name)['ok']
    finally:
        # The client is private to this function; release its resources.
        client.close()

In [3]:
def _bulk_upsert_by_key(collection, documents, id_field):
    """Upsert ``documents`` into ``collection`` using ``document[id_field]`` as ``_id``.

    Args:
        collection: Target collection; must provide ``bulk_write`` and
            ``count_documents``.
        documents: Iterable of dicts. Each must contain ``id_field``.
        id_field: Key whose value becomes the document's ``_id``. The key
            itself is not stored as a regular field.

    Returns:
        The total number of documents in ``collection`` after the write.

    Raises:
        KeyError: If a document has no ``id_field`` key.
    """
    bulk_operations = []
    for document in documents:
        custom_id = document[id_field]
        # Build the payload from a filtered copy so the caller's dicts are
        # left untouched (no in-place `del`).
        fields = {key: value for key, value in document.items() if key != id_field}

        bulk_operations.append(
            UpdateOne(
                {'_id': custom_id},
                {'$set': fields},
                upsert=True,    # update on an existing _id instead of raising a duplicate key error
            )
        )

    # bulk_write is only called when there is at least one operation.
    if bulk_operations:
        result = collection.bulk_write(bulk_operations)
        print(f"Bulk write completed. Matched: {result.matched_count}, Upserted: {result.upserted_count}")

    return collection.count_documents({})


def load_json(collection, documents):
    """Upsert doctor documents, keyed by their ``'URL'`` field.

    Each document's ``'URL'`` value is used as ``_id``; the remaining fields
    are written with ``$set``. The input documents are not modified.

    Returns:
        The total number of documents in ``collection`` after the write.
    """
    return _bulk_upsert_by_key(collection, documents, 'URL')


def load_locations(collection, documents):
    """Upsert location documents, keyed by their ``'Doctor profile'`` field.

    Each document's ``'Doctor profile'`` value is used as ``_id``, so the
    fields are merged into an existing document with that ``_id`` or inserted
    as a new one. The input documents are not modified.

    Returns:
        The total number of documents in ``collection`` after the write.
    """
    return _bulk_upsert_by_key(collection, documents, 'Doctor profile')



def _read_json_lines(path):
    """Read a JSON Lines file and return its records as a list.

    The file is decoded as UTF-8 regardless of the platform default, and
    lines containing only whitespace are skipped so that a trailing newline
    or blank separator does not raise.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    documents = []
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            if line.strip():
                documents.append(json.loads(line))
    return documents


def insert_json(path, collection):
    """Load the JSON Lines file at ``path`` into ``collection`` via ``load_json``.

    Prints the value returned by ``load_json``.
    """
    print(load_json(collection, _read_json_lines(path)))


def insert_locations(path, collection):
    """Load the JSON Lines file at ``path`` into ``collection`` via ``load_locations``.

    Prints the value returned by ``load_locations``.
    """
    print(load_locations(collection, _read_json_lines(path)))


def insert_json_dir(path, collection):
    """Load every regular file directly inside ``path`` with ``insert_json``.

    Entries are processed in sorted name order so repeated runs apply the
    upserts in the same sequence. Anything that is not a regular file
    (e.g. a sub-directory) is skipped instead of being passed to ``open``.
    """
    for file in sorted(os.listdir(path)):
        file_path = os.path.join(path, file)
        if not os.path.isfile(file_path):
            continue
        insert_json(file_path, collection)


def get_doctors_collection(client):
    """Return the ``'doctors'`` collection of the ``'medical_app'`` database.

    ``client`` is indexed first by database name, then by collection name.
    """
    return client['medical_app']['doctors']

In [8]:
# Connection string. MEDICAL_APP_MONGODB_URI overrides it when set; otherwise
# the local default below is used.
uri = os.environ.get(
    'MEDICAL_APP_MONGODB_URI',
    "mongodb://127.0.0.1:27017/?directConnection=true&serverSelectionTimeoutMS=2000&appName=mongosh+2.2.6",
)
# Destructive: uncomment to drop the 'doctors' collection before loading.
#print(clear_collection(uri, 'medical_app', 'doctors'))

# Directory of doctor JSON Lines files to load (alternative source kept for reference).
#path = './dica33/json/doctors'
path = './medicitalia/json/doctors'


client = MongoClient(uri)
# Connectivity probe: a failure is printed but the load below is still attempted.
try:
    client.admin.command('ping')
except Exception as e:
    print(e)


# Load the doctor documents first, then apply the location records to the
# same collection.
collection = get_doctors_collection(client)
insert_json_dir(path, collection)
locations_file = './dica33/json/doctors_location/part-00000-1044e462-5c75-4c5d-a322-29e2bfc6599f-c000.json'
insert_locations(locations_file, collection)


Bulk write completed. Matched: 1331, Upserted: 153
11614
