In [47]:
import polars as pl
from datetime import datetime
from dotenv import load_dotenv
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure
import os

In [54]:
def create_mongo_client(config):
    """Create a MongoDB client from the configuration and verify the connection.

    Parameters
    ----------
    config : dict
        Mapping that provides the keys 'SERVER', 'PORT',
        'MONGO_ADMIN_USERNAME' and 'MONGO_ADMIN_PASSWORD'.

    Returns
    -------
    MongoClient
        A client on which `test_connexion` has already succeeded.

    Raises
    ------
    ConnectionFailure
        Propagated from `test_connexion` when the server cannot be reached.
    """
    # Credentials are handed over as keyword arguments instead of being
    # interpolated into a "mongodb://user:password@host:port" URI: inside a
    # URI, reserved characters such as '@', ':', '/' or '%' in the username or
    # password must be percent-escaped, and a raw one makes the URI invalid.
    client = MongoClient(
        host=config['SERVER'],
        port=int(config['PORT']),  # int() keeps a string port working, as the URI form did
        username=config['MONGO_ADMIN_USERNAME'],
        password=config['MONGO_ADMIN_PASSWORD'],
    )

    test_connexion(client)

    return client

def test_connexion(mongo_client):
    """ Test that the connexion to mongodb is working. """   
    try:
        mongo_client.admin.command('ping')
        print("✅   Connection to MongoDB successful!")

    except ConnectionFailure as e:
        raise ConnectionFailure("❌   Could not connect to MongoDB:", e)
           
def load_config():
    """Load the settings from the '.env' file / environment into a dict.

    Returns
    -------
    dict
        Keys: 'SERVER', 'PORT' (int), 'DATABASE', 'COLLECTION',
        'COLLECTION_INFO', 'FILE_PATH', 'LOGGING_PATH', 'REQUIRED_COLUMNS'
        (list of str), 'PATIENT_ID_COLUMNS' (list of str),
        'MONGO_ADMIN_USERNAME' and 'MONGO_ADMIN_PASSWORD'.

    Raises
    ------
    ValueError
        If any scalar variable is not set, or if 'PORT' is not an integer.
    """
    load_dotenv()

    def _split_csv(name):
        # ''.split(',') returns [''], so blank items are dropped: an unset
        # variable yields an empty list. Surrounding whitespace is stripped
        # so "a, b" gives ['a', 'b'].
        raw_items = os.getenv(name, '').split(',')
        return [item.strip() for item in raw_items if item.strip()]

    config = {
        'SERVER' : os.getenv('SERVER'),
        # Kept as a raw string here; converted to int only after the
        # missing-variable check so an unset PORT is reported by name
        # instead of failing with a bare TypeError from int(None).
        'PORT' : os.getenv('PORT'),

        'DATABASE' : os.getenv('DATABASE'),
        'COLLECTION' : os.getenv('COLLECTION'),
        'COLLECTION_INFO' : os.getenv('COLLECTION_INFO'),

        'FILE_PATH' : os.getenv('FILE_PATH'),
        'LOGGING_PATH' : os.getenv('LOGGING_PATH'),

        # Split the comma-separated strings into lists
        'REQUIRED_COLUMNS' : _split_csv('REQUIRED_COLUMNS'),
        'PATIENT_ID_COLUMNS' : _split_csv('PATIENT_ID_COLUMNS'),

        'MONGO_ADMIN_USERNAME' : os.getenv('MONGO_ADMIN_USERNAME'),
        'MONGO_ADMIN_PASSWORD' : os.getenv('MONGO_ADMIN_PASSWORD')
    }

    missing_variables = [key for key, value in config.items() if value is None]

    if missing_variables:
        raise ValueError(f"Error during config loading, the following variables are missing : {missing_variables}")

    try:
        config['PORT'] = int(config['PORT'])
    except ValueError as e:
        raise ValueError(
            f"Error during config loading, 'PORT' must be an integer, got {config['PORT']!r}"
        ) from e

    return config

In [None]:
# Build the client from the '.env' configuration instead of hard-coding the
# admin username and password in the notebook, where they would be saved and
# shared together with the file.
config = load_config()
client = create_mongo_client(config)

client.list_database_names()

# Last expression of the cell: its result is displayed as the cell output.
client.admin.command("ping")

{'ok': 1.0}

In [84]:
# List the names of the databases on the connected server.
client.list_database_names()

['admin', 'config', 'db_medical', 'local']

In [87]:
# Handles on the database and its two collections, obtained through the
# explicit accessor methods.
db = client.get_database('db_medical')
collection = db.get_collection('hospital_admissions')
collection_info = db.get_collection('insert_informations')

In [89]:
# Fetch every document of `collection_info` (no filter is given) and print
# them one by one.
cursor = collection_info.find()
for doc in cursor:
    print(doc)

{'_id': ObjectId('676513517cd5c86a6b0a9045'), 'file': 'data/healthcare_dataset.csv', 'row_count': 54966, 'column_count': 17, 'execution_date': '2024-12-20'}


In [91]:
# Count all documents in the collection; the empty filter matches everything.
collection.count_documents({})

54966

In [93]:
# Exhaust the cursor so every document of the collection is held in memory
# as one list.
docs = list(collection.find())

In [94]:
# Build a polars DataFrame from the list of fetched documents.
df = pl.DataFrame(docs)

In [95]:
# Bare last expression: the DataFrame is rendered as the cell output.
df

_id,patient_id,name,age,gender,blood_type,medical_condition,date_of_admission,doctor,hospital,insurance_provider,billing_amount,room_number,admission_type,discharge_date,medication,test_results
str,str,str,i64,str,str,str,datetime[μs],str,str,str,f64,i64,str,datetime[μs],str,str
"""1504781546238256573-2021-03-08""","""1504781546238256573""","""Tommy Evans Md""",44,"""Female""","""B+""","""Obesity""",2021-03-08 00:00:00,"""Tracy Watson""","""Chambers-Bradley""","""Blue Cross""",28980.424846,146,"""Emergency""",2021-03-21 00:00:00,"""Paracetamol""","""Normal"""
"""8058859010951750242-2023-08-03""","""8058859010951750242""","""Katherine Price""",21,"""Female""","""AB+""","""Diabetes""",2023-08-03 00:00:00,"""Eric Smith""","""Stewart and Sons""","""Medicare""",31881.838107,190,"""Elective""",2023-08-29 00:00:00,"""Aspirin""","""Inconclusive"""
"""13641344706220671457-2020-07-1…","""13641344706220671457""","""Elizabeth Rangel""",66,"""Male""","""AB+""","""Obesity""",2020-07-15 00:00:00,"""Dawn Roberts""","""Schmidt-Rogers""","""Blue Cross""",16802.31092,401,"""Urgent""",2020-07-22 00:00:00,"""Penicillin""","""Abnormal"""
"""11912633423167942286-2021-02-1…","""11912633423167942286""","""Andrea Nguyen""",61,"""Female""","""AB-""","""Arthritis""",2021-02-13 00:00:00,"""Amy Orozco""","""and Coleman, Guerrero Nguyen""","""Medicare""",5399.995846,286,"""Elective""",2021-02-23 00:00:00,"""Aspirin""","""Abnormal"""
"""8150799142532430497-2023-10-28""","""8150799142532430497""","""Katelyn Mcdowell""",26,"""Male""","""O+""","""Diabetes""",2023-10-28 00:00:00,"""William Garner""","""PLC Davis""","""Blue Cross""",48950.096662,450,"""Urgent""",2023-11-05 00:00:00,"""Aspirin""","""Normal"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""17988906612196992406-2019-12-1…","""17988906612196992406""","""Angela Murphy""",61,"""Female""","""AB+""","""Hypertension""",2019-12-12 00:00:00,"""Lori Perry""","""Miranda-Elliott""","""UnitedHealthcare""",10332.176476,490,"""Urgent""",2020-01-10 00:00:00,"""Penicillin""","""Inconclusive"""
"""17681316916324960716-2022-06-1…","""17681316916324960716""","""Riley Mata""",48,"""Female""","""B-""","""Diabetes""",2022-06-11 00:00:00,"""Gary Paul""","""Stewart Inc""","""Aetna""",29730.587081,129,"""Elective""",2022-06-26 00:00:00,"""Penicillin""","""Normal"""
"""5242523524907585881-2023-04-21""","""5242523524907585881""","""Jason Torres""",39,"""Female""","""A-""","""Hypertension""",2023-04-21 00:00:00,"""Mr. Stephen Garza MD""","""Ltd Thornton""","""Cigna""",32162.818121,353,"""Emergency""",2023-04-29 00:00:00,"""Penicillin""","""Inconclusive"""
"""7164730608239042421-2023-11-27""","""7164730608239042421""","""Anna Rodriguez""",21,"""Male""","""A-""","""Asthma""",2023-11-27 00:00:00,"""Jeffery Wilson""","""Fitzgerald, Cummings Reed and""","""Cigna""",23529.167007,306,"""Urgent""",2023-12-25 00:00:00,"""Ibuprofen""","""Normal"""
