In [1]:
import pandas as pd

import os
import pymongo
from pymongo.errors import AutoReconnect
import time
from dotenv import load_dotenv

In [2]:
df = pd.read_csv("depression.csv")
df.head(10)

Unnamed: 0,age,gender,sleep_quality_index,brain_fog_level,physical_pain_score,stress_level,depression_phq9_score,fatigue_severity_scale_score,pem_duration_hours,hours_of_sleep_per_night,pem_present,work_status,social_activity_level,exercise_frequency,meditation_or_mindfulness,diagnosis
0,56,Male,8.7,3.9,9.2,8.1,10.0,6.5,9.0,7.7,0,Working,Low,Daily,Yes,Depression
1,69,Male,1.3,9.9,4.2,9.9,20.0,7.0,41.0,8.4,1,Working,Low,Often,Yes,Both
2,46,Female,4.0,5.4,4.8,,24.0,1.6,13.0,6.9,0,Partially working,,Rarely,Yes,Depression
3,32,Female,9.4,2.1,2.9,3.8,10.0,6.8,11.0,7.5,0,Not working,High,Never,Yes,Depression
4,60,Female,7.6,7.5,6.4,8.5,17.0,7.0,46.0,3.1,1,Not working,Low,Rarely,No,Both
5,25,Female,3.5,3.9,6.4,6.5,9.0,7.5,41.0,4.1,1,Partially working,Medium,Never,No,ME/CFS
6,38,Female,3.3,10.0,4.3,6.2,15.0,7.0,29.0,9.9,1,Not working,Very low,Sometimes,Yes,Both
7,56,Male,1.0,9.8,4.0,3.3,10.0,4.5,31.0,3.5,0,Not working,Very high,Daily,Yes,Depression
8,36,Female,7.3,6.9,9.0,7.8,9.0,7.0,31.0,7.8,1,Working,High,,No,ME/CFS
9,40,Female,1.4,2.2,4.4,1.7,9.0,9.8,41.0,4.2,1,Partially working,Medium,Rarely,Yes,ME/CFS


In [3]:
df.shape

(1000, 16)

In [4]:
df.columns

Index(['age', 'gender', 'sleep_quality_index', 'brain_fog_level',
       'physical_pain_score', 'stress_level', 'depression_phq9_score',
       'fatigue_severity_scale_score', 'pem_duration_hours',
       'hours_of_sleep_per_night', 'pem_present', 'work_status',
       'social_activity_level', 'exercise_frequency',
       'meditation_or_mindfulness', 'diagnosis'],
      dtype='object')

In [5]:
data = df.to_dict(orient="records")
data

[{'age': 56,
  'gender': 'Male',
  'sleep_quality_index': 8.7,
  'brain_fog_level': 3.9,
  'physical_pain_score': 9.2,
  'stress_level': 8.1,
  'depression_phq9_score': 10.0,
  'fatigue_severity_scale_score': 6.5,
  'pem_duration_hours': 9.0,
  'hours_of_sleep_per_night': 7.7,
  'pem_present': 0,
  'work_status': 'Working',
  'social_activity_level': 'Low',
  'exercise_frequency': 'Daily',
  'meditation_or_mindfulness': 'Yes',
  'diagnosis': 'Depression'},
 {'age': 69,
  'gender': 'Male',
  'sleep_quality_index': 1.3,
  'brain_fog_level': 9.9,
  'physical_pain_score': 4.2,
  'stress_level': 9.9,
  'depression_phq9_score': 20.0,
  'fatigue_severity_scale_score': 7.0,
  'pem_duration_hours': 41.0,
  'hours_of_sleep_per_night': 8.4,
  'pem_present': 1,
  'work_status': 'Working',
  'social_activity_level': 'Low',
  'exercise_frequency': 'Often',
  'meditation_or_mindfulness': 'Yes',
  'diagnosis': 'Both'},
 {'age': 46,
  'gender': 'Female',
  'sleep_quality_index': 4.0,
  'brain_fog_level

In [6]:
len(data)

1000

In [7]:
DB_NAME = "DEPRESSION_DB"
COLLECTION_NAME = "data"
load_dotenv()
mongo_url=os.getenv("MONGO_URL")

In [8]:
client = pymongo.MongoClient(mongo_url, serverSelectionTimeoutMS=30000, socketTimeoutMS=30000)
data_base = client[DB_NAME]
collection=data_base[COLLECTION_NAME]
#use batch loading to insert the data into MongoDB because th dataset is large

BATCH_SIZE = 500
MAX_RETRIES = 3

for i in range(0, len(data), BATCH_SIZE):
    batch = data[i:i + BATCH_SIZE]
    for attempt in range(MAX_RETRIES):
        try:
            collection.insert_many(batch)
            break
        except AutoReconnect as e:
            print(f"Retrying batch insert ({i}-{i+BATCH_SIZE}) after error : {e}") 
            time.sleep(5)
    else:
        print(f"Failed to insert batch {i} - {i+BATCH_SIZE} after retries ")