In [1]:
import os
import pandas as pd
from pymongo import MongoClient
from bson import ObjectId
import ast

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Columns whose CSV cells hold a stringified Python list of ObjectId hex strings.
_OBJECT_ID_LIST_FIELDS = ('modules', 'trainer')


def _parse_object_id_list(raw_value):
    """Turn a stringified list of ids (e.g. "['65f0...', '65f1...']") into ObjectIds.

    Args:
        raw_value: The raw CSV cell, expected to be the ``repr`` of a Python
            list of ObjectId-compatible values.

    Returns:
        list: One ``ObjectId`` per element of the parsed literal.

    Raises:
        SyntaxError, ValueError: If ``ast.literal_eval`` cannot parse the cell
            or any element is not a valid ObjectId.
        TypeError: If the parsed literal is not iterable (e.g. a bare number).
    """
    items = list(ast.literal_eval(raw_value))

    # Validate up front: ObjectId(...) signals a malformed id with an exception
    # that is not a ValueError, and ObjectId(None) would silently mint a new id.
    rejected = [item for item in items if not ObjectId.is_valid(item)]
    if rejected:
        raise ValueError(f"invalid ObjectId value(s): {rejected!r}")

    return [ObjectId(item) for item in items]


def insert_csv_to_mongodb(csv_folder, mongodb_uri, db_name):
    """Load every ``*.csv`` file in a folder into a same-named MongoDB collection.

    Each CSV row becomes one document. An ``_id`` column is converted to
    ``ObjectId``; the ``modules`` and ``trainer`` columns are parsed from a
    stringified list into a list of ``ObjectId``. A row whose ``modules`` or
    ``trainer`` cell cannot be parsed is reported on stdout and inserted with
    that cell left unchanged.

    Args:
        csv_folder: Directory that is scanned (non-recursively) for CSV files.
        mongodb_uri: Connection URI handed to ``MongoClient``.
        db_name: Name of the target database.

    Raises:
        Whatever ``ObjectId`` raises for an invalid ``_id`` value, and any
        error from reading a CSV or inserting into MongoDB. The client is
        closed before the exception propagates.
    """
    client = MongoClient(mongodb_uri)
    try:
        db = client[db_name]

        # Sorted so files are processed in a reproducible order on every run.
        for csv_file in sorted(os.listdir(csv_folder)):
            if not csv_file.endswith('.csv'):
                continue

            # The file name without its extension doubles as the collection name.
            collection_name = os.path.splitext(csv_file)[0]
            df = pd.read_csv(os.path.join(csv_folder, csv_file))

            # One dict per row; each dict is inserted as one document.
            data = df.to_dict(orient='records')

            for doc in data:
                if '_id' in doc:
                    doc['_id'] = ObjectId(doc['_id'])

                for field in _OBJECT_ID_LIST_FIELDS:
                    if field not in doc:
                        continue
                    try:
                        doc[field] = _parse_object_id_list(doc[field])
                    except (SyntaxError, ValueError, TypeError):
                        # Best effort: keep the raw value and carry on with the rest.
                        print(f"Error parsing '{field}' field in document: {doc}")

            if data:  # insert_many rejects an empty list
                db[collection_name].insert_many(data)
                print(f"Data from '{csv_file}' inserted into '{collection_name}' collection.")
            else:
                print(f"No data to insert from '{csv_file}'.")
    finally:
        # Release the connection even when a file fails part-way through.
        client.close()

    print("All CSV data inserted into MongoDB.")


In [3]:
if __name__ == "__main__":
    # Folder containing CSV files
    csv_folder_path = 'data/'

    # MongoDB connection URI, read from the environment so that credentials
    # are never stored in the notebook (e.g. `export MONGODB_URI="mongodb+srv://..."`).
    # NOTE(review): a URI with an embedded password was previously hardcoded
    # here; rotate that password if this notebook was ever shared or committed.
    mongodb_uri = os.environ.get("MONGODB_URI")
    if not mongodb_uri:
        raise RuntimeError(
            "Set the MONGODB_URI environment variable to your MongoDB connection URI."
        )

    # MongoDB database name
    db_name = 'dummydata'

    # Insert CSV data into MongoDB
    insert_csv_to_mongodb(csv_folder_path, mongodb_uri, db_name)


Data from 'events.csv' inserted into 'events' collection.
Data from 'marks.csv' inserted into 'marks' collection.
Data from 'modules.csv' inserted into 'modules' collection.
Data from 'quizzes.csv' inserted into 'quizzes' collection.
Data from 'registers.csv' inserted into 'registers' collection.
All CSV data inserted into MongoDB.
