In [3]:
import pandas as pd
import os
from pymongo import MongoClient

# Connection settings read by get_mongo_client() and get_database().
MONGO_HOST = 'localhost'
MONGO_PORT = 27017
# Name of the database that get_database() returns.
MONGO_DB = 'admin'
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a secrets store instead of committing them.
MONGO_USERNAME = 'root'
MONGO_PASSWORD = 'mongo_password'

def get_mongo_client():
    """Create a MongoClient for the configured host, port and credentials.

    The connection URI is built from the module-level MONGO_USERNAME,
    MONGO_PASSWORD, MONGO_HOST and MONGO_PORT settings.

    Returns:
        A ``MongoClient`` built from that URI.
    """
    # Local import so this block stays self-contained.
    from urllib.parse import quote_plus

    # Reserved URI characters in credentials ('@', ':', '/', '%', ...) must be
    # percent-encoded, otherwise the URI is rejected or split in the wrong place.
    username = quote_plus(MONGO_USERNAME)
    password = quote_plus(MONGO_PASSWORD)
    uri = f"mongodb://{username}:{password}@{MONGO_HOST}:{MONGO_PORT}/"
    return MongoClient(uri)

def get_database():
    """Return a handle to the MONGO_DB database, or None if it is unreachable.

    Returns:
        The database object for MONGO_DB on success. On any failure the error
        is printed and ``None`` is returned, so callers must check the result
        with ``is None`` before using it.
    """
    client = None
    try:
        client = get_mongo_client()
        # MongoClient connects lazily, so constructing it proves nothing.
        # A ping forces a real round trip; it raises if the server cannot be
        # reached or rejects the credentials, which keeps the message below honest.
        client.admin.command('ping')
        db = client[MONGO_DB]
    except Exception as e:
        print("DB connection error:", e)
        # Do not leave a half-open client behind when the check fails.
        if client is not None:
            client.close()
        return None

    print("DB connection OK")
    return db


# Directory holding the raw CSV exports (a relative path, so it is resolved
# against the current working directory).
BASE_PATH = '../../../data/'

# Full path of each input file, in the order: activity log, user log,
# component codes.
activity_log_path, user_log_path, component_codes_path = (
    os.path.join(BASE_PATH, csv_name)
    for csv_name in ('ACTIVITY_LOG.csv', 'USER_LOG.csv', 'COMPONENT_CODES.csv')
)


def save_csv_to_mongodb(file_path, collection_name, rename_column=None, add_month_column=False, date_column=None):
    """Load a CSV file and store its rows in a MongoDB collection.

    An existing collection with the same name is dropped first, so the
    collection ends up holding exactly the rows of the CSV.

    Args:
        file_path: Path of the CSV file to read.
        collection_name: Name of the target collection.
        rename_column: Optional ``(old_name, new_name)`` pair. The rename is
            applied only if ``old_name`` is a column of the CSV.
        add_month_column: When true, and ``date_column`` names an existing
            column, that column is parsed with the format ``%d/%m/%Y %H:%M``
            into a ``Date`` column and its month number into ``Month``.
        date_column: Name of the column holding the date strings.

    Returns:
        None. Failures are reported with ``print`` and are not raised.
    """
    db = None
    try:
        data = pd.read_csv(file_path)

        # RENAME
        if rename_column:
            old_name, new_name = rename_column
            if old_name in data.columns:
                data = data.rename(columns={old_name: new_name})
                print(f"Renamed column '{old_name}' to '{new_name}'")

        # Add Month column for COUNT
        if add_month_column and date_column and date_column in data.columns:
            # Values that do not match the format become NaT (errors='coerce').
            data['Date'] = pd.to_datetime(data[date_column], format='%d/%m/%Y %H:%M', errors='coerce')
            data['Month'] = data['Date'].dt.month
            print(f"Added 'Month' column based on '{date_column}' column")

        # Build the documents before touching the database, so a conversion
        # problem cannot leave the collection dropped and empty.
        # PyMongo cannot encode pandas' NaT; store missing dates as null rather
        # than letting one bad row abort the whole insert.
        records = [
            {key: None if value is pd.NaT else value for key, value in row.items()}
            for row in data.to_dict('records')
        ]

        # Save to MongoDB for BACKUP
        db = get_database()
        # Compare with None explicitly: PyMongo database objects do not
        # support truth-value testing.
        if db is None:
            print(f"Error processing file {file_path}: no database connection, nothing saved")
            return

        collection = db[collection_name]
        if collection_name in db.list_collection_names():
            collection.drop()

        # insert_many rejects an empty list, so skip it for a CSV with no rows.
        if records:
            collection.insert_many(records)
        print(f"Saved {len(records)} records to MongoDB collection: {collection_name}")
    except Exception as e:
        # Best-effort import: report the failure and let the caller carry on
        # with the next file.
        print(f"Error processing file {file_path}: {e}")
    finally:
        # Each call opens its own client; release its connections when done.
        if db is not None:
            db.client.close()


# MAIN (save raw data to mongodb)
# Both log exports get their anonymised user-name column renamed to 'User_ID'.
_USER_ID_RENAME = ('User Full Name *Anonymized', 'User_ID')

# One entry per CSV: (source path, target collection, extra keyword options).
_IMPORT_JOBS = (
    (activity_log_path, 'activity_log', {'rename_column': _USER_ID_RENAME}),
    (
        user_log_path,
        'user_log',
        {'rename_column': _USER_ID_RENAME, 'add_month_column': True, 'date_column': 'Date'},
    ),
    (component_codes_path, 'component_codes', {}),
)

for _csv_path, _collection, _options in _IMPORT_JOBS:
    save_csv_to_mongodb(_csv_path, _collection, **_options)

Loaded data from ../../data/USER_LOG.csv
Renamed column 'User Full Name *Anonymized' to 'User_ID'
Added 'Month' column based on 'Date' column
DB connection OK
Saved 150835 records to MongoDB collection: user_log
