In [1]:
import pymongo
import pandas as pd
import os

import pymongo.errors as mongo_errors

import certifi

In [2]:
import datetime
import pandas as pd

def date_to_milliseconds(date_str, date_format='%m/%d/%Y %H:%M'):
    """
    Convert a date string to whole milliseconds since the Unix epoch.

    A string parsed without timezone information is measured against a naive
    1970-01-01 00:00:00 epoch, so no timezone conversion is applied. A string
    parsed with a UTC offset (``%z``) is measured against the UTC epoch.

    :param date_str: Date string to parse.
    :param date_format: ``strptime`` format that ``date_str`` must match.
    :return: Milliseconds since the epoch as an int (sub-millisecond remainders
        are rounded down), or None when ``date_str`` is not a string (e.g. None
        or the float NaN pandas uses for missing cells) or does not match
        ``date_format``.
    """
    try:
        dt = datetime.datetime.strptime(date_str, date_format)
    except (ValueError, TypeError):
        # ValueError: the text does not match the format.
        # TypeError: the value is not a string at all (None, NaN, numbers).
        return None

    epoch = datetime.datetime(1970, 1, 1)  # Unix epoch start time
    if dt.tzinfo is not None:
        # Aware and naive datetimes cannot be subtracted from each other.
        epoch = epoch.replace(tzinfo=datetime.timezone.utc)

    # Integer timedelta division avoids the float rounding of
    # total_seconds() * 1000 (e.g. 1.001 s would otherwise become 1000 ms).
    return (dt - epoch) // datetime.timedelta(milliseconds=1)

def build_collection(df, collection, column_mapping, indices, date_columns, ind_comp=None,
                     batch_size=1000):
    """
    Inserts data from a pandas DataFrame into a MongoDB collection based on specified column mapping.

    Each DataFrame row becomes one document. Only the columns named in
    ``column_mapping`` are copied; values of columns listed in ``date_columns``
    are passed through ``date_to_milliseconds`` (so they are stored as integer
    milliseconds since the epoch, or None when they cannot be parsed), all
    other values are copied unchanged. Indices are created after the inserts.

    :param df: pandas DataFrame containing the data to be inserted.
    :param collection: MongoDB collection object where the data will be inserted.
    :param column_mapping: Dictionary mapping DataFrame column names to MongoDB document field names.
    :param indices: List of field names to create an index for.
    :param date_columns: List of DataFrame column names that contain date strings.
    :param ind_comp: List of lists containing multiple fields to create compound indices.
        Each entry may hold plain field names (indexed ascending) or
        ``(field, direction)`` pairs.
    :param batch_size: Maximum number of documents sent per ``insert_many`` call.
    :raises KeyError: If a column named in ``column_mapping`` is missing from ``df``.
    """
    if date_columns is None:
        date_columns = ()

    # Resolve the mapping once instead of once per row.
    source_columns = list(column_mapping)
    target_fields = [column_mapping[col] for col in source_columns]
    needs_conversion = [col in date_columns for col in source_columns]

    # itertuples keeps each column's dtype and yields plain Python scalars,
    # unlike iterrows which upcasts every row to a single dtype.
    rows = df[source_columns].itertuples(index=False, name=None)

    batch = []
    for values in rows:
        document = {}
        for field, value, is_date in zip(target_fields, values, needs_conversion):
            document[field] = date_to_milliseconds(value) if is_date else value
        batch.append(document)

        # Send documents in bulk: one round trip per batch instead of per row.
        if len(batch) >= batch_size:
            collection.insert_many(batch)
            batch = []

    # insert_many rejects an empty list, so only flush a non-empty remainder.
    if batch:
        collection.insert_many(batch)

    # Create indices
    for field in indices:
        collection.create_index(field)

    # Create compound indices
    for combo in ind_comp or ():
        if isinstance(combo, str):
            # A bare field name is a single-field index, not a sequence of keys.
            collection.create_index(combo)
            continue
        # Spell out the direction (1 == ascending) so that the key list is
        # valid for PyMongo versions that require (field, direction) pairs.
        keys = [(key, 1) if isinstance(key, str) else key for key in combo]
        collection.create_index(keys)

# Example usage (placeholder names; substitute the real column and field names)
date_columns = ['CSV_DateColumn']  # DataFrame columns whose values are date strings
column_mapping = {
    # DataFrame column -> MongoDB document field
    'CSV_DateColumn': 'Mongo_DateField',
    # other mappings...
}
indices = ['Mongo_DateField']  # one single-field index per entry
ind_comp = [['Mongo_DateField', 'another_field']]  # one compound index per inner list

# Given a pandas DataFrame 'df' and a MongoDB collection 'collection':
# df = pd.read_csv('your_data.csv')
# collection = your_mongodb_collection
# build_collection(df, collection, column_mapping, indices, date_columns, ind_comp)


In [3]:
# Local test database
#
# Open a client against the MongoDB server on this machine.
client = pymongo.MongoClient(host='localhost', port=27017)

# Drop any existing 'sparkplug' database before it is populated again below.
client.drop_database('sparkplug')

# Database handle used by the cells that follow.
db = client['sparkplug']


In [4]:
# Build transactions collection
df_transactions = pd.read_csv('data/transactions.csv')
df_transactions = df_transactions.head(100) # For testing small sample, comment out for production

collection_transactions = db['transactions']

# CSV column name -> MongoDB document field name
column_mapping_transactions = {
    'Station Name': 'station_name',
    'station_id': 'station_id',
    'simulated_start_date': 'start_date',
    'simulated_end_date': 'end_date',
    'Mongo Date': 'transaction_date',
    'Total Duration (hh:mm:ss)': 'total_duration',
    'Charging Time (hh:mm:ss)': 'charging_time',
    'Energy (kWh)': 'energy_kwh',
    'GHG Savings (kg)': 'ghg_savings_kg',
    'Gasoline Savings (gallons)': 'gas_savings_gal',
    'Port Type': 'charge_level',
    'Port Number': 'port_number',
    'Plug Type': 'plug_type',
    'City': 'city',
    'State/Province': 'state',
    'Postal Code': 'postal_code',
    'Country': 'country',
    'Currency': 'currency',
    'simulated_fee': 'fee',
    'Ended By': 'ended_by',
    'Plug In Event Id': 'plug_in_event_id',
    'User ID': 'user_id',
}

# CSV columns to convert with date_to_milliseconds.
# NOTE(review): left empty so every value is stored exactly as read from the
# CSV, which is what the previous call effectively did. Add the date columns
# here once their string format is confirmed to match date_to_milliseconds.
date_columns_transactions = []

indices_transactions = ['station_id', 'charge_level', 'plug_type', 'postal_code', 'country', 'user_id']

# Compound indices, written as explicit (field, direction) pairs.
ind_comp_transactions = [
    [('country', pymongo.ASCENDING), ('state', pymongo.ASCENDING), ('city', pymongo.ASCENDING)],
    [('country', pymongo.ASCENDING), ('state', pymongo.ASCENDING)],
]

## Run builder
# Keyword arguments bind each list to the intended parameter. Passing
# ind_comp_transactions as the fifth positional argument would bind it to
# date_columns and leave ind_comp unset, so no compound index would be created.
build_collection(
    df_transactions,
    collection_transactions,
    column_mapping=column_mapping_transactions,
    indices=indices_transactions,
    date_columns=date_columns_transactions,
    ind_comp=ind_comp_transactions,
)

In [5]:
# Initialize the station logs collection
# Handle to the 'station_logs' collection of the `db` database.
collection_stations = db['station_logs']