In [1]:
import os
from urllib.parse import quote_plus

import certifi
import pandas as pd
import pymongo
import pymongo.errors as mongo_errors

In [2]:
def build_collection(df, collection, column_mapping, indices, ind_comp=None):
    """
    Load a pandas DataFrame into a MongoDB collection and create its indexes.

    Each DataFrame row becomes one document containing only the columns named in
    ``column_mapping``, stored under the mapped MongoDB field names. All documents
    are sent with a single ``insert_many`` call instead of one ``insert_one`` round
    trip per row.

    :param df: pandas DataFrame containing the data to be inserted.
    :param collection: MongoDB collection object where the data will be inserted.
    :param column_mapping: Dictionary mapping the DataFrame column names to MongoDB
        document field names. Columns that are not listed are not stored.
    :param indices: List of field names; a single-field index is created for each.
    :param ind_comp: Optional list of lists of field names. One compound index
        (ascending on every field, in the listed order) is created per inner list.
    :raises KeyError: If a column named in ``column_mapping`` is missing from ``df``.
    """
    ascending = 1  # same value as pymongo.ASCENDING

    # Convert the frame once instead of building a Series per row with iterrows(),
    # which is slow and does not preserve column dtypes across a row.
    source_columns = list(column_mapping)
    documents = [
        {mongo_attr: record[csv_col] for csv_col, mongo_attr in column_mapping.items()}
        for record in df[source_columns].to_dict("records")
    ]

    # insert_many rejects an empty list, so skip the insert for an empty DataFrame.
    if documents:
        collection.insert_many(documents)

    # Create single-field indices
    for field in indices:
        collection.create_index(field)

    # Create compound indices; explicit (key, direction) pairs are the form
    # create_index documents for multi-key indexes.
    for combo in ind_comp or []:
        collection.create_index([(field, ascending) for field in combo])


In [3]:
# Test with local database (currently disabled).
# The snippet below is wrapped in a bare triple-quoted string, so none of it executes;
# because that string is the cell's last expression, the notebook only echoes it as output.
# If it is re-enabled, note that it drops the local 'sparkplug' database before reconnecting.
"""
# MongoDB connection
## Create a connection to the MongoDB server
client = pymongo.MongoClient('localhost', 27017)
client.drop_database('sparkplug')

## Connect to a database (it will be created if it doesn't exist)
db = client['sparkplug']
"""

"\n# MongoDB connection\n## Create a connection to the MongoDB server\nclient = pymongo.MongoClient('localhost', 27017)\nclient.drop_database('sparkplug')\n\n## Connect to a database (it will be created if it doesn't exist)\ndb = client['sparkplug']\n"

In [4]:
# Connection details
# The password is read from the environment so the secret is never stored in the
# notebook source or echoed into its output. Any password that was ever committed
# in this notebook should be rotated.
username = 'sparkplug'
password = os.environ.get('MONGODB_PASSWORD')
if not password:
    raise RuntimeError('Set the MONGODB_PASSWORD environment variable before running this cell.')
host = 'sparkplug.i7nlrbn.mongodb.net'  # e.g., 'localhost' or an IP address
port = '27017'  # Default MongoDB port (only relevant to the legacy non-SRV URI below)
database_name = 'myDatabase'

# Create the connection URI
#connection_uri = f"mongodb://{username}:{password}@{host}:{port}/{database_name}"
ca = certifi.where()
uri_template = 'mongodb+srv://{user}:{pwd}@{host}/?retryWrites=true&w=majority'
# Username and password must be percent-escaped before being embedded in a MongoDB URI.
connection_uri = uri_template.format(user=quote_plus(username), pwd=quote_plus(password), host=host)

# Show where we are connecting without revealing the password.
print(uri_template.format(user=quote_plus(username), pwd='<redacted>', host=host))

# Connect to MongoDB
# The CA bundle path is passed as a keyword option instead of being concatenated,
# unescaped, into the URI query string.
client = pymongo.MongoClient(connection_uri, tlsCAFile=ca)
#client.drop_database('sparkplug')

# Access the specific database
db = client['sparkplug']

mongodb+srv://sparkplug:<redacted>@sparkplug.i7nlrbn.mongodb.net/?retryWrites=true&w=majority&tlsCAFile=/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/certifi/cacert.pem


In [5]:
# Test connection (currently disabled).
# The snippet below is wrapped in a bare triple-quoted string, so nothing in it runs;
# the string is only echoed back as the cell's output.
# If it is re-enabled, it calls client.server_info() and prints either the server
# status or the caught error, checking the specific pymongo error types first.
"""
try:
    # Attempt to retrieve the server status
    server_status = client.server_info()
    print("Connected to MongoDB server:", server_status)
except mongo_errors.ConnectionFailure as e:
    print("Connection Failure: ", e)
except mongo_errors.ConfigurationError as e:
    print("Configuration Error: ", e)
except mongo_errors.PyMongoError as e:
    print("PyMongo Error: ", e)
except Exception as e:
    print("An error occurred: ", e)
"""

'\ntry:\n    # Attempt to retrieve the server status\n    server_status = client.server_info()\n    print("Connected to MongoDB server:", server_status)\nexcept mongo_errors.ConnectionFailure as e:\n    print("Connection Failure: ", e)\nexcept mongo_errors.ConfigurationError as e:\n    print("Configuration Error: ", e)\nexcept mongo_errors.PyMongoError as e:\n    print("PyMongo Error: ", e)\nexcept Exception as e:\n    print("An error occurred: ", e)\n'

In [6]:
# Build transactions collection

## CSV header -> MongoDB field name, in the order the fields are written
column_mapping_transactions = dict([
    ('Station Name', 'station_name'),
    ('station_id', 'station_id'),
    ('Start Date', 'start_date'),
    ('End Date', 'end_date'),
    ('Transaction Date (Pacific Time)', 'transaction_date'),
    ('Total Duration (hh:mm:ss)', 'total_duration'),
    ('Charging Time (hh:mm:ss)', 'charging_time'),
    ('Energy (kWh)', 'energy'),
    ('GHG Savings (kg)', 'ghg_savings'),
    ('Gasoline Savings (gallons)', 'gas_savings'),
    ('Port Type', 'charge_level'),
    ('Port Number', 'port_number'),
    ('Plug Type', 'plug_type'),
    ('City', 'city'),
    ('State/Province', 'state'),
    ('Postal Code', 'postal_code'),
    ('Country', 'country'),
    ('Currency', 'currency'),
    ('Fee', 'fee'),
    ('Ended By', 'ended_by'),
    ('Plug In Event Id', 'plug_in_event_id'),
    ('User ID', 'user_id'),
])

## Single-field indices
indices_transactions = [
    'station_id',
    'charge_level',
    'plug_type',
    'postal_code',
    'country',
    'user_id',
]

## Compound indices, one per inner list
ind_comp_transactions = [['country', 'state', 'city'], ['country', 'state']]

## Source data and target collection
df_transactions = pd.read_csv('data/transactions.csv')
collection_transactions = db['transactions']

## Run builder
build_collection(
    df=df_transactions,
    collection=collection_transactions,
    column_mapping=column_mapping_transactions,
    indices=indices_transactions,
    ind_comp=ind_comp_transactions,
)

In [7]:
# Build stations collection

## CSV header -> MongoDB field name, in the order the fields are written
# IMPORTANT: need to store update log for each station, to calculate use time and downtime
column_mapping_stations = dict([
    ('id', 'station_id'),
    ('price', 'price'),
    ('site_id', 'site_id'),
    ('mech_status', 'mech_status'),
    ('elec_status', 'elec_status'),
    ('net_status', 'net_status'),
    ('update_log', 'update_log'),
])

## Single-field indices (no compound indices for stations)
indices_stations = [
    'site_id',
    'mech_status',
    'elec_status',
    'net_status',
]

## Source data and target collection
df_stations = pd.read_csv('data/stations.csv')
collection_stations = db['stations']

## Run builder
build_collection(
    df=df_stations,
    collection=collection_stations,
    column_mapping=column_mapping_stations,
    indices=indices_stations,
)

In [10]:
# Cleanup: remove every document from the transactions collection.
# The collection itself is not dropped here; only its documents are deleted.
match_everything = {}  # an empty filter matches all documents
collection_transactions = db['transactions']
collection_transactions.delete_many(match_everything)

DeleteResult({'n': 50, 'electionId': ObjectId('7fffffff00000000000000c5'), 'opTime': {'ts': Timestamp(1701502628, 57), 't': 197}, 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1701502628, 57), 'signature': {'hash': b'\xc6\xf5c\xf5"n\t\xa1O\xb3\x10W{\xab\xd8*\x00\xc26\\', 'keyId': 7247536804084056066}}, 'operationTime': Timestamp(1701502628, 57)}, acknowledged=True)