In [43]:
import pandas as pd

import os
import pymongo
from pymongo.errors import AutoReconnect
import time
from dotenv import load_dotenv

In [44]:
# Read the Electric Vehicle Population CSV into a DataFrame, then preview
# its first ten rows as the cell's output.
EV_df = pd.read_csv(filepath_or_buffer="Electric_Vehicle_Population_Data.csv")
EV_df.head(n=10)

Unnamed: 0,VIN (1-10),County,City,State,Postal Code,Model Year,Make,Model,Electric Vehicle Type,Clean Alternative Fuel Vehicle (CAFV) Eligibility,Electric Range,Base MSRP,Legislative District,DOL Vehicle ID,Vehicle Location,Electric Utility,2020 Census Tract
0,5YJSA1E65N,Yakima,Granger,WA,98932.0,2022,TESLA,MODEL S,Battery Electric Vehicle (BEV),Eligibility unknown as battery range has not b...,0.0,0.0,15.0,187279214,POINT (-120.1871 46.33949),PACIFICORP,53077000000.0
1,KNDC3DLC5N,Yakima,Yakima,WA,98902.0,2022,KIA,EV6,Battery Electric Vehicle (BEV),Eligibility unknown as battery range has not b...,0.0,0.0,15.0,210098241,POINT (-120.52041 46.59751),PACIFICORP,53077000000.0
2,5YJYGDEEXL,Snohomish,Everett,WA,98208.0,2020,TESLA,MODEL Y,Battery Electric Vehicle (BEV),Clean Alternative Fuel Vehicle Eligible,291.0,0.0,44.0,121781950,POINT (-122.18637 47.89251),PUGET SOUND ENERGY INC,53061040000.0
3,3C3CFFGE1G,Yakima,Yakima,WA,98908.0,2016,FIAT,500,Battery Electric Vehicle (BEV),Clean Alternative Fuel Vehicle Eligible,84.0,0.0,14.0,180778377,POINT (-120.60199 46.59817),PACIFICORP,53077000000.0
4,KNDCC3LD5K,Kitsap,Bremerton,WA,98312.0,2019,KIA,NIRO,Plug-in Hybrid Electric Vehicle (PHEV),Not eligible due to low battery range,26.0,0.0,26.0,2581225,POINT (-122.65223 47.57192),PUGET SOUND ENERGY INC,53035080000.0
5,5YJXCAE29L,Kitsap,Silverdale,WA,98383.0,2020,TESLA,MODEL X,Battery Electric Vehicle (BEV),Clean Alternative Fuel Vehicle Eligible,293.0,0.0,23.0,1843054,POINT (-122.69275 47.65171),PUGET SOUND ENERGY INC,53035090000.0
6,5YJ3E1EB6L,King,Kent,WA,98030.0,2020,TESLA,MODEL 3,Battery Electric Vehicle (BEV),Clean Alternative Fuel Vehicle Eligible,322.0,0.0,47.0,182822020,POINT (-122.19975 47.37483),PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA),53033030000.0
7,JTDKN3DP9F,Kitsap,Bainbridge Island,WA,98110.0,2015,TOYOTA,PRIUS,Plug-in Hybrid Electric Vehicle (PHEV),Not eligible due to low battery range,6.0,0.0,23.0,177904170,POINT (-122.521 47.62732),PUGET SOUND ENERGY INC,53035090000.0
8,1G1FY6S07L,Kitsap,Port Orchard,WA,98367.0,2020,CHEVROLET,BOLT EV,Battery Electric Vehicle (BEV),Clean Alternative Fuel Vehicle Eligible,259.0,0.0,26.0,132558002,POINT (-122.68471 47.50524),PUGET SOUND ENERGY INC,53035090000.0
9,1G1RD6S55K,Yakima,Yakima,WA,98908.0,2019,CHEVROLET,VOLT,Plug-in Hybrid Electric Vehicle (PHEV),Clean Alternative Fuel Vehicle Eligible,53.0,0.0,14.0,474853417,POINT (-120.60199 46.59817),PACIFICORP,53077000000.0


In [45]:
# (rows, columns) of the loaded frame.
EV_df.shape

(250659, 17)

In [46]:
# Column labels of the loaded frame.
EV_df.columns

Index(['VIN (1-10)', 'County', 'City', 'State', 'Postal Code', 'Model Year',
       'Make', 'Model', 'Electric Vehicle Type',
       'Clean Alternative Fuel Vehicle (CAFV) Eligibility', 'Electric Range',
       'Base MSRP', 'Legislative District', 'DOL Vehicle ID',
       'Vehicle Location', 'Electric Utility', '2020 Census Tract'],
      dtype='object')

In [47]:
# Convert the frame into a list with one dict per row (column label -> value),
# which is the document format passed to MongoDB's insert_many.
EV_data = EV_df.to_dict(orient="records")
# Preview only the first two records: echoing the whole list renders every
# row into the cell output and bloats the saved notebook.
EV_data[:2]

[{'VIN (1-10)': '5YJSA1E65N',
  'County': 'Yakima',
  'City': 'Granger',
  'State': 'WA',
  'Postal Code': 98932.0,
  'Model Year': 2022,
  'Make': 'TESLA',
  'Model': 'MODEL S',
  'Electric Vehicle Type': 'Battery Electric Vehicle (BEV)',
  'Clean Alternative Fuel Vehicle (CAFV) Eligibility': 'Eligibility unknown as battery range has not been researched',
  'Electric Range': 0.0,
  'Base MSRP': 0.0,
  'Legislative District': 15.0,
  'DOL Vehicle ID': 187279214,
  'Vehicle Location': 'POINT (-120.1871 46.33949)',
  'Electric Utility': 'PACIFICORP',
  '2020 Census Tract': 53077002104.0},
 {'VIN (1-10)': 'KNDC3DLC5N',
  'County': 'Yakima',
  'City': 'Yakima',
  'State': 'WA',
  'Postal Code': 98902.0,
  'Model Year': 2022,
  'Make': 'KIA',
  'Model': 'EV6',
  'Electric Vehicle Type': 'Battery Electric Vehicle (BEV)',
  'Clean Alternative Fuel Vehicle (CAFV) Eligibility': 'Eligibility unknown as battery range has not been researched',
  'Electric Range': 0.0,
  'Base MSRP': 0.0,
  'Legisl

In [48]:
# Number of record dicts; should equal the row count reported by EV_df.shape.
len(EV_data)

250659

In [49]:
# Load variables from a local .env file (if any) so the connection string
# never has to be written into the notebook itself.
load_dotenv()

# Target database and collection for the EV records.
DB_NAME = "Electric_Vehicle"
COLLECTION_NAME = "ev_data"

# MongoDB connection string; this is None when MONGO_URL is not set.
mongo_url = os.environ.get("MONGO_URL")

In [41]:
# Connect to MongoDB. Both timeouts are 30 s (the values are milliseconds).
client = pymongo.MongoClient(mongo_url, serverSelectionTimeoutMS=30000, socketTimeoutMS=30000)
data_base = client[DB_NAME]
collection = data_base[COLLECTION_NAME]

# Use batch loading to insert the data into MongoDB because the dataset is large.
BATCH_SIZE = 500
MAX_RETRIES = 3
RETRY_DELAY_SECONDS = 5
# MongoDB server error code for a duplicate-key (E11000) write error.
DUPLICATE_KEY_ERROR_CODE = 11000

# (start, end) index ranges that could not be inserted, reported after the loop
# so that lost batches are not buried in the per-batch log output.
failed_batches = []

for i in range(0, len(EV_data), BATCH_SIZE):
    batch = EV_data[i:i + BATCH_SIZE]
    # The final batch may be shorter than BATCH_SIZE; report its real end index.
    batch_end = i + len(batch)
    for attempt in range(MAX_RETRIES):
        try:
            # ordered=False makes the server attempt every document instead of
            # stopping at the first error, which is what makes a retry safe.
            collection.insert_many(batch, ordered=False)
            break
        except pymongo.errors.BulkWriteError as e:
            # insert_many adds an _id to each dict in place, so a retry after a
            # dropped connection re-sends _ids that the server may already hold.
            # Duplicate-key errors on a retry therefore mean "already written".
            details = e.details
            unexpected = [
                err for err in details.get("writeErrors", [])
                if err.get("code") != DUPLICATE_KEY_ERROR_CODE
            ]
            # On the first attempt, or for any other kind of write failure,
            # surface the error exactly as before.
            if attempt == 0 or unexpected or details.get("writeConcernErrors"):
                raise
            break
        except AutoReconnect as e:
            print(f"Retrying batch insert ({i}-{batch_end}) after error : {e}") 
            # No point in waiting after the last attempt; fall through to `else`.
            if attempt < MAX_RETRIES - 1:
                time.sleep(RETRY_DELAY_SECONDS)
    else:
        # Reached only when every attempt hit AutoReconnect (no `break`).
        failed_batches.append((i, batch_end))
        print(f"Failed to insert batch {i} - {batch_end} after retries ")

if failed_batches:
    print(f"{len(failed_batches)} batch(es) were not inserted: {failed_batches}")