In [16]:
#import dependencies
import pandas as pd
from sqlalchemy import create_engine
from config import db_password
import time

In [17]:
# Load the raw charging-station CSV straight from the project's GitHub repo.
# The address is split across lines for readability only; the joined string
# is the same URL.
url = (
    'https://raw.githubusercontent.com/Bropell/'
    'Asthma_Analysis_in_California_Counties/main/'
    'Resources/Raw_Data/Charging%20Stations.csv'
)
charging_df = pd.read_csv(url)
charging_df

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,Fuel Type Code,Station Name,Street Address,City,State,ZIP,Plus4,Station Phone,Expected Date,Groups With Access Code,...,EV Pricing (French),LPG Nozzle Types,Hydrogen Pressures,Hydrogen Standards,CNG Fill Type Code,CNG PSI,CNG Vehicle Class,LNG Vehicle Class,EV On-Site Renewable Source,Restricted Access
0,ELEC,LADWP - Truesdale Center,11797 Truesdale St,Sun Valley,CA,91352,,,,Private,...,,,,,,,,,,
1,ELEC,LADWP - West LA District Office,1394 S Sepulveda Blvd,Los Angeles,CA,90024,,,,Private,...,,,,,,,,,,
2,ELEC,Los Angeles Convention Center,1201 S Figueroa St,Los Angeles,CA,90015,,213-741-1151,,Public,...,,,,,,,,,,
3,ELEC,LADWP - John Ferraro Building,111 N Hope St,Los Angeles,CA,90012,,,,Private,...,,,,,,,,,,
4,ELEC,LADWP - Haynes Power Plant,6801 E 2nd St,Long Beach,CA,90803,,,,Private,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50284,ELEC,VIP LOT STATION1,1501 Kirkwood Meadows Dr,Markleeville,CA,96120,,888-758-4389,,Public,...,,,,,,,,,,
50285,ELEC,Prunedale Shopping Center,8065 San Miguel Canyon Rd,Salinas,CA,93907,,877-455-3833,,Public,...,,,,,,,,,,
50286,ELEC,Beaverton Electric Avenue,11665 SW Beaverton Hillsdale Hwy,Beaverton,OR,97005,,855-900-7584,,Public,...,,,,,,,,,,
50287,ELEC,Shell - Inman,2040 Highway 292,Inman,SC,29349,,855-900-7584,,Public,...,,,,,,,,,,


In [18]:
# Build a ZIP -> county lookup: read the zip code database, keep only the
# rows whose state is 'CA', and keep only the 'zip' and 'county' columns.
zip_data = (
    pd.read_csv('../Raw_Data/zip_code_database.csv')
    .loc[lambda frame: frame['state'] == 'CA', ['zip', 'county']]
)

In [19]:
# Rename 'zip' to 'ZIP' so the lookup shares its key column name with the
# charging-station data it is merged with.
zip_data = zip_data.rename({'zip': 'ZIP'}, axis='columns')

In [20]:
# Keep only the columns this analysis uses.
keep_columns = ['State', 'ZIP', 'Access Code', 'Latitude', 'Longitude']
charging_df = charging_df.loc[:, keep_columns]

In [21]:
# Keep only the stations whose State is 'CA'.
charging_df = charging_df[charging_df['State'].eq('CA')]

In [22]:
# Attach a county to every station by matching on ZIP. A left join keeps
# every station row even when its ZIP has no match in the lookup.
charging_df2 = pd.merge(charging_df, zip_data, how='left', on='ZIP')

In [23]:
# Every row counts as one charger; summing this column later gives totals.
charging_df2['Total EV Chargers'] = 1

# One indicator column per distinct 'Access Code' value, used to count
# private and public chargers separately.
access_codes = charging_df2['Access Code']
pub_priv = pd.get_dummies(access_codes)
pub_priv

Unnamed: 0,private,public
0,1,0
1,1,0
2,0,1
3,1,0
4,1,0
...,...,...
14646,0,1
14647,0,1
14648,0,1
14649,0,1


In [24]:
# Copy the private/public indicator columns onto the station frame under
# descriptive names (rows are matched by index).
indicator_to_column = (
    ('private', 'Private EV Chargers'),
    ('public', 'Public EV Chargers'),
)
for access_level, count_column in indicator_to_column:
    charging_df2[count_column] = pub_priv[access_level]
charging_df2

Unnamed: 0,State,ZIP,Access Code,Latitude,Longitude,county,Total EV Chargers,Private EV Chargers,Public EV Chargers
0,CA,91352,private,34.248319,-118.387971,Los Angeles,1,1,0
1,CA,90024,private,34.052542,-118.448504,Los Angeles,1,1,0
2,CA,90015,public,34.040539,-118.271387,Los Angeles,1,0,1
3,CA,90012,private,34.059133,-118.248589,Los Angeles,1,1,0
4,CA,90803,private,33.759802,-118.096665,Los Angeles,1,1,0
...,...,...,...,...,...,...,...,...,...
14646,CA,94555,public,37.540585,-122.077357,Alameda,1,0,1
14647,CA,94555,public,37.537684,-122.073972,Alameda,1,0,1
14648,CA,94555,public,37.537662,-122.073913,Alameda,1,0,1
14649,CA,96120,public,38.684660,-120.065169,Alpine,1,0,1


In [25]:
# Sum the private, public and total charger counters per county. Each result
# is a two-column frame: 'county' plus the summed counter.
county_groups = charging_df2.groupby('county')
private_chargers = county_groups['Private EV Chargers'].sum().to_frame().reset_index()
public_chargers = county_groups['Public EV Chargers'].sum().to_frame().reset_index()
all_chargers = county_groups['Total EV Chargers'].sum().to_frame().reset_index()

In [26]:
# Zero-count record for Glenn county (a plain dict despite the _df suffix).
# NOTE(review): presumably Glenn has no rows in the charger data and so is
# missing from the grouped counts; confirm against the source data.
glen_df = {'county': 'Glenn'}
glen_df.update(dict.fromkeys(
    ['Total EV Chargers', 'Private EV Chargers', 'Public EV Chargers'], 0))

In [27]:
# Combine the total, private and public per-county sums into one frame,
# joining on 'county'.
all_chargers = (
    all_chargers
    .merge(private_chargers, on='county')
    .merge(public_chargers, on='county')
)
all_chargers

Unnamed: 0,county,Total EV Chargers,Private EV Chargers,Public EV Chargers
0,Alameda,78,12,66.0
1,Alpine,3,0,3.0
2,Amador,8,0,8.0
3,Butte,10,2,8.0
4,Calaveras,3,0,3.0
5,Colusa,1,0,1.0
6,Contra Costa,40,9,31.0
7,Del Norte,3,0,3.0
8,El Dorado,27,0,27.0
9,Fresno,91,24,67.0


In [28]:
# Add the zero-count Glenn record to the county totals.
# DataFrame.append was removed in pandas 2.0, so wrap the dict in a one-row
# frame and concatenate instead. The guard keeps the cell idempotent and
# avoids a duplicate row if the county is already present in the data.
if not all_chargers['county'].eq(glen_df['county']).any():
    all_chargers = pd.concat(
        [all_chargers, pd.DataFrame([glen_df])],
        ignore_index=True,
    )

In [29]:
# Export the cleaned per-county counts to a CSV file in ../Clean_Data/
# (the row index is not written).
all_chargers.to_csv(path_or_buf='../Clean_Data/CLEAN_EV_chargers.csv', index=False)

In [30]:
# Import the cleaned CSV into a table in PostgreSQL.
#
# Prerequisites:
#   * A config.py file in the same folder as this notebook containing
#       db_password = "Your Password for SQL"
#   * The target database must already exist before running this cell. We use
#     final_project; change the database name in db_string to load elsewhere.
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5432/final_project"

engine = create_engine(db_string)

rows_imported = 0
# get the start_time from time.time()
start_time = time.time()
# The file path must match the location and name used by the CSV export cell.
csv_chunks = pd.read_csv('../Clean_Data/CLEAN_EV_chargers.csv', chunksize=1000000)
for chunk_number, data in enumerate(csv_chunks):
    print(f'importing rows {rows_imported} to {rows_imported + len(data)}...', end='')
    # to_sql defaults to if_exists='fail', which raises as soon as the table
    # exists: on every chunk after the first and on any re-run of the
    # notebook. Recreate the table with the first chunk, then append the rest.
    write_mode = 'replace' if chunk_number == 0 else 'append'
    # name the table the dataframe is imported into
    data.to_sql(name='ev_chargers', con=engine, index=False, if_exists=write_mode)
    rows_imported += len(data)

    # add elapsed time to final print out
    print(f'Done. {time.time() - start_time} total seconds elapsed')

importing rows 0 to 58...Done. 0.08643674850463867 total seconds elapsed
