In [1]:
# Dependencies and Setup
import pathlib as Path
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
from scipy.stats import linregress
import scipy.stats as st

In [2]:
# Import data source 
# (Ref:Source: US Department of Energy, https://afdc.energy.gov/stations/states, accessed 10 July 2023)
evdata21_df = pd.read_csv("Resources/Original_2021_historical-ev-station-counts.csv")

# Print dataframe
evdata21_df.head()

# Set first row as header & reprint dataframe
# Code Ref:  Zach, 4 August 2022, Statology, "How to set first row as header", 
# https://www.statology.org/pandas-set-first-row-as-header/), accessed 12 July 2023

evdata21_df.columns = evdata21_df.iloc[0]
evdata21v1_df = evdata21_df[1:]
evdata21v1_df.head()

Unnamed: 0,State,Biodiesel,CNG,E85,Electrica,Hydrogenb,LNG,Propanec,Totald
1,,,,,(stations / charging outlets,(retail / non-retail / total),,(primary / secondary / total),
2,,,,,Level 1 / Level 2 / DC Fast),,,,
3,Alabama,10.0,30.0,33.0,276 | 680,0 | 0 | 0,2.0,26 | 42 | 68,823.0
4,,,,,35 | 527 | 118,,,,
5,Alaska,0.0,1.0,0.0,52 | 94,0 | 0 | 0,0.0,1 | 1 | 2,97.0


In [3]:
#Drop rows from "State" and "Electrica" columns with "Nan" values
evdata21v1_df.dropna(subset=["State","Electrica"], inplace=True)
evdata21v1_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  evdata21v1_df.dropna(subset=["State","Electrica"], inplace=True)


Unnamed: 0,State,Biodiesel,CNG,E85,Electrica,Hydrogenb,LNG,Propanec,Totald
3,Alabama,10,30,33,276 | 680,0 | 0 | 0,2,26 | 42 | 68,823
5,Alaska,0,1,0,52 | 94,0 | 0 | 0,0,1 | 1 | 2,97
7,Arizona,75,29,19,"890 | 2,257",0 | 1 | 1,7,36 | 33 | 69,2457
9,Arkansas,17,15,70,160 | 453,0 | 0 | 0,0,9 | 26 | 35,590
11,California,27,323,264,"14,616 | 41,225",47 | 5 | 52,41,135 | 126 | 261,42193
13,Colorado,6,31,96,"1,614 | 3,978",0 | 1 | 1,1,26 | 24 | 50,4163
15,Connecticut,1,17,3,"533 | 1,430",0 | 1 | 1,0,10 | 10 | 20,1472
17,Delaware,0,2,1,134 | 314,0 | 1 | 1,0,6 | 3 | 9,327
19,District of Columbia,8,2,4,289 | 822,0 | 0 | 0,0,0 | 6 | 6,842
21,Florida,7,56,111,"2,624 | 6,723",0 | 0 | 0,3,75 | 64 | 139,7039


In [4]:
# Rename column "Electrica" to "No.EV Charge Stations / No. EV charge outlets"
# Code Ref: tozCSS, 17 September 2021, StackOverflow, "How to rename columns in pandas (with examples)"
# (https://www.statology.org/pandas-rename-columns/), accessed 12 July 2023

evbase21v1_df = evdata21v1_df.rename(columns=str).rename(columns={"Electrica":"2021 - No.EV Charge Stations | 2021 - No. EV charge outlets"})

# Retain State and column 4 ("Electric (stations/charging outlets)"
# Code Ref:  Zach, 9 November 2021, Statology, "How to Select Columns by Index in a Pandas DataFrame", 
# (https://www.statology.org/pandas-select-column-by-index/#:~:text=If%20you'd%20like%20to,loc%20function.), accessed 12 July 2023

evtrim21v1_df = evbase21v1_df.iloc[:,[0,4]]


In [5]:
# split column 2 by "/" delimeter
# Code Ref:  Zach, 21 July 2021, Statology, "How to Split String Column in Pandas into Multiple Columns", 
# (https://www.statology.org/pandas-split-column/), accessed 12 July 2023

evtrim21v1_df[["2021 - No.EV Charge Stations", "2021 - No. EV charge outlets"]] = evtrim21v1_df["2021 - No.EV Charge Stations | 2021 - No. EV charge outlets"].str.split("|", 1, expand=True)

# Delete original 'split' columns 
evclean21_df = evtrim21v1_df.drop("2021 - No.EV Charge Stations | 2021 - No. EV charge outlets", axis=1)
evclean21_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  evtrim21v1_df[["2021 - No.EV Charge Stations", "2021 - No. EV charge outlets"]] = evtrim21v1_df["2021 - No.EV Charge Stations | 2021 - No. EV charge outlets"].str.split("|", 1, expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  evtrim21v1_df[["2021 - No.EV Charge Stations", "2021 - No. EV charge outlets"]] = evtrim21v1_df["2021 - No.EV Charge Stations | 2021 - No. EV charge outlets"].str.split("|", 1, expand=True)


Unnamed: 0,State,2021 - No.EV Charge Stations,2021 - No. EV charge outlets
3,Alabama,276,680
5,Alaska,52,94
7,Arizona,890,2257
9,Arkansas,160,453
11,California,14616,41225
13,Colorado,1614,3978
15,Connecticut,533,1430
17,Delaware,134,314
19,District of Columbia,289,822
21,Florida,2624,6723


In [6]:
# Remove last row by index number
# Code Ref:  Zach, 14 may 2021, Statology, "How to drop rows by index in pandas", 
# (https://www.statology.org/pandas-drop-row-by-index/), accessed 12 July 2023

evfinal21_df = evclean21_df.drop(index=[105])
evfinal21_df

Unnamed: 0,State,2021 - No.EV Charge Stations,2021 - No. EV charge outlets
3,Alabama,276,680
5,Alaska,52,94
7,Arizona,890,2257
9,Arkansas,160,453
11,California,14616,41225
13,Colorado,1614,3978
15,Connecticut,533,1430
17,Delaware,134,314
19,District of Columbia,289,822
21,Florida,2624,6723


In [7]:
# Export the data into a csv
evfinal21_df.to_csv("Resources/Clean_2021_EV_ChargeStation.csv", index_label="Index")