In [1]:
# Dependencies and Setup

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
from scipy.stats import linregress
import scipy.stats as st

In [2]:
# Import data source 
# (Ref:Source: US Department of Energy, https://afdc.energy.gov/stations/states, accessed 10 July 2023)
evdata20_df = pd.read_csv("Original_2020_historical-ev-station-counts.csv")

# Print dataframe
evdata20_df.head()


Unnamed: 0,Station Counts by State and Fuel Type,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8
0,State,Biodiesel,CNG,E85,Electrica,Hydrogenb,LNG,Propanec,Totald
1,,,,,(stations / charging outlets,(retail / non-retail / total),,(primary / secondary / total),
2,,,,,Level 1 / Level 2 / DC Fast),,,,
3,Alabama,10,30,29,223 / 596,0 / 0 / 0,2,24 / 45 / 69,736
4,,,,,56 / 449 / 91,,,,


In [3]:
# Set first row as header & reprint dataframe
# Code Ref:  Zach, 4 August 2022, Statology, "How to set first row as header", 
# https://www.statology.org/pandas-set-first-row-as-header/), accessed 12 July 2023

evdata20_df.columns = evdata20_df.iloc[0]
evdata20_df = evdata20_df[1:]
evdata20_df.head()

# Drop rows from "State" column with "Nan" values
evdata20_df.dropna(subset = ["State"], inplace=True)
evdata20_df

Unnamed: 0,State,Biodiesel,CNG,E85,Electrica,Hydrogenb,LNG,Propanec,Totald
3,Alabama,10.0,30.0,29.0,223 / 596,0 / 0 / 0,2.0,24 / 45 / 69,736.0
5,Alaska,0.0,1.0,0.0,25 / 45,0 / 0 / 0,0.0,1 / 2 / 3,49.0
7,Arizona,75.0,30.0,20.0,"574 / 1,778",0 / 1 / 1,7.0,28 / 50 / 78,1989.0
9,Arkansas,3.0,15.0,50.0,112 / 350,0 / 0 / 0,1.0,9 / 27 / 36,455.0
11,California,27.0,322.0,198.0,"7,671 / 34,622",43 / 6 / 49,41.0,87 / 181 / 268,35527.0
13,Colorado,7.0,34.0,88.0,"1,062 / 3,234",0 / 1 / 1,0.0,12 / 41 / 53,3417.0
15,Connecticut,1.0,18.0,3.0,"456 / 1,240",1 / 1 / 2,0.0,8 / 11 / 19,1283.0
17,Delaware,1.0,2.0,1.0,70 / 206,0 / 1 / 1,0.0,7 / 2 / 9,220.0
19,District of Columbia,7.0,2.0,3.0,190 / 701,0 / 0 / 0,0.0,0 / 0 / 0,713.0
21,Florida,10.0,55.0,94.0,"1,822 / 5,519",0 / 0 / 0,3.0,24 / 89 / 113,5794.0


In [4]:
# Rename column "Electrica" to "No.EV Charge Stations / No. EV charge outlets"
# Code Ref: tozCSS, 17 September 2021, StackOverflow, "How to rename columns in pandas (with examples)"
# (https://www.statology.org/pandas-rename-columns/), accessed 12 July 2023

evbase20_df = evdata20_df.rename(columns=str).rename(columns={"Electrica":"2020 - No.EV Charge Stations/2020 - No. EV charge outlets"})

# Retain State and column 4 ("Electric (stations/charging outlets)"
# Code Ref:  Zach, 9 November 2021, Statology, "How to Select Columns by Index in a Pandas DataFrame", 
# (https://www.statology.org/pandas-select-column-by-index/#:~:text=If%20you'd%20like%20to,loc%20function.), accessed 12 July 2023

evtrim20_df = evbase20_df.iloc[:,[0,4]]
evtrim20_df.head(3)

Unnamed: 0,State,2020 - No.EV Charge Stations/2020 - No. EV charge outlets
3,Alabama,223 / 596
5,Alaska,25 / 45
7,Arizona,"574 / 1,778"


In [5]:
# split column 2 by "/" delimeter
# Code Ref:  Zach, 21 July 2021, Statology, "How to Split String Column in Pandas into Multiple Columns", 
# (https://www.statology.org/pandas-split-column/), accessed 12 July 2023

evtrim20_df[["2020 - No.EV Charge Stations", "2020 - No. EV charge outlets"]] = evtrim20_df["2020 - No.EV Charge Stations/2020 - No. EV charge outlets"].str.split("/", 1, expand=True)

# Delete original 'split' columns 
evclean20_df = evtrim20_df.drop("2020 - No.EV Charge Stations/2020 - No. EV charge outlets", axis=1)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  evtrim20_df[["2020 - No.EV Charge Stations", "2020 - No. EV charge outlets"]] = evtrim20_df["2020 - No.EV Charge Stations/2020 - No. EV charge outlets"].str.split("/", 1, expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  evtrim20_df[["2020 - No.EV Charge Stations", "2020 - No. EV charge outlets"]] = evtrim20_df["2020 - No.EV Charge Stations/2020 - No. EV charge outlets"].str.split("/", 1, expand=True)


In [6]:
# Remove last row by index number
# Code Ref:  Zach, 14 may 2021, Statology, "How to drop rows by index in pandas", 
# (https://www.statology.org/pandas-drop-row-by-index/), accessed 12 July 2023

evfinal20_df = evclean20_df.drop(index=[105, 109, 110, 111, 112])

evfinal20_df

Unnamed: 0,State,2020 - No.EV Charge Stations,2020 - No. EV charge outlets
3,Alabama,223,596
5,Alaska,25,45
7,Arizona,574,1778
9,Arkansas,112,350
11,California,7671,34622
13,Colorado,1062,3234
15,Connecticut,456,1240
17,Delaware,70,206
19,District of Columbia,190,701
21,Florida,1822,5519


In [7]:
# Export the data into a csv
evfinal20_df.to_csv("Clean_2020_EV_ChargeStation.csv", index_label="Index")