In [8]:
# Dependencies and Setup

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
from scipy.stats import linregress
import scipy.stats as st

In [9]:
# Import data source 
# (Ref:Source: US Department of Energy, https://afdc.energy.gov/stations/states, accessed 10 July 2023)
evdata19_df = pd.read_csv("Original_2019_historical-ev-station-counts.csv")

# Print dataframe
evdata19_df.head()


Unnamed: 0,Station Counts by State and Fuel Type,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8
0,State,Biodiesel,CNG,E85,,,LNG,,Total
1,,,,,Electric,,,,
2,,,,,(stations/charging outlets),Hydrogen,,Propane,
3,Alabama,10,27,34,195 / 506,0,2,72,651
4,,,,,,,,,


In [10]:
# Set first row as header & reprint dataframe
# Code Ref:  Zach, 4 August 2022, Statology, "How to set first row as header", 
# https://www.statology.org/pandas-set-first-row-as-header/), accessed 12 July 2023

evdata19_df.columns = evdata19_df.iloc[0]
evdata19_df = evdata19_df[1:]
evdata19_df.head()

# Drop rows from "State" column with "Nan" values
evdata19_df.dropna(subset = ["State"], inplace=True)
list(evdata19_df.columns)


['State', 'Biodiesel', 'CNG', 'E85', nan, nan, 'LNG', nan, 'Total']

In [11]:
# Rename column "Nan" to "No.EV Charge Stations / No. EV charge outlets"
# Code Ref: tozCSS, 15 September 2019, StackOverflow, "Rename Pandas dataframe with Nan header"
# (https://stackoverflow.com/questions/45545675/rename-pandas-dataframe-with-nan-header), accessed 12 July 2023

evbase19_df = evdata19_df.rename(columns=str).rename(columns={"nan":"No.EV Charge Stations/No. EV charge outlets"})

# Retain State and column 4 ("Electric (stations/charging outlets)"
# Code Ref:  Zach, 9 November 2021, Statology, "How to Select Columns by Index in a Pandas DataFrame", 
# (https://www.statology.org/pandas-select-column-by-index/#:~:text=If%20you'd%20like%20to,loc%20function.), accessed 12 July 2023

evtrim19_df = evbase19_df.iloc[:,[0,4]]

evtrim19_df.head(3)

Unnamed: 0,State,No.EV Charge Stations/No. EV charge outlets
3,Alabama,195 / 506
5,Alaska,19 / 34
7,Arizona,"522 / 1,476"


In [12]:
# split column 2 by "/" delimeter
# Code Ref:  Zach, 21 July 2021, Statology, "How to Split String Column in Pandas into Multiple Columns", 
# (https://www.statology.org/pandas-split-column/), accessed 12 July 2023

evtrim19_df[["2019 - No.EV Charge Stations", "2019 - No. EV charge outlets"]] = evtrim19_df["No.EV Charge Stations/No. EV charge outlets"].str.split("/", 1, expand=True)

# Delete original 'split' columns 
evclean19_df = evtrim19_df.drop("No.EV Charge Stations/No. EV charge outlets", axis=1)
evclean19_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  evtrim19_df[["2019 - No.EV Charge Stations", "2019 - No. EV charge outlets"]] = evtrim19_df["No.EV Charge Stations/No. EV charge outlets"].str.split("/", 1, expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  evtrim19_df[["2019 - No.EV Charge Stations", "2019 - No. EV charge outlets"]] = evtrim19_df["No.EV Charge Stations/No. EV charge outlets"].str.split("/", 1, expand=True)


Unnamed: 0,State,2019 - No.EV Charge Stations,2019 - No. EV charge outlets
3,Alabama,195,506
5,Alaska,19,34
7,Arizona,522,1476
9,Arkansas,98,290
11,California,6435,27128
13,Colorado,861,2516
15,Connecticut,422,1048
17,Delaware,57,172
19,District of Columbia,172,553
21,Florida,1562,4562


In [13]:
# Remove last row by index number
# Code Ref:  Zach, 14 may 2021, Statology, "How to drop rows by index in pandas", 
# (https://www.statology.org/pandas-drop-row-by-index/), accessed 12 July 2023

evfinal19_df = evclean19_df.drop(index=[105])

evfinal19_df

Unnamed: 0,State,2019 - No.EV Charge Stations,2019 - No. EV charge outlets
3,Alabama,195,506
5,Alaska,19,34
7,Arizona,522,1476
9,Arkansas,98,290
11,California,6435,27128
13,Colorado,861,2516
15,Connecticut,422,1048
17,Delaware,57,172
19,District of Columbia,172,553
21,Florida,1562,4562


In [14]:
# Export the data into a csv
evfinal19_df.to_csv("Clean_2019_EV_ChargeStation.csv", index_label="Index")