In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import osmnx as ox
import networkx as nx
import plotly.express as px

## Access AFDC Data

In [None]:
# Read the AFDC CSV into a DataFrame and preview its first rows.
afdc_csv_path = 'datasets/alternative_fuels_data.csv'
alternative_fuels_data = pd.read_csv(afdc_csv_path)
alternative_fuels_data.head(n=5)

In [None]:
# Tally how many rows carry each fuel type code; missing codes get their own entry.
fuel_type_codes = alternative_fuels_data['fuel_type_code']
fuel_code_tally = fuel_type_codes.value_counts(dropna=False)
fuel_code_count = fuel_code_tally.to_dict()
fuel_code_count

In [None]:
# Show every column name of the AFDC frame as a NumPy array.
alternative_fuels_data.columns.to_numpy()

## Select Specific Columns From AFDC Data

In [None]:
# Columns kept for the analysis: station identity and location, status,
# and the EV-specific attributes.
enhanced_columns = [
    # identity and location
    'station_name',
    'city',
    'state',
    'zip',
    'country',
    'access_code',
    'latitude',
    'longitude',
    # fuel type and status
    'fuel_type_code',
    'status_code',
    'open_date',
    # EV-specific attributes
    'ev_connector_types',
    'ev_dc_fast_num',
    'ev_level1_evse_num',
    'ev_level2_evse_num',
    'ev_network',
    'ev_network_web',
    'ev_other_evse',
    'ev_workplace_charging',
    'ev_pricing',
]

# Narrow the frame down to the selected columns, in the order listed above.
alternative_fuels_data = alternative_fuels_data.loc[:, enhanced_columns]

In [None]:
# Preview the first five rows of the column-reduced frame.
alternative_fuels_data.head(n=5)

## Filter AFDC Data to SDG&E Territories and Electric Charging Stations Only

In [None]:
# ZIP codes served by SDG&E, read from the project's lookup CSV.
sdge_zip_csv = pd.read_csv('data/SDGE_zip.csv')
sdge_service_zip = sdge_zip_csv['ZipCode']

# Compare ZIP codes numerically so the membership test does not depend on whether
# each CSV happened to be parsed as integers or as strings (a string/int mismatch
# makes `isin` silently match nothing). Values that are not plain numbers become
# NaN; NaNs are dropped from the lookup side so they can never count as a match.
service_zip_numeric = pd.to_numeric(sdge_service_zip, errors='coerce').dropna()
station_zip_numeric = pd.to_numeric(alternative_fuels_data['zip'], errors='coerce')

in_sdge_territory = station_zip_numeric.isin(service_zip_numeric)
is_electric = alternative_fuels_data['fuel_type_code'] == 'ELEC'

# .copy() hands later cells an independent frame they can add columns to without
# triggering pandas' SettingWithCopyWarning.
charging_station_data = alternative_fuels_data[in_sdge_territory & is_electric].copy()

In [None]:
# Isolate the stations whose access code is 'private' and report how many there are.
is_private = charging_station_data['access_code'].eq('private')
private_ev_stations = charging_station_data.loc[is_private]
private_ev_stations_count = private_ev_stations.shape[0]
print(f'There are {private_ev_stations_count} private EV charging stations in SDGE territories.')

## Bar Chart of the EV Network Distribution in SDG&E Territories

In [None]:
# Distribution of EV networks: number of stations per network, largest first.
# Rows without a network name are excluded before counting.
ev_networks = (
    charging_station_data
    .dropna(subset=['ev_network'])  # list form works across pandas versions
    .groupby('ev_network')
    .size()
    .reset_index(name='station_count')
    # sort_values returns a new frame, so its result has to be kept for the
    # ordering to show up in the chart.
    .sort_values(by='station_count', ascending=False)
    .reset_index(drop=True)
)

plt.figure(figsize=(12, 6))
plt.bar(ev_networks['ev_network'], ev_networks['station_count'])
# Rotate the tick labels once the bars (and therefore the category ticks) exist.
plt.xticks(rotation=90)
plt.xlabel('Network')
plt.ylabel('station_count')
plt.title('Distribution of EV Networks')
plt.show()


In [None]:
# Time-series EDA: how many public charging stations have opened over the years.

# Rebind with .assign instead of writing a column into the filtered frame;
# assigning into a slice of another DataFrame raises SettingWithCopyWarning.
charging_station_data = charging_station_data.assign(
    open_date=pd.to_datetime(charging_station_data['open_date'])
)

# Public stations only, with the opening year derived from open_date.
is_public = charging_station_data['access_code'] == 'public'
public_ev_stations = charging_station_data[is_public].assign(
    year=lambda stations: stations['open_date'].dt.year
)

# Stations opened per year and the running total. Rows without an opening
# date have no year and are left out by groupby.
yearly_count = public_ev_stations.groupby('year').size().reset_index(name='num_stations')
yearly_count['cumulative_stations'] = yearly_count['num_stations'].cumsum()

plt.figure(figsize=(12, 6))
plt.plot(yearly_count['year'], yearly_count['cumulative_stations'], marker='o')
plt.title('Growth of Public EV Charging Stations in SDG&E Territories')
plt.xlabel("year")
plt.ylabel('EV Station Count')
plt.grid()
plt.show()

In [None]:
# Interactive (Plotly) rendering of the cumulative public-station curve.
axis_labels = {'year': 'Year', 'cumulative_stations': 'Number of Stations (Cumulative)'}
fig = px.line(
    yearly_count,
    x='year',
    y='cumulative_stations',
    title='Growth of Public Access Electric Charging Stations Over Time',
    labels=axis_labels,
)

# Arrow annotation anchored at (2020, 600) calling out the 2019-2020 jump.
surge_annotation = dict(
    x=2020,
    y=600,
    text="Sharp Increase between 2019-2020",
    showarrow=True,
    arrowhead=2,
    ax=0,
    ay=-40,
)
fig.add_annotation(**surge_annotation)

# Fix the figure size, then render it.
fig.update_layout(width=1000, height=600)
fig.show()

In [None]:
# Look up the rows whose station name is exactly 'UCSD Hopkins'.
is_ucsd_hopkins = public_ev_stations['station_name'].eq('UCSD Hopkins')
public_ev_stations.loc[is_ucsd_hopkins]

In [None]:
# Charger-count columns, one per charging type.
port_columns = ['ev_level1_evse_num', 'ev_level2_evse_num', 'ev_dc_fast_num']

# Steps 1-2: treat missing counts as zero and make the columns float.
# Building the cleaned columns with .assign (instead of writing them one by one
# into a filtered frame) avoids SettingWithCopyWarning and keeps the cell safe to re-run.
public_ev_stations = public_ev_stations.assign(
    **{
        column: public_ev_stations[column].fillna(0).astype(float)
        for column in port_columns
    }
)

# Step 3: sum the ports opened in each year, then accumulate across years so
# every row holds the running total per charging type.
yearly_sum = (
    public_ev_stations
    .groupby('year')[port_columns]
    .sum()
    .cumsum()
    .reset_index()
)

# One line per charging type.
fig = px.line(
    yearly_sum,
    x='year',
    y=port_columns,
    title='Growth of EV Charging Stations by Type Over Time',
    labels={'value': 'Cumulative Number of Chargers', 'year': 'Year'},
    line_shape='linear'
)

# yaxis_title set here replaces the axis title derived from `labels` above.
fig.update_layout(
    yaxis_title='Number of Charging Ports',
    legend_title='Charging Type',
    margin={"r":0, "t":40, "l":40, "b":40}
)

fig.show()

In [None]:
# Unit conversions for the route length (edge lengths are summed in metres below).
METERS_PER_KM = 1000
METERS_PER_MILE = 1609.344  # exact international mile

# (latitude, longitude) pairs for the two endpoints.
sdge_location = (32.824850, -117.142928)  # SDG&E main office coordinates
ucsd_hopkins_station = (32.883572, -117.23895)  # UCSD Hopkins station coordinates

# Download the drivable street network for San Diego.
# NOTE(review): this is a network download and is likely slow — consider caching the graph.
G = ox.graph_from_place('San Diego, California, USA', network_type='drive')

# nearest_nodes takes X (longitude) first, then Y (latitude), hence the index order.
sdge_node = ox.distance.nearest_nodes(G, sdge_location[1], sdge_location[0])
ucsd_node = ox.distance.nearest_nodes(G, ucsd_hopkins_station[1], ucsd_hopkins_station[0])

# Shortest driving path between the two nodes, weighted by edge length.
route_nodes = nx.shortest_path(G, sdge_node, ucsd_node, weight='length')

# Draw the route on top of the street network.
fig, ax = ox.plot_graph_route(G, route_nodes, route_color="r", route_linewidth=2, node_size=0)

# Convert the route into a GeoDataFrame of its edges and total their lengths.
gdf = ox.routing.route_to_gdf(G, route_nodes)
route_length_m = gdf["length"].sum()

# Derive both units from the metre total so the mile figure does not inherit the
# rounding of the kilometre figure or a truncated conversion factor.
pathlength_km = np.round(route_length_m / METERS_PER_KM, 2)
pathlength_miles = np.round(route_length_m / METERS_PER_MILE, 2)
print(f"Driving distance from SDG&E to UCSD Hopkins station: {pathlength_km:.2f} km / {pathlength_miles:.2f} miles.")
