In [None]:
import pandas as pd
%matplotlib inline
from cenpy import products
import geopandas as gpd
import folium
from folium import Choropleth, GeoJsonTooltip

## Using Cenpy to Access Census Data of Median Income and Population

In [None]:
# Connect to the ACS dataset
connection = products.APIConnection('ACSDT5Y2022')  # Replace with desired ACS year

# Define the variable codes for median income and population
income_variable = 'B19013_001E'  # Median Household Income
population_variable = 'B01003_001E'  # Total Population

# Fetch both variables in ONE request instead of one request per variable.
# No geographic filter is passed, so the response is not limited to San Diego;
# the SDG&E restriction is applied locally below.
census_zcta_data = connection.query(
    cols=[income_variable, population_variable],
    geo_unit='zip code tabulation area'
).rename(columns={'zip code tabulation area': 'zip'})

# Load the SDG&E service-territory ZIP codes
sdge_zip_csv = pd.read_csv('data/SDGE_zip.csv')
sdge_service_zip = sdge_zip_csv['ZipCode'].astype(str)  # Ensure ZIP codes are strings

# Keep only the ZCTAs served by SDG&E
sdge_census_data = census_zcta_data[census_zcta_data['zip'].isin(sdge_service_zip)]

# Split into the two single-variable tables the rest of the notebook expects.
# Each table is a fresh frame (not a view of the query result), so later cells
# can safely add or overwrite columns on it.
income_data = (
    sdge_census_data
    .loc[:, sdge_census_data.columns != population_variable]
    .rename(columns={income_variable: 'median_income'})
)
population_data = (
    sdge_census_data
    .loc[:, sdge_census_data.columns != income_variable]
    .rename(columns={population_variable: 'population'})
)

# Display the separate tables
print("Median Income Data:")
print(income_data.sort_values('median_income').head())

print("\nPopulation Data:")
print(population_data.head())


In [None]:
# Convert median income to a numeric column.
#
# The cleaning keeps digits, the decimal point and the minus sign. Stripping
# every non-digit would turn a float such as 75000.0 into 750000 and would flip
# negative "estimate not available" codes (e.g. -666666666; see the ACS
# estimate/annotation value notes) into huge positive incomes.
# Values that still cannot be parsed become NaN instead of raising.
# Negative codes are deliberately left negative so the `>= 0` filter applied
# after merging with the charger counts removes them.
income_data = income_data.assign(
    median_income=pd.to_numeric(
        income_data['median_income']
        .astype(str)                                # Uniform text representation for cleaning
        .str.strip()                                # Remove leading/trailing spaces
        .str.replace(r'[^\d.\-]', '', regex=True),  # Drop formatting characters only
        errors='coerce',                            # Unparseable -> NaN
    )
)
income_data.sort_values('median_income', ascending = False)

## Accessing AFDC Data To Perform Geospatial Analysis With Cenpy

In [None]:
# Columns of the alternative-fuel station export that this notebook keeps
enhanced_columns = [
    'station_name', 'city', 'state', 'zip', 'country', 'access_code',
    'latitude', 'longitude', 'fuel_type_code', 'status_code', 'open_date',
    'ev_connector_types', 'ev_dc_fast_num', 'ev_level1_evse_num',
    'ev_level2_evse_num', 'ev_network', 'ev_network_web', 'ev_other_evse',
    'ev_workplace_charging', 'ev_pricing'
]

# Read the station export and narrow it to the columns above
alternative_fuels_data = pd.read_csv("datasets/alternative_fuels_data.csv")
alternative_fuels_data = alternative_fuels_data.loc[:, enhanced_columns]

# SDG&E service-territory ZIP codes.
# NOTE(review): sdge_service_zip is rebound here WITHOUT the astype(str) used in
# the census cell, so it carries whatever dtype read_csv inferred — confirm this
# matches the dtype of alternative_fuels_data['zip'].
sdge_service_data = pd.read_csv("data/SDGE_zip.csv")
sdge_service_zip = sdge_service_data['ZipCode']

# Keep stations that are inside the service territory AND are electric
station_in_sdge = alternative_fuels_data['zip'].isin(sdge_service_zip)
station_is_electric = alternative_fuels_data['fuel_type_code'].eq('ELEC')
charging_station_data = alternative_fuels_data.loc[station_in_sdge & station_is_electric]
charging_station_data

In [None]:
# Count the station rows per ZIP code, then normalise the ZIP key to a
# digit-only string (int -> str) so it can be joined against census ZCTA codes.
charger_counts = (
    charging_station_data
    .groupby("zip")
    .size()
    .rename("charger_count")
    .reset_index()
    .assign(zip=lambda counts: counts['zip'].astype(int).astype(str))
)
charger_counts

In [None]:
# Upper bound used to discard implausible census values (applied to both
# median income and population).
MAX_PLAUSIBLE_VALUE = 1000000


def merge_with_charger_counts(census_table, value_column, counts, max_value=MAX_PLAUSIBLE_VALUE):
    """Left-join per-ZIP charger counts onto a census table and clean the result.

    Parameters
    ----------
    census_table : pandas.DataFrame
        Table with a 'zip' column and the column named by ``value_column``.
    value_column : str
        Name of the census measure to clean (e.g. 'median_income').
    counts : pandas.DataFrame
        Table with 'zip' and 'charger_count' columns.
    max_value : number, optional
        Rows whose ``value_column`` exceeds this bound are dropped.

    Returns
    -------
    pandas.DataFrame
        An independent copy containing only rows with
        ``0 <= value_column <= max_value``. Missing or non-numeric values in
        ``value_column`` and 'charger_count' are replaced with 0.
    """
    # Cast the join key to str on a copy so the caller's table is not modified
    keyed = census_table.assign(zip=census_table['zip'].astype(str))
    merged = pd.merge(keyed, counts, on='zip', how='left')

    # ZIPs without any station get NaN from the left join -> 0 chargers
    for column in (value_column, 'charger_count'):
        merged[column] = pd.to_numeric(merged[column], errors='coerce').fillna(0)

    in_range = (merged[value_column] >= 0) & (merged[value_column] <= max_value)
    # .copy() so later cells that assign columns act on an independent frame
    return merged[in_range].copy()


income_data['zip'] = income_data['zip'].astype(str)

# Merge with income data based on ZIP code
merged_income_data = merge_with_charger_counts(income_data, 'median_income', charger_counts)

# Merge with population data based on ZIP code
merged_population_data = merge_with_charger_counts(population_data, 'population', charger_counts)

print(merged_income_data)
print(merged_population_data)

## Loading the Census.gov ZCTA Shapefile for ZIP Code Boundaries

In [None]:
# Read the ZCTA boundary shapefile and store the ZCTA code column as text,
# which is the key type used for every join against this layer.
zip_shapefile = gpd.read_file('datasets/tl_2024_us_zcta520/tl_2024_us_zcta520.shp')
zip_shapefile = zip_shapefile.assign(
    ZCTA5CE20=zip_shapefile['ZCTA5CE20'].astype(str)
)


## Displaying the density of EV chargers in SDG&E Territories

In [None]:
# Join charger counts onto the ZCTA polygons. The inner join keeps only the
# ZCTAs that appear in charger_counts.
san_diego_chargers = zip_shapefile.merge(charger_counts, left_on='ZCTA5CE20', right_on='zip', how='inner')
san_diego_chargers['charger_count'] = san_diego_chargers['charger_count'].fillna(0)  # Defensive: replace any missing count with 0

# Serialise the geometries to GeoJSON once and reuse the result for both the
# choropleth and the tooltip layer (the conversion is the expensive step).
san_diego_chargers_geojson = san_diego_chargers.to_json()

# Initialize the map centered around San Diego
m = folium.Map(location=[32.8, -117.2], zoom_start=10)

# Add a choropleth layer for EV Charger Density by ZIP code
Choropleth(
    geo_data=san_diego_chargers_geojson,
    data=san_diego_chargers,
    columns=['ZCTA5CE20', 'charger_count'], # Use ZIP code and charger count
    key_on='feature.properties.ZCTA5CE20',  # Match ZIP code in the shapefile
    fill_color='Greens',                    # Color scale for chargers
    fill_opacity=0.6,
    line_opacity=0.2,
    legend_name='EV Charger Density by ZIP Code'
).add_to(m)

# Add tooltips for interactive display
tooltip = GeoJsonTooltip(
    fields=['ZCTA5CE20', 'charger_count'],
    aliases=['ZIP Code:', 'EV Charger Count:'],
    localize=True
)
folium.GeoJson(
    san_diego_chargers_geojson,
    tooltip=tooltip
).add_to(m)

m.save('assets/cenpy_assets/ev_charger_density_by_zip.html') # Save html to assets

# Display the map
m

## Merging the Population Data with EV Station Density

In [None]:
# Make sure the join key is text on the population side
merged_population_data = merged_population_data.assign(
    zip=merged_population_data['zip'].astype(str)
)

# Attach population and charger counts to the ZCTA polygons, replace any
# missing population with 0, and order the rows from most to least populous.
map_zip_population = (
    zip_shapefile
    .merge(merged_population_data, left_on='ZCTA5CE20', right_on='zip', how='inner')
    .assign(population=lambda zctas: zctas['population'].fillna(0))
    .sort_values(by='population', ascending=False)
)

map_zip_population

## Displaying Population Data Overlaid With EV Station Density

In [None]:
# Serialise the geometries to GeoJSON once; the same string feeds both
# choropleth layers and the tooltip layer.
map_zip_population_geojson = map_zip_population.to_json()

# Initialize the map centered around San Diego
m = folium.Map(location=[32.8, -117.2], zoom_start=10)

# Add a choropleth layer for population data
Choropleth(
    geo_data=map_zip_population_geojson,
    data=map_zip_population,
    columns=['ZCTA5CE20', 'population'],    # Use ZIP code and population
    key_on='feature.properties.ZCTA5CE20',  # Match ZIP code in the shapefile
    fill_color='Reds',                    # Color scale for population
    fill_opacity=0.6,
    line_opacity=0.2,
    legend_name='Population Density by ZIP Code'
).add_to(m)

# Add a second choropleth layer for charger density (lower opacity so the
# population layer underneath remains visible)
Choropleth(
    geo_data=map_zip_population_geojson,
    data=map_zip_population,
    columns=['ZCTA5CE20', 'charger_count'],  # Use ZIP code and charger count
    key_on='feature.properties.ZCTA5CE20',   # Match ZIP code in the shapefile
    fill_color='Greens',                     # Color scale for chargers
    fill_opacity=0.4,
    line_opacity=0.2,
    legend_name='EV Charger Density by ZIP Code'
).add_to(m)

# Add tooltips to display ZIP code, population, and charger count
tooltip = GeoJsonTooltip(
    fields=['ZCTA5CE20', 'population', 'charger_count'],
    aliases=['ZIP Code:', 'Population:', 'EV Charger Count:'],
    localize=True
)
folium.GeoJson(
    map_zip_population_geojson,
    tooltip=tooltip
).add_to(m)

m.save('assets/cenpy_assets/population_vs_ev_charger_density_by_zip.html') # Save html to assets

# Display the map
m


## Single-Layer Correlation Map: ZIP Code Population vs. EV Charging Stations in SDG&E Territories

In [None]:
# Scale population and charger count by their respective maxima, then multiply
# the two scaled values into one composite score per ZIP code. The score is a
# per-ZIP product index, not a statistical correlation coefficient.
map_zip_population = map_zip_population.assign(
    population_norm=lambda zctas: zctas['population'] / zctas['population'].max(),
    charger_norm=lambda zctas: zctas['charger_count'] / zctas['charger_count'].max(),
    correlation_score=lambda zctas: zctas['population_norm'] * zctas['charger_norm'],
)

In [None]:
# Serialise the geometries to GeoJSON once and share the result between the
# choropleth and the tooltip layer.
map_zip_population_geojson = map_zip_population.to_json()

# Initialize the map centered around San Diego
m = folium.Map(location=[32.8, -117.2], zoom_start=10)

# Add a choropleth layer for the correlation score
Choropleth(
    geo_data=map_zip_population_geojson,
    data=map_zip_population,
    columns=['ZCTA5CE20', 'correlation_score'],  # Use ZIP code and correlation score
    key_on='feature.properties.ZCTA5CE20',  # Match ZIP code in the shapefile
    fill_color='RdYlBu',  # Diverging color scale
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Correlation: Population vs. Charger Density'
).add_to(m)

# Add tooltips to display ZIP code, population, and charger count
tooltip = GeoJsonTooltip(
    fields=['ZCTA5CE20', 'population', 'charger_count'],
    aliases=['ZIP Code:', 'Population:', 'EV Charger Count:'],
    localize=True,
)
folium.GeoJson(
    map_zip_population_geojson,
    tooltip=tooltip
).add_to(m)

m.save('assets/cenpy_assets/correlation_population_vs_ev_charger_density_by_zip.html') # Save html to assets

# Display the map
m


## Merging Median Income Data with EV Station Density Data

In [None]:
# Convert ZIP codes in the income data to string for merging
merged_income_data['zip'] = merged_income_data['zip'].astype(str)

# Merge income data (with charger counts) onto the ZCTA polygons
map_zip_income = zip_shapefile.merge(merged_income_data, left_on='ZCTA5CE20', right_on='zip', how='inner')

# Fill NaN values in the median income column (if necessary) and sort.
# The filled values are written back to 'median_income'; writing them to a
# 'population' column would leave the NaNs in place and add a column that
# holds income under a misleading name.
map_zip_income['median_income'] = map_zip_income['median_income'].fillna(0)
map_zip_income = map_zip_income.sort_values(by='median_income', ascending=False)

map_zip_income

## Displaying Median Income Distribution Overlaid With EV Station Density

In [None]:
# Serialise the geometries to GeoJSON once; the same string feeds both
# choropleth layers and the tooltip layer.
map_zip_income_geojson = map_zip_income.to_json()

# Initialize the map centered around San Diego
m = folium.Map(location=[32.8, -117.2], zoom_start=10)

# Add a choropleth layer for median income
Choropleth(
    geo_data=map_zip_income_geojson,
    data=map_zip_income,
    columns=['ZCTA5CE20', 'median_income'],  # Use ZIP code and median income
    key_on='feature.properties.ZCTA5CE20',   # Match ZIP code in the shapefile
    fill_color='YlOrRd',                     # Warm color scale for income
    fill_opacity=0.6,                        # Higher opacity for income
    line_opacity=0.2,
    legend_name='Median Household Income by ZIP Code'
).add_to(m)

# Add a second choropleth layer for charger density (lower opacity so the
# income layer underneath remains visible)
Choropleth(
    geo_data=map_zip_income_geojson,
    data=map_zip_income,
    columns=['ZCTA5CE20', 'charger_count'], # Use ZIP code and charger count
    key_on='feature.properties.ZCTA5CE20',  # Match ZIP code in the shapefile
    fill_color='Greens',                    # Color scale for chargers
    fill_opacity=0.4,
    line_opacity=0.2,
    legend_name='EV Charger Density by ZIP Code'
).add_to(m)

# Add tooltips to display ZIP code, income, and charger count
tooltip = GeoJsonTooltip(
    fields=['ZCTA5CE20', 'median_income', 'charger_count'],
    aliases=['ZIP Code:', 'Median Income:', 'EV Charger Count:'],
    localize=True
)
folium.GeoJson(
    map_zip_income_geojson,
    tooltip=tooltip
).add_to(m)

m.save('assets/cenpy_assets/median_income_vs_ev_charger_density_by_zip.html') # Save html to assets

# Display the map
m


## Single-Layer Correlation Map: Median Household Income vs. EV Charging Stations in SDG&E Territories

In [None]:
# Scale median income and charger count by their respective maxima, then
# multiply the two scaled values into one composite score per ZIP code. The
# score is a per-ZIP product index, not a statistical correlation coefficient.
map_zip_income = map_zip_income.assign(
    income_norm=lambda zctas: zctas['median_income'] / zctas['median_income'].max(),
    charger_norm=lambda zctas: zctas['charger_count'] / zctas['charger_count'].max(),
    correlation_score=lambda zctas: zctas['income_norm'] * zctas['charger_norm'],
)


In [None]:
# Serialise the geometries to GeoJSON once and share the result between the
# choropleth and the tooltip layer.
map_zip_income_geojson = map_zip_income.to_json()

# Initialize the map centered around San Diego
m = folium.Map(location=[32.8, -117.2], zoom_start=10)

# Add a choropleth layer for the correlation score
Choropleth(
    geo_data=map_zip_income_geojson,
    data=map_zip_income,
    columns=['ZCTA5CE20', 'correlation_score'],  # Use ZIP code and correlation score
    key_on='feature.properties.ZCTA5CE20',  # Match ZIP code in the shapefile
    fill_color='RdYlBu',  # Diverging color scale
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Correlation: Median Income vs. Charger Density'
).add_to(m)

# Add tooltips to display ZIP code, median income, and charger count
tooltip = GeoJsonTooltip(
    fields=['ZCTA5CE20', 'median_income', 'charger_count'],
    aliases=['ZIP Code:', 'Median Income:', 'EV Charger Count:'],
    localize=True,
)
folium.GeoJson(
    map_zip_income_geojson,
    tooltip=tooltip
).add_to(m)

m.save('assets/cenpy_assets/correlation_median_income_vs_ev_charger_density_by_zip.html') # Save html to assets

# Display the map
m
