In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

# Indicator values for the selected counties. The three lists are parallel:
# position i in each list describes the same county.
county = [
    "Santa Clara", "Santa Cruz", "San Mateo", "San Francisco", "Alameda",
    "Contra Costa", "Solano", "Napa", "Sonoma", "Marin",
]
poverty_rate = [7.6, 11.2, 7.3, 11.9, 9.2, 8.3, 9.7, 8.9, 8.6, 9.1]
water_quality = [
    75.688, 68.948876, 72.13962857, 63.2111, 70.8888,
    71.475253, 71.1763, 73.108667, 73.9704, 70.582,
]

# Tabular form of the indicators, keyed by the county name used in the shapefile.
data = pd.DataFrame({
    'NAME': county,
    'poverty_rate': poverty_rate,
    'water_quality': water_quality,
})

# Load the county boundaries, keep state FIPS '06', then keep only the listed
# counties and attach their indicator columns.
gdf = gpd.read_file("cb_2023_us_county_5m/cb_2023_us_county_5m.shp")
in_state = gdf['STATEFP'] == '06'
gdf = gdf[in_state]
is_listed = gdf['NAME'].isin(county)
gdf = gdf[is_listed].merge(data, on='NAME')

# Side-by-side choropleths: one panel per indicator, no legends, no axes.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))

# (axes, column to colour by, matplotlib colormap, panel title)
panels = [
    (ax1, 'poverty_rate', 'Reds', "Poverty Rate"),
    (ax2, 'water_quality', 'coolwarm_r', "Water Quality Index"),
]

for panel_ax, value_column, colormap_name, panel_title in panels:
    gdf.plot(
        column=value_column,
        cmap=colormap_name,
        edgecolor='black',
        legend=False,
        ax=panel_ax,
    )
    panel_ax.set_title(panel_title, fontsize=16)
    panel_ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
import geopandas as gpd
import pandas as pd
import folium
from folium.features import GeoJsonTooltip

# Indicator values for the selected counties (parallel lists: same index,
# same county).
county = [
    "Santa Clara", "Santa Cruz", "San Mateo", "San Francisco", "Alameda",
    "Contra Costa", "Solano", "Napa", "Sonoma", "Marin",
]
water_quality = [
    75.688, 68.948876, 72.13962857, 63.2111, 70.8888,
    71.475253, 71.1763, 73.108667, 73.9704, 70.582,
]
poverty_rate = [7.6, 11.2, 7.3, 11.9, 9.2, 8.3, 9.7, 8.9, 8.6, 9.1]

data = pd.DataFrame({
    'NAME': county,
    'poverty_rate': poverty_rate,
    'water_quality': water_quality,
})

# County shapefile, restricted to California (state FIPS '06').
gdf = gpd.read_file("cb_2023_us_county_5m/cb_2023_us_county_5m.shp")
california_mask = gdf['STATEFP'] == '06'
gdf = gdf[california_mask]

# Keep only the listed counties and attach their indicator columns.
listed_mask = gdf['NAME'].isin(county)
gdf = gdf[listed_mask]
gdf = gdf.merge(data, on='NAME')

# Cut each indicator into three quantile bins labelled 0 (lowest values)
# through 2 (highest values).
tercile_labels = [0, 1, 2]
gdf['poverty_bin'] = pd.qcut(gdf['poverty_rate'], q=3, labels=tercile_labels)
gdf['water_bin'] = pd.qcut(gdf['water_quality'], q=3, labels=tercile_labels)

# Mirror the water bin (0 <-> 2) so the lowest water-quality tercile gets the
# highest index.
gdf['water_bin_flipped'] = gdf['water_bin'].astype(int).rsub(2)

# 3x3 colour lookup, indexed as bivariate_colors[poverty_bin][water_bin_flipped].
bivariate_colors = [
    ['#6baed6', '#bdd7e7', '#f0f0f0'],  # poverty_bin 0
    ['#fcbba1', '#fc9272', '#fb6a4a'],  # poverty_bin 1
    ['#ef3b2c', '#cb181d', '#99000d'],  # poverty_bin 2
]

def get_color(row):
    """Look up the bivariate fill colour for one county row.

    Reads ``row['poverty_bin']`` and ``row['water_bin_flipped']``, casts each
    to ``int``, and uses them as the (row, column) index into the module-level
    ``bivariate_colors`` table.

    Returns:
        The colour string stored at that position of ``bivariate_colors``.
    """
    poverty_index = int(row['poverty_bin'])
    water_index = int(row['water_bin_flipped'])
    palette_row = bivariate_colors[poverty_index]
    return palette_row[water_index]

# Attach each county's bivariate fill colour, looked up row by row.
gdf['bivar_color'] = gdf.apply(get_color, axis=1)

# Reproject to EPSG:4326 so the centroid below is in latitude/longitude,
# which is what the map location takes.
gdf = gdf.to_crs(epsg=4326)

# Dissolve the county outlines once and centre the map on the centroid.
# `union_all()` replaces the deprecated `unary_union` in newer GeoPandas;
# fall back to the old property when the method is not available.
county_shapes = gdf.geometry
if hasattr(county_shapes, "union_all"):
    dissolved_outline = county_shapes.union_all()
else:
    dissolved_outline = county_shapes.unary_union
center = dissolved_outline.centroid

m = folium.Map(
    location=[center.y, center.x],  # folium wants [lat, lon]
    zoom_start=9,
    tiles='cartodbpositron',
    max_bounds=True
)


def _bivariate_style(feature):
    """Style one GeoJSON feature using its precomputed 'bivar_color' property."""
    return {
        'fillColor': feature['properties']['bivar_color'],
        'color': 'black',
        'weight': 1,
        'fillOpacity': 0.8
    }


# Hover tooltip showing the county name and both raw indicator values.
county_tooltip = GeoJsonTooltip(
    fields=['NAME', 'poverty_rate', 'water_quality'],
    aliases=['County:', 'Poverty rate:', 'Water quality:'],
    localize=True
)

county_layer = folium.GeoJson(
    gdf,
    style_function=_bivariate_style,
    tooltip=county_tooltip
)
county_layer.add_to(m)

# Write the interactive map to a standalone HTML file.
m.save("bivariate_map.html")
print("Saved interactive bivariate map to bivariate_map.html")


In [None]:
import geopandas as gpd
import pandas as pd
import folium
from folium.features import GeoJsonTooltip
import branca.colormap as cm
import sys

# Path to the county boundary shapefile read by this cell.
SHAPEFILE = "cb_2023_us_county_5m/cb_2023_us_county_5m.shp"

# Counties to map. `water_quality` and `poverty_rate` are parallel to this
# list: position i in each list describes COUNTIES[i].
COUNTIES = [
    "Santa Clara",
    "Santa Cruz",
    "San Mateo",
    "San Francisco",
    "Alameda",
    "Contra Costa",
    "Solano",
    "Napa",
    "Sonoma",
    "Marin",
]
water_quality = [
    75.688,
    68.948876,
    72.13962857,
    63.2111,
    70.8888,
    71.475253,
    71.1763,
    73.108667,
    73.9704,
    70.582,
]
poverty_rate = [
    7.6,
    11.2,
    7.3,
    11.9,
    9.2,
    8.3,
    9.7,
    8.9,
    8.6,
    9.1,
]

def normalize_name(s):
    """Reduce a county name to a comparison key.

    The value is converted with ``str()``, lower-cased, every occurrence of
    the substring " county" is removed, and surrounding whitespace is
    stripped.

    Args:
        s: The raw name; any object accepted by ``str()``, or ``None``.

    Returns:
        The normalized string, or "" when ``s`` is ``None``.
    """
    if s is None:
        return ""
    lowered = str(s).lower()
    without_suffix = lowered.replace(" county", "")
    return without_suffix.strip()

# Load the county boundaries and report what the file contains.
gdf = gpd.read_file(SHAPEFILE)
print("Read shapefile with columns:", list(gdf.columns))
print("CRS:", gdf.crs)

# Candidate county-name columns, most preferred first.
priority = ['NAME', 'NAMELSAD', 'COUNTY', 'COUNTY_NM', 'CNTYNAME', 'NAME_0', 'NAME10', 'NAME_1']

# Pick the first preferred column that exists in the shapefile.
name_col = next((c for c in priority if c in gdf.columns), None)
if name_col is not None:
    print(f"Using prioritized name column: {name_col}")
else:
    # Fallback: the first non-geometry column holding Python objects (strings).
    name_col = next(
        (c for c in gdf.columns
         if c != gdf.geometry.name and gdf[c].dtype == object),
        None,
    )
    if name_col is not None:
        print(f"No priority column found — selecting first object column as name: {name_col}")

# Without this guard, a shapefile with no usable column reaches `gdf[None]`
# below and fails with an uninformative KeyError.
if name_col is None:
    raise ValueError(
        "Could not find a county-name column in the shapefile; "
        f"available columns: {list(gdf.columns)}"
    )

# Normalized join key shared with the indicator table.
gdf['NAME_norm'] = gdf[name_col].astype(str).apply(normalize_name)

# Keep only state FIPS '06' when the shapefile carries a state code column.
if 'STATEFP' in gdf.columns:
    is_california = gdf['STATEFP'] == '06'
    gdf = gdf[is_california]
    print("Filtered to STATEFP == '06' (California). Rows now:", len(gdf))

# Indicator table, with the same normalized join key as the shapefile.
data_df = pd.DataFrame({
    'NAME': COUNTIES,
    'poverty_rate': poverty_rate,
    'water_quality': water_quality
}).assign(NAME_norm=lambda frame: frame['NAME'].apply(normalize_name))

# Diagnostics: which normalized names does the shapefile actually contain?
shp_names = set(gdf['NAME_norm'].unique())
print("\nSample of normalized names from shapefile (first 20):")
print(list(shp_names)[:20])

# Split the requested counties into those found in the shapefile and those not.
missing = []
present = []
for requested_name in data_df['NAME_norm']:
    if requested_name in shp_names:
        present.append(requested_name)
    else:
        missing.append(requested_name)
print(f"\nRequested counties present (normalized): {len(present)}")
print(f"Requested counties missing (normalized): {len(missing)} -> {missing}")

# Inner join: only counties present on both sides survive.
merged = gdf.merge(data_df, on='NAME_norm', how='inner')
print("Rows after merge:", len(merged))
if merged.empty:
    # Nothing matched: show candidate columns to help debug, then stop.
    print("ERROR: No rows after merge. That means names still didn't match.")
    print("Available name-like columns in shapefile:")
    geometry_column = gdf.geometry.name
    for c in gdf.columns:
        if gdf[c].dtype != object or c == geometry_column:
            continue
        print(" -", c, " (sample values):", list(gdf[c].astype(str).unique()[:5]))
    sys.exit(1)

# Reproject the joined counties to EPSG:4326 so the centroid below is in
# latitude/longitude.
merged = merged.to_crs(epsg=4326)

# Centre the map on the counties actually being drawn. The previous code
# centred on `gdf` (every county left after the state filter, and not
# reprojected), which puts the selected counties off-screen at zoom 9.
# `union_all()` replaces the deprecated `unary_union` in newer GeoPandas;
# fall back to the old property when the method is not available.
merged_shapes = merged.geometry
if hasattr(merged_shapes, "union_all"):
    merged_outline = merged_shapes.union_all()
else:
    merged_outline = merged_shapes.unary_union
center_geom = merged_outline.centroid

center = center_geom
m = folium.Map(
    location=[center.y, center.x],  # folium wants [lat, lon]
    zoom_start=9,
    tiles='cartodbpositron',
    max_bounds=True
)

# Poverty: light-to-dark red ramp spanning the observed range
# (darker = higher poverty rate).
poverty_cmap = cm.LinearColormap(['#fff5f0','#fee0d2','#fcbba1','#fc9272','#fb6a4a','#cb181d'],
                                 vmin=merged['poverty_rate'].min(), vmax=merged['poverty_rate'].max(),
                                 caption='Poverty rate (%)')

# Water quality: red (lowest index) to blue (highest index). The ramp was
# previously blue-to-red, the opposite of the other two maps in this notebook
# (the static map uses 'coolwarm_r' and the bivariate map flips the water
# bin), both of which give the lowest water-quality values the "hot" colour.
# NOTE(review): assumes a higher index means better water quality - confirm.
water_cmap = cm.LinearColormap(['#a50026','#f46d43','#fdae61','#74add1','#4575b4','#313695'],
                               vmin=merged['water_quality'].min(), vmax=merged['water_quality'].max(),
                               caption='Water quality index')

# One toggleable layer per indicator; only the poverty layer is shown initially.
poverty_fg = folium.FeatureGroup(name='Poverty rate', show=True)
water_fg = folium.FeatureGroup(name='Water quality', show=False)

def style_poverty(feature):
    """Build the style dict for one GeoJSON feature of the poverty layer.

    The fill colour is obtained by passing the feature's 'poverty_rate'
    property to the module-level ``poverty_cmap``; the outline is a
    1-unit-wide black line and the fill opacity is 0.8.
    """
    rate = feature['properties']['poverty_rate']
    style = {
        'fillColor': poverty_cmap(rate),
        'color': 'black',
        'weight': 1,
        'fillOpacity': 0.8,
    }
    return style

def style_water(feature):
    """Build the style dict for one GeoJSON feature of the water-quality layer.

    The fill colour is obtained by passing the feature's 'water_quality'
    property to the module-level ``water_cmap``; the outline is a
    1-unit-wide black line and the fill opacity is 0.8.
    """
    quality = feature['properties']['water_quality']
    style = {
        'fillColor': water_cmap(quality),
        'color': 'black',
        'weight': 1,
        'fillOpacity': 0.8,
    }
    return style

# Field shown as the county label in tooltips. The name column is
# auto-detected earlier in this cell, so 'NAMELSAD' is not guaranteed to
# exist; fall back to the merge key 'NAME_norm', which is always in `merged`.
county_label_field = 'NAMELSAD' if 'NAMELSAD' in merged.columns else 'NAME_norm'

# (target layer, style callback, value column, tooltip label for the value)
layer_specs = [
    (poverty_fg, style_poverty, 'poverty_rate', 'Poverty rate:'),
    (water_fg, style_water, 'water_quality', 'Water quality:'),
]

# Draw the same county geometries once per indicator, each into its own layer.
for feature_group, style_fn, value_field, value_alias in layer_specs:
    folium.GeoJson(
        merged,
        style_function=style_fn,
        tooltip=GeoJsonTooltip(
            fields=[county_label_field, value_field],
            aliases=['County:', value_alias]
        )
    ).add_to(feature_group)

poverty_fg.add_to(m)
water_fg.add_to(m)

# Colour legends for both indicators, plus an always-expanded layer switcher.
poverty_cmap.add_to(m)
water_cmap.add_to(m)
folium.LayerControl(collapsed=False).add_to(m)

# Write the interactive map to a standalone HTML file.
OUT = "counties_map.html"
m.save(OUT)
print("Saved map to", OUT)


In [None]:
import geopandas as gpd
# Path to the county boundary shapefile being inspected.
SHAPEFILE = "cb_2023_us_county_5m/cb_2023_us_county_5m.shp"

# Load the shapefile and print a structural overview: size, columns, dtypes.
gdf = gpd.read_file(SHAPEFILE)
row_total = len(gdf)
print("Total rows:", row_total)
column_names = list(gdf.columns)
print("Columns:\n", column_names)

print("\nColumn dtypes:")
print(gdf.dtypes)

# Transposed head: one line per column, which is easier to read for wide frames.
print("\nFirst 5 rows (head):")
preview = gdf.head().T
print(preview)

# For each likely name/identifier column that exists, show a few distinct values.
candidates = ['NAME', 'NAME', 'NAME_0', 'COUNTY', 'COUNTY_NM', 'CNTYNAME', 'GEOID']
candidates[1] = 'NAMELSAD'
for c in candidates:
    if c not in gdf.columns:
        continue
    print(f"\nSample unique values from column '{c}':")
    print(gdf[c].astype(str).unique()[:10])
