In [2]:
import geopandas as gpd
import pandas as pd
import plotly.express as px

In [3]:
# Source data: a GeoJSON file fetched over HTTP from the URL below.
url = 'https://eric.clst.org/assets/wiki/uploads/Stuff/gz_2010_us_050_00_5m.json'

# Read the file into a GeoDataFrame, decoding text as ISO-8859-1 (also known as
# 'latin1') instead of relying on the reader's default encoding.
gdf = gpd.read_file(
    url,
    encoding='ISO-8859-1',
)

In [5]:
# Reproject to a projected CRS (EPSG:3395, World Mercator). This is kept so
# that `gdf` carries the same CRS as before for any cell that runs afterwards.
gdf = gdf.to_crs(epsg=3395)

# Mercator is not an equal-area projection: areas measured in it are inflated
# more and more with latitude. Measure polygon areas on an equal-area
# projection instead (EPSG:6933, a global cylindrical equal-area grid whose
# units are metres), without changing the CRS of `gdf` itself.
#
# `.area` is expressed in CRS units squared, i.e. square metres here. One
# international mile is 1609.344 m, so dividing by 1609.344 ** 2 yields square
# miles. (Dividing by 1e6, as was done previously, yields square kilometres,
# which contradicted the "sq miles" labels below.)
SQ_METERS_PER_SQ_MILE = 1609.344 ** 2
gdf['area'] = gdf.geometry.to_crs(epsg=6933).area / SQ_METERS_PER_SQ_MILE

# Plot the histogram of the per-feature areas.
fig_area = px.histogram(
    gdf,
    x='area',
    nbins=50,
    title='Distribution of Areas of Counties/States (in sq miles)',
    labels={'area': 'Area (sq miles)'},
)
fig_area.show()

In [6]:
# Chart 2: Distribution of Common County Names
# Count how often each value of NAME occurs and keep the 20 most frequent.
county_name_counts = gdf['NAME'].value_counts().head(20)

# Put the counts in a small frame with explicit column names. Passing the
# Series' `.index` / `.values` arrays to plotly makes the axis-label keys
# ('index', 'x', 'NAME', ...) depend on the index name and on the installed
# pandas/plotly versions, so a `labels=` mapping can silently fail to apply.
# Naming the columns directly makes the axis and hover labels deterministic.
county_name_table = pd.DataFrame({
    'County Name': county_name_counts.index,
    'Count': county_name_counts.to_numpy(),
})

fig_county_names = px.bar(
    county_name_table,
    x='County Name',
    y='Count',
    title='Most Common County Names',
)
fig_county_names.show()

In [8]:
# Print the GeoDataFrame's column labels (presumably to confirm the name of the
# state column used for the per-state chart that follows — verify).
print(gdf.columns)

Index(['GEO_ID', 'STATE', 'COUNTY', 'NAME', 'LSAD', 'CENSUSAREA', 'geometry',
       'area'],
      dtype='object')


In [9]:
# Chart 3: Number of Counties in Each State (corrected)
# Count the rows per value of the 'STATE' column (one row per county feature).
state_count = gdf.groupby('STATE').size()

# Put the counts in a frame with explicit column names. The grouped index is
# named 'STATE', so when it is passed to plotly as a raw array the label key is
# not reliably 'x' and a `labels={'x': ...}` mapping may never be applied.
# Naming the columns directly makes the axis and hover labels deterministic.
state_count_table = pd.DataFrame({
    'State': state_count.index,
    'Number of Counties': state_count.to_numpy(),
})

fig_state_count = px.bar(
    state_count_table,
    x='State',
    y='Number of Counties',
    title='Number of Counties in Each State',
)
# NOTE(review): STATE presumably holds numeric-looking codes (e.g. FIPS codes
# stored as strings) — confirm. Forcing a categorical axis gives every state
# its own evenly spaced bar instead of a position on a continuous number line.
fig_state_count.update_xaxes(type='category')
fig_state_count.show()