# Inspect data

In [None]:
from cities.utils import data_grabber
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Names of all features the data grabber lists as available; later cells pass
# this list to the loader and iterate over it to map each feature.
feature_names = data_grabber.list_available_features()

In [None]:
# County boundary shapefile archive hosted on the US Census Bureau site.
url = "https://www2.census.gov/geo/tiger/GENZ2021/shp/cb_2021_us_county_20m.zip"
# Read the shapes and reproject them straight away to EPSG:5070, the Albers
# Equal Area projection for the contiguous US.
counties = gpd.read_file(url).to_crs(epsg=5070)
# Cast GEOID to int (not string) so it can be joined against integer FIPS codes.
counties["GEOID"] = counties["GEOID"].astype(int)
# Only the join key and the shapes are needed from here on.
counties = counties[["GEOID", "geometry"]]

In [None]:
# Load the "std wide" table of every available feature into one grabber; the
# tables end up in data.std_wide, keyed by feature name.
# NOTE(review): "std" presumably means standardized values -- confirm in DataGrabber.
data = data_grabber.DataGrabber()
# data.get_features_wide(feature_names)  # alternative loader, left disabled
data.get_features_std_wide(feature_names)
# Handle on the GDP table; not used again in the visible cells, presumably kept
# for ad-hoc inspection.
gdp = data.std_wide["gdp"]

In [None]:
# Draw one county-level map per data column of every available feature.
#
# Relies on names defined by earlier cells: `feature_names`, `data` (a
# DataGrabber instance) and `counties` (county geometries with an integer GEOID).

# Set the figure size once, up front: rcParams only apply to figures created
# afterwards, so setting it after plotting left the first map at the default size.
plt.rcParams['figure.figsize'] = [20, 10]

# Identifier and geometry columns are join keys / shapes, not values to map.
non_data_cols = {"GeoFIPS", "GEOID", "geometry", "GeoName"}

for feature in feature_names:
    print(feature)
    # Load with the std-wide loader so that the table read from data.std_wide
    # below is the one this iteration loaded (the wide loader did not match it).
    data.get_features_std_wide([feature])
    feature_df = data.std_wide[feature]

    # Merge with the county shapes; GeoFIPS is cast to int to match
    # counties["GEOID"]. merge() defaults to an inner join, so rows without a
    # matching county shape are dropped.
    feature_df["GeoFIPS"] = feature_df["GeoFIPS"].astype(int)
    geo_feature = feature_df.merge(counties, left_on="GeoFIPS", right_on="GEOID")
    geo_feature = gpd.GeoDataFrame(geo_feature)

    # Every remaining column is plotted as its own map.
    cols_to_plot = [col for col in geo_feature.columns if col not in non_data_cols]
    # Optional: for columns whose names start with 19* or 20*, plot only the
    # last one. Uncomment to enable.
    # yr_cols = [col for col in cols_to_plot if col.startswith("19") or col.startswith("20")]
    # cols_to_plot = [col for col in cols_to_plot if col not in yr_cols or col == yr_cols[-1]]

    for col in cols_to_plot:
        # plot() returns the axes it drew on, so the title is set on that
        # axes explicitly rather than on pyplot's implicit current axes.
        ax = geo_feature.plot(column=col, legend=False)
        ax.set_title(f"{feature}: {col}")
        plt.show()