# Aggregation of Data from County to FAF Zone level

In [1]:
import geopandas as gpd
import pandas as pd

In [2]:
#Read in the shapefiles
county_shp_path = 'shapefiles/2017_CFS_Metro_Areas_with_FAF/2017_CFS_Metro_Areas_with_FAF.shp'
faf_shp_path = 'shapefiles/cb_2017_us_county_500k/cb_2017_us_county_500k.shp'

county_gdf = gpd.read_file(county_shp_path)
faf_gdf = gpd.read_file(faf_shp_path)

# Ensure both are in the same CRS
if county_gdf.crs != faf_gdf.crs:
    print(f"Reprojecting FAF zones to match county CRS: {county_gdf.crs}")
    faf_gdf = faf_gdf.to_crs(county_gdf.crs)

In [3]:
county_gdf.head()


Unnamed: 0,CFS17_NAME,GEOID,ALAND,AWATER,INTPTLAT,INTPTLON,CFS07_NAME,CFS12_NAME,CFS17_NA_1,FAF_Zone,FAF_Zone_D,FAF_Zone_1,geometry
0,"Albany-Schenectady, NY CFS Area",36091,2097880000.0,87571838.0,43.1061353,-73.8553872,"Albany-Schenectady-Amsterdam, NY CFS Area","Albany-Schenectady, NY CFS Area","Albany-Schenectady, NY CFS Area",361,Albany NY CFS Area,"Albany-Schenectady, NY CFS Area","POLYGON ((-73.39754 43.56797, -73.39712 43.567..."
1,"Atlanta-Athens-Clarke County-Sandy Springs, GA...",13171,475262800.0,6046030.0,33.0744605,-84.1467208,"Atlanta-Sandy Springs-Gainesville, GA-AL CFS A...","Atlanta-Athens-Clarke County-Sandy Springs, GA...","Atlanta-Athens-Clarke County-Sandy Springs, GA...",131,Atlanta GA,"Atlanta-Athens-Clarke County-Sandy Springs, GA...","POLYGON ((-84.90840 34.62989, -84.90839 34.628..."
2,"Austin-Round Rock, TX CFS Area",48055,1412139000.0,4972802.0,29.8323986,-97.628141,"Austin-Round Rock, TX CFS Area","Austin-Round Rock, TX CFS Area","Austin-Round Rock, TX CFS Area",481,Austin TX,"Austin-Round Rock, TX CFS Area","POLYGON ((-97.82044 30.90518, -97.81952 30.905..."
3,"Baltimore-Columbia-Towson, MD CFS Area",24510,209643600.0,28767622.0,39.3000324,-76.6104761,"Baltimore-Towson, MD CFS Area","Baltimore-Columbia-Towson, MD CFS Area","Baltimore-Columbia-Towson, MD CFS Area",241,Baltimore MD,"Baltimore-Columbia-Towson, MD CFS Area","POLYGON ((-76.23998 39.72134, -76.23980 39.721..."
4,"Baton Rouge, LA CFS Area",22037,1174307000.0,6151756.0,30.8397837,-91.0434338,"Baton Rouge-Pierre Part, LA CFS Area","Baton Rouge, LA CFS Area","Baton Rouge, LA CFS Area",221,Baton Rouge LA,"Baton Rouge, LA CFS Area","POLYGON ((-91.65981 31.04399, -91.65624 31.042..."


In [4]:
faf_gdf.head()


Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER,geometry
0,1,5,161528,0500000US01005,1005,Barbour,6,2292144656,50538698,"POLYGON ((-85.74803 31.61918, -85.74544 31.618..."
1,1,23,161537,0500000US01023,1023,Choctaw,6,2365869837,19144469,"POLYGON ((-88.47323 31.89386, -88.46888 31.930..."
2,1,35,161543,0500000US01035,1035,Conecuh,6,2201948618,6643480,"POLYGON ((-87.42720 31.26436, -87.42551 31.268..."
3,1,51,161551,0500000US01051,1051,Elmore,6,1601762124,99965171,"POLYGON ((-86.41333 32.75059, -86.37115 32.750..."
4,1,65,161558,0500000US01065,1065,Hale,6,1667907107,32423356,"POLYGON ((-87.87046 32.76244, -87.86818 32.765..."


In [None]:
#read in data you want to aggregate or disaggregate
data = pd.read_csv("data/county_business_patterns.csv")

data['FIPS'] = data['FIPS'].astype(str).str.zfill(5)

data.head()


In [None]:
print('performing spatial intersection')
intersections = gpd.overlay(county_gdf, faf_gdf, how='intersection')

In [None]:
print('calculating area of each county and the intersected portion')
intersections["intersect_area"] = intersections.geometry.area
county_gdf["total_area"] = county_gdf.geometry.area

In [None]:
print('merging with county data to get data')
# Merge with county data to get population
columns = data.columns
intersections = intersections.merge(data[columns],left_on="GEOID_1", right_on="FIPS")
intersections = intersections.merge(county_gdf[["GEOID", "total_area"]],left_on="GEOID_1", right_on="GEOID") # use area to weight the data


In [None]:
#calculate the share of the data for each county
for col in columns:
    if col != 'FIPS':
        intersections["{}_share".format(col)] = (intersections["intersect_area"] / intersections["total_area"]) * intersections[col]

In [None]:
print('grouping by FAF_Zone to aggregate data')
# Group by FAF_Zone and sum the share columns
# Exclude geometry column which can't be summed
columns_to_sum = [col for col in intersections.columns if col != 'geometry']
grouped_df = intersections[columns_to_sum].groupby('FAF_Zone').sum()

# Reset index to make FAF_Zone a column again
grouped_df = grouped_df.reset_index()

# Keep only the FAF_Zone and the share columns
share_columns = [col for col in grouped_df.columns if col.endswith('_share')]
grouped_df = grouped_df[['FAF_Zone'] + share_columns]

# Replace the intersections DataFrame with the grouped one
intersections = grouped_df


In [None]:
columns = columns[1:]
df = intersections[['FAF_Zone'] + [col + '_share' for col in columns]]
df.head()

df.to_csv("FAF_CBP.csv", index=False)
