# Merging All Data
This script builds merged master datasets that combine the information from all of the individual datasets.

This script will not work if you have not run the following scripts, which produce cleaned, processed datasets.
1. process_urban_rural.ipynb
2. process_deforestation.ipynb

In [93]:
import pandas as pd
import os
import geopandas as gpd

# Base directory for the relative data paths built in the cells below. It is
# the kernel's working directory, so the '..' segments in those paths are
# resolved against wherever the notebook was started.
# NOTE(review): presumably the notebook's own folder -- confirm.
current_dir = os.getcwd()

In [94]:
# Loading Counties
# Read the county shapefile, expose its GEOID column under the name FIPS (the
# join key used by the later merges), and keep only the identifier, name and
# geometry columns.
county_shapefile = os.path.join(current_dir, '..', 'SharedData', 'Counties2', 'us_county.shp')
county_columns = ["STATEFP", "COUNTYFP", "FIPS", "NAME", "geometry"]

counties = gpd.read_file(county_shapefile)
counties = counties.rename(columns={"GEOID": "FIPS"})
counties = counties[county_columns]
counties

Unnamed: 0,STATEFP,COUNTYFP,FIPS,NAME,geometry
0,21,007,21007,Ballard,"POLYGON ((959182.740 -821711.187, 959268.612 -..."
1,21,017,21017,Bourbon,"POLYGON ((1350485.716 -619831.406, 1350606.534..."
2,21,031,21031,Butler,"POLYGON ((1155226.654 -790469.014, 1155326.223..."
3,21,065,21065,Estill,"POLYGON ((1390075.380 -684612.798, 1390215.039..."
4,21,069,21069,Fleming,"POLYGON ((1386431.679 -594472.514, 1386496.220..."
...,...,...,...,...,...
3227,60,010,60010,Eastern,"MULTIPOLYGON (((-933629.942 -3348091.425, -934..."
3228,60,020,60020,Manu'a,"MULTIPOLYGON (((-462740.161 -3201385.731, -462..."
3229,60,050,60050,Western,"POLYGON ((-1056182.608 -3393553.386, -1055840...."
3230,60,030,60030,Rose Island,"MULTIPOLYGON (((299577.946 -3124988.639, 29931..."


In [95]:
# Connecting deforestation data
forest_loss_csv = os.path.join(current_dir, "..", "Data", "Deforestation", "Annual_Forest_Loss_Threshold_50.csv")
deforestation = pd.read_csv(forest_loss_csv)

# Rebuild FIPS as a 5-character, zero-padded string so it can be joined against
# counties["FIPS"]; rows with a missing FIPS end up as "00000".
fips_as_int = deforestation["FIPS"].fillna(0).astype(int)
deforestation["FIPS"] = fips_as_int.astype(str).str.zfill(5)

# Columns carried over from each side of the join, and the forest_* names the
# deforestation columns receive in the master table.
county_fields = ["STATEFP", "COUNTYFP", "FIPS", "NAME", "geometry"]
forest_fields = ["extent_2000_ha", "extent_2010_ha", "Year", "HA_Loss", "percent_lost"]
forest_renames = {
    "extent_2000_ha": "forest_ha_2000",
    "extent_2010_ha": "forest_ha_2010",
    "HA_Loss": "forest_ha_loss",
    "percent_lost": "forest_percent_lost",
}

# Inner join: only counties that appear in the deforestation table are kept.
master = counties.merge(deforestation, on="FIPS", how="inner")
master = master[county_fields + forest_fields].rename(columns=forest_renames)
master.head(10)

Unnamed: 0,STATEFP,COUNTYFP,FIPS,NAME,geometry,forest_ha_2000,forest_ha_2010,Year,forest_ha_loss,forest_percent_lost
0,21,7,21007,Ballard,"POLYGON ((959182.740 -821711.187, 959268.612 -...",18240,15247,2001,8,0.000439
1,21,7,21007,Ballard,"POLYGON ((959182.740 -821711.187, 959268.612 -...",18240,15247,2002,18,0.000987
2,21,7,21007,Ballard,"POLYGON ((959182.740 -821711.187, 959268.612 -...",18240,15247,2003,8,0.000439
3,21,7,21007,Ballard,"POLYGON ((959182.740 -821711.187, 959268.612 -...",18240,15247,2004,42,0.002303
4,21,7,21007,Ballard,"POLYGON ((959182.740 -821711.187, 959268.612 -...",18240,15247,2005,22,0.001206
5,21,7,21007,Ballard,"POLYGON ((959182.740 -821711.187, 959268.612 -...",18240,15247,2006,59,0.003235
6,21,7,21007,Ballard,"POLYGON ((959182.740 -821711.187, 959268.612 -...",18240,15247,2007,22,0.001206
7,21,7,21007,Ballard,"POLYGON ((959182.740 -821711.187, 959268.612 -...",18240,15247,2008,21,0.001151
8,21,7,21007,Ballard,"POLYGON ((959182.740 -821711.187, 959268.612 -...",18240,15247,2009,82,0.004496
9,21,7,21007,Ballard,"POLYGON ((959182.740 -821711.187, 959268.612 -...",18240,15247,2010,53,0.002906


In [96]:
# Connecting metropolitan score data
metroscore = pd.read_csv(os.path.join(current_dir, "..", "Data", "metro.csv"))
# Rebuild FIPS as a 5-character, zero-padded string so it matches master's
# join key; rows with a missing FIPS become "00000".
metroscore["FIPS"] = metroscore["FIPS"].fillna(0).astype(int).astype(str).str.zfill(5)

# Left join: every row already in master is kept, with NaN metro fields where
# metro.csv has no matching FIPS.
master = master.merge(metroscore, how="left", on="FIPS")
master = master.drop(columns="COUNTY")

# Place STATECODE directly after FIPS. The columns are located by name rather
# than by hard-coded position, so the reorder stays correct (or fails loudly
# on a missing column) if an upstream cell adds or removes columns.
cols = [col for col in master.columns if col != "STATECODE"]
cols.insert(cols.index("FIPS") + 1, "STATECODE")
master = master[cols]
master.head()

Unnamed: 0,STATEFP,COUNTYFP,FIPS,STATECODE,NAME,geometry,forest_ha_2000,forest_ha_2010,Year,forest_ha_loss,forest_percent_lost,URBAN_RURAL_SCORE,METRO_LEVEL
0,21,7,21007,KY,Ballard,"POLYGON ((959182.740 -821711.187, 959268.612 -...",18240,15247,2001,8,0.000439,5,Micropolitan
1,21,7,21007,KY,Ballard,"POLYGON ((959182.740 -821711.187, 959268.612 -...",18240,15247,2002,18,0.000987,5,Micropolitan
2,21,7,21007,KY,Ballard,"POLYGON ((959182.740 -821711.187, 959268.612 -...",18240,15247,2003,8,0.000439,5,Micropolitan
3,21,7,21007,KY,Ballard,"POLYGON ((959182.740 -821711.187, 959268.612 -...",18240,15247,2004,42,0.002303,5,Micropolitan
4,21,7,21007,KY,Ballard,"POLYGON ((959182.740 -821711.187, 959268.612 -...",18240,15247,2005,22,0.001206,5,Micropolitan


In [97]:
# Connecting building data
buildings_csv = os.path.join(current_dir, "..", "SharedData", "County_Buildings_MASTER.csv")
buildings = pd.read_csv(buildings_csv)

# Rebuild FIPS as a 5-character, zero-padded string so it matches master's
# join key; rows with a missing FIPS become "00000".
building_fips = buildings["FIPS"].fillna(0).astype(int)
buildings["FIPS"] = building_fips.astype(str).str.zfill(5)

# Columns removed after the join. STATECODE exists on both sides, so the merge
# suffixes it: the master copy (_x) is kept and renamed back, the buildings
# copy (_y) is dropped with the rest.
redundant_columns = ["STATE_NAME", "STATECODE_y", "STATE_FIPS", "COUNTY", "FORMAL_COUNTY", "CNTY_FIPS"]

master = (
    master.merge(buildings, on="FIPS", how="left")
    .drop(columns=redundant_columns)
    .rename(columns={"STATECODE_x": "STATECODE"})
)
master.head()

Unnamed: 0,STATEFP,COUNTYFP,FIPS,STATECODE,NAME,geometry,forest_ha_2000,forest_ha_2010,Year,forest_ha_loss,...,bldg_count_Entertainment,area_sum_Flex,area_sum_Healthcare,area_sum_Hospitality,area_sum_Industrial,area_sum_Multifamily,area_sum_Office,area_sum_Retail,area_sum_Specialty,area_sum_Entertainment
0,21,7,21007,KY,Ballard,"POLYGON ((959182.740 -821711.187, 959268.612 -...",18240,15247,2001,8,...,0.0,9700.0,0.0,0.0,515500.0,76100.0,57800.0,43500.0,19800.0,0.0
1,21,7,21007,KY,Ballard,"POLYGON ((959182.740 -821711.187, 959268.612 -...",18240,15247,2002,18,...,0.0,9700.0,0.0,0.0,515500.0,76100.0,57800.0,43500.0,19800.0,0.0
2,21,7,21007,KY,Ballard,"POLYGON ((959182.740 -821711.187, 959268.612 -...",18240,15247,2003,8,...,0.0,9700.0,0.0,0.0,515500.0,76100.0,57800.0,43500.0,19800.0,0.0
3,21,7,21007,KY,Ballard,"POLYGON ((959182.740 -821711.187, 959268.612 -...",18240,15247,2004,42,...,0.0,9700.0,0.0,0.0,515500.0,76100.0,57800.0,43500.0,19800.0,0.0
4,21,7,21007,KY,Ballard,"POLYGON ((959182.740 -821711.187, 959268.612 -...",18240,15247,2005,22,...,0.0,9700.0,0.0,0.0,515500.0,76100.0,57800.0,43500.0,19800.0,0.0
