In [32]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

### Schools

In [33]:
# Load the school records; the X / Y columns are interpreted as EPSG:2283
# coordinates when the point layer is built below.
df = pd.read_csv("School.csv")

# Build the point layer with one vectorized call instead of creating a
# Python Point object per row.
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df['X'], df['Y']),
    crs="EPSG:2283",
)

# Load tract shapefile and keep Prince William County only (FIPS: 51153)
tracts = gpd.read_file("tl_2024_51_tract.shp")
tracts = tracts[tracts['COUNTYFP'] == '153']

# Reproject the points straight into the tract layer's CRS. Only this CRS is
# needed for the join, so no intermediate pass through EPSG:4326 is made.
gdf_points = gdf.to_crs(tracts.crs)

# Spatial join: attach to each school the attributes of the tract it lies
# within. how="left" keeps schools that fall in no county tract; their tract
# columns (including GEOID) are left missing.
joined = gpd.sjoin(gdf_points, tracts, how="left", predicate="within")

In [34]:
# Number of schools per census tract. size() counts the joined rows
# themselves, so the tally does not depend on an identifier column such as
# `_id` being present and non-null. Rows with a missing GEOID (schools that
# matched no tract) are dropped by groupby's default handling of NaN keys.
num_school = joined.groupby('GEOID').size().reset_index(name='Num_School')
num_school

Unnamed: 0,GEOID,Num_School
0,51153900100,1
1,51153900202,1
2,51153900203,1
3,51153900301,1
4,51153900403,1
...,...,...
57,51153901511,3
58,51153901601,2
59,51153901602,1
60,51153901702,2


### Shopping Centers

In [35]:
# Load the shopping-center records; the X / Y columns are interpreted as
# EPSG:2283 coordinates when the point layer is built below.
df = pd.read_csv("Shopping_center.csv")

# Build the point layer with one vectorized call instead of creating a
# Python Point object per row.
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df['X'], df['Y']),
    crs="EPSG:2283",
)

# Load tract shapefile and keep Prince William County only (FIPS: 51153)
tracts = gpd.read_file("tl_2024_51_tract.shp")
tracts = tracts[tracts['COUNTYFP'] == '153']

# Reproject the points straight into the tract layer's CRS. Only this CRS is
# needed for the join, so no intermediate pass through EPSG:4326 is made.
gdf_points = gdf.to_crs(tracts.crs)

# Spatial join: attach to each shopping center the attributes of the tract it
# lies within. how="left" keeps centers that fall in no county tract; their
# tract columns (including GEOID) are left missing.
joined = gpd.sjoin(gdf_points, tracts, how="left", predicate="within")

In [37]:
# Number of shopping centers per census tract. size() counts the joined rows
# themselves, so the tally does not depend on an identifier column such as
# `_id` being present and non-null. Rows with a missing GEOID (centers that
# matched no tract) are dropped by groupby's default handling of NaN keys.
num_sc = joined.groupby('GEOID').size().reset_index(name='Num_Shopping_Center')
num_sc.head(5)

Unnamed: 0,GEOID,Num_Shopping_Center
0,51153900201,5
1,51153900301,3
2,51153900302,1
3,51153900403,18
4,51153900404,2


### Libraries

In [38]:
# Load the library records; the X / Y columns are interpreted as EPSG:2283
# coordinates when the point layer is built below.
df = pd.read_csv("Library.csv")

# Build the point layer with one vectorized call instead of creating a
# Python Point object per row.
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df['X'], df['Y']),
    crs="EPSG:2283",
)

# Load tract shapefile and keep Prince William County only (FIPS: 51153)
tracts = gpd.read_file("tl_2024_51_tract.shp")
tracts = tracts[tracts['COUNTYFP'] == '153']

# Reproject the points straight into the tract layer's CRS. Only this CRS is
# needed for the join, so no intermediate pass through EPSG:4326 is made.
gdf_points = gdf.to_crs(tracts.crs)

# Spatial join: attach to each library the attributes of the tract it lies
# within. how="left" keeps libraries that fall in no county tract; their
# tract columns (including GEOID) are left missing.
joined = gpd.sjoin(gdf_points, tracts, how="left", predicate="within")

In [41]:
# Number of libraries per census tract. size() counts the joined rows
# themselves, so the tally does not depend on an identifier column such as
# `OBJECTID` being present and non-null. Rows with a missing GEOID (libraries
# that matched no tract) are dropped by groupby's default handling of NaN keys.
num_lib = joined.groupby('GEOID').size().reset_index(name='Num_Library')
num_lib.head(5)

Unnamed: 0,GEOID,Num_Library
0,51153900502,1
1,51153901010,1
2,51153901014,1
3,51153901203,1
4,51153901209,1


### Hospitals

In [42]:
# Load the hospital records; the X / Y columns are interpreted as EPSG:2283
# coordinates when the point layer is built below.
df = pd.read_csv("Hospitals.csv")

# Build the point layer with one vectorized call instead of creating a
# Python Point object per row.
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df['X'], df['Y']),
    crs="EPSG:2283",
)

# Load tract shapefile and keep Prince William County only (FIPS: 51153)
tracts = gpd.read_file("tl_2024_51_tract.shp")
tracts = tracts[tracts['COUNTYFP'] == '153']

# Reproject the points straight into the tract layer's CRS. Only this CRS is
# needed for the join, so no intermediate pass through EPSG:4326 is made.
gdf_points = gdf.to_crs(tracts.crs)

# Spatial join: attach to each hospital the attributes of the tract it lies
# within. how="left" keeps hospitals that fall in no county tract; their
# tract columns (including GEOID) are left missing.
joined = gpd.sjoin(gdf_points, tracts, how="left", predicate="within")

In [43]:
# Number of hospitals per census tract. size() counts the joined rows
# themselves, so the tally does not depend on an identifier column such as
# `OBJECTID` being present and non-null. Rows with a missing GEOID (hospitals
# that matched no tract) are dropped by groupby's default handling of NaN keys.
num_hos = joined.groupby('GEOID').size().reset_index(name='Num_Hospital')
num_hos.head(5)

Unnamed: 0,GEOID,Num_Hospital
0,51153900301,1
1,51153900302,2
2,51153900403,2
3,51153900404,2
4,51153900502,1


### Fire Stations

In [44]:
# Load the fire-station records; the X / Y columns are interpreted as
# EPSG:2283 coordinates when the point layer is built below.
df = pd.read_csv("Fire_Stations.csv")

# Build the point layer with one vectorized call instead of creating a
# Python Point object per row.
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df['X'], df['Y']),
    crs="EPSG:2283",
)

# Load tract shapefile and keep Prince William County only (FIPS: 51153)
tracts = gpd.read_file("tl_2024_51_tract.shp")
tracts = tracts[tracts['COUNTYFP'] == '153']

# Reproject the points straight into the tract layer's CRS. Only this CRS is
# needed for the join, so no intermediate pass through EPSG:4326 is made.
gdf_points = gdf.to_crs(tracts.crs)

# Spatial join: attach to each fire station the attributes of the tract it
# lies within. how="left" keeps stations that fall in no county tract; their
# tract columns (including GEOID) are left missing.
joined = gpd.sjoin(gdf_points, tracts, how="left", predicate="within")

In [45]:
# Number of fire stations per census tract. size() counts the joined rows
# themselves, so the tally does not depend on an identifier column such as
# `OBJECTID` being present and non-null. Rows with a missing GEOID (stations
# that matched no tract) are dropped by groupby's default handling of NaN keys.
num_fire = joined.groupby('GEOID').size().reset_index(name='Num_Fire_Station')
num_fire.head(5)

Unnamed: 0,GEOID,Num_Fire_Station
0,51153900202,1
1,51153900403,1
2,51153900409,1
3,51153900503,1
4,51153900504,1


### Places of Worship

In [48]:
# Load the place-of-worship records; the X / Y columns are interpreted as
# EPSG:2283 coordinates when the point layer is built below.
df = pd.read_csv("Places_of_Worship.csv")

# Build the point layer with one vectorized call instead of creating a
# Python Point object per row.
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df['X'], df['Y']),
    crs="EPSG:2283",
)

# Load tract shapefile and keep Prince William County only (FIPS: 51153)
tracts = gpd.read_file("tl_2024_51_tract.shp")
tracts = tracts[tracts['COUNTYFP'] == '153']

# Reproject the points straight into the tract layer's CRS. Only this CRS is
# needed for the join, so no intermediate pass through EPSG:4326 is made.
gdf_points = gdf.to_crs(tracts.crs)

# Spatial join: attach to each place of worship the attributes of the tract
# it lies within. how="left" keeps places that fall in no county tract; their
# tract columns (including GEOID) are left missing.
joined = gpd.sjoin(gdf_points, tracts, how="left", predicate="within")

In [49]:
# Number of places of worship per census tract. size() counts the joined rows
# themselves, so the tally does not depend on an identifier column such as
# `OBJECTID` being present and non-null. Rows with a missing GEOID (places
# that matched no tract) are dropped by groupby's default handling of NaN keys.
num_worship = joined.groupby('GEOID').size().reset_index(name='Num_Worship')
num_worship.head(5)

Unnamed: 0,GEOID,Num_Worship
0,51153900201,6
1,51153900202,6
2,51153900203,3
3,51153900301,3
4,51153900302,5


### Merge together

In [51]:
# Outer-join the per-tract count tables on GEOID, starting from the school
# counts and adding the others in this fixed order, so a tract present in any
# table is kept.
count_tables = [num_fire, num_lib, num_hos, num_worship, num_sc]
df_final = num_school
for counts in count_tables:
    df_final = pd.merge(df_final, counts, on='GEOID', how='outer')

# Missing counts left behind by the outer joins become 0.
df_final = df_final.fillna(0)
df_final

Unnamed: 0,GEOID,Num_School,Num_Fire_Station,Num_Library,Num_Hospital,Num_Worship,Num_Shopping_Center
0,51153900100,1.0,0.0,0.0,0.0,0.0,0.0
1,51153900202,1.0,1.0,0.0,0.0,6.0,0.0
2,51153900203,1.0,0.0,0.0,0.0,3.0,0.0
3,51153900301,1.0,0.0,0.0,1.0,3.0,3.0
4,51153900403,1.0,1.0,0.0,2.0,13.0,18.0
...,...,...,...,...,...,...,...
80,51153901228,0.0,0.0,0.0,0.0,4.0,1.0
81,51153901411,0.0,0.0,0.0,0.0,4.0,1.0
82,51153901012,0.0,0.0,0.0,0.0,0.0,1.0
83,51153901015,0.0,0.0,0.0,0.0,0.0,2.0


In [52]:
# Write the combined per-tract facility counts to disk, without the row index.
facility_counts_path = 'Number_Facilities.csv'
df_final.to_csv(facility_counts_path, index=False)

In [1]:
import pandas as pd

In [4]:
# Reload the saved facility counts and rename the tract key from GEOID to ID.
facility = (
    pd.read_csv('Number_Facilities.csv')
    .rename(columns={'GEOID': 'ID'})
)
facility.head()

Unnamed: 0,ID,Num_School,Num_Fire_Station,Num_Library,Num_Hospital,Num_Worship,Num_Shopping_Center
0,51153900100,1.0,0.0,0.0,0.0,0.0,0.0
1,51153900202,1.0,1.0,0.0,0.0,6.0,0.0
2,51153900203,1.0,0.0,0.0,0.0,3.0,0.0
3,51153900301,1.0,0.0,0.0,1.0,3.0,3.0
4,51153900403,1.0,1.0,0.0,2.0,13.0,18.0


In [3]:
# Read the tract-level table (keyed by ID) and preview its first five rows.
nsi_path = 'Final_df_NSI.csv'
finaldf = pd.read_csv(nsi_path)
finaldf.head(5)

Unnamed: 0,ID,PEOPCOLORPCT,LOWINCPCT,UNEMPPCT,DISABILITYPCT,LINGISOPCT,LESSHSPCT,UNDER5PCT,OVER64PCT,LIFEEXPPCT,...,Percent_Young_Related,Percent_Mainline_Related,Percent_Night_Related,Percent_Urban_Crash,Total_Population,Median_Age,Sex_Ratio(males per 100 females),Age_Dependency_Ratio,Old-age_Dependency_Ratio,Child_Dependency_Ratio
0,51153900100,0.501237,0.076685,0.006587,0.095916,0.033355,0.044565,0.057205,0.29932,0.182564,...,0.1875,1.0,0.1875,1.0,3001,54.6,99.0,63.1,48.6,14.5
1,51153900201,0.771815,0.327176,0.067423,0.154357,0.096774,0.133784,0.032364,0.072102,0.217436,...,0.141176,0.894118,0.352941,1.0,3180,36.3,69.1,51.9,14.0,38.0
2,51153900202,0.731086,0.276916,0.048494,0.039679,0.135111,0.17148,0.04098,0.06644,0.169231,...,0.190909,0.954545,0.309091,1.0,3520,37.0,119.2,47.5,16.5,31.0
3,51153900203,0.889301,0.301155,0.042468,0.100787,0.14367,0.191298,0.095228,0.057529,0.206154,...,0.098361,1.0,0.409836,1.0,4891,29.3,76.8,69.6,7.2,62.5
4,51153900301,0.556382,0.232398,0.092181,0.134315,0.093147,0.109264,0.048369,0.08901,0.180513,...,0.175758,0.951515,0.363636,1.0,3514,37.8,93.4,41.8,15.3,26.4


In [7]:
# Count columns contributed by the facility table.
facility_cols = [
    'Num_School',
    'Num_Fire_Station',
    'Num_Library',
    'Num_Hospital',
    'Num_Worship',
    'Num_Shopping_Center',
]

# Left join on ID: every row of finaldf is kept, and rows with no match in
# the facility table get missing values in the count columns.
final_df = finaldf.merge(facility, how='left', on='ID')

# Replace those missing counts with 0, all six columns at once.
final_df[facility_cols] = final_df[facility_cols].fillna(0)
final_df

Unnamed: 0,ID,PEOPCOLORPCT,LOWINCPCT,UNEMPPCT,DISABILITYPCT,LINGISOPCT,LESSHSPCT,UNDER5PCT,OVER64PCT,LIFEEXPPCT,...,Sex_Ratio(males per 100 females),Age_Dependency_Ratio,Old-age_Dependency_Ratio,Child_Dependency_Ratio,Num_School,Num_Fire_Station,Num_Library,Num_Hospital,Num_Worship,Num_Shopping_Center
0,51153900100,0.501237,0.076685,0.006587,0.095916,0.033355,0.044565,0.057205,0.299320,0.182564,...,99.0,63.1,48.6,14.5,1.0,0.0,0.0,0.0,0.0,0.0
1,51153900201,0.771815,0.327176,0.067423,0.154357,0.096774,0.133784,0.032364,0.072102,0.217436,...,69.1,51.9,14.0,38.0,0.0,0.0,0.0,0.0,6.0,5.0
2,51153900202,0.731086,0.276916,0.048494,0.039679,0.135111,0.171480,0.040980,0.066440,0.169231,...,119.2,47.5,16.5,31.0,1.0,1.0,0.0,0.0,6.0,0.0
3,51153900203,0.889301,0.301155,0.042468,0.100787,0.143670,0.191298,0.095228,0.057529,0.206154,...,76.8,69.6,7.2,62.5,1.0,0.0,0.0,0.0,3.0,0.0
4,51153900301,0.556382,0.232398,0.092181,0.134315,0.093147,0.109264,0.048369,0.089010,0.180513,...,93.4,41.8,15.3,26.4,1.0,0.0,0.0,1.0,3.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88,51153901702,0.744667,0.337302,0.121313,0.111039,0.181014,0.350799,0.054730,0.077785,0.226667,...,118.9,56.8,12.0,44.7,2.0,0.0,0.0,0.0,3.0,0.0
89,51153901703,0.682796,0.236175,0.000000,0.055418,0.000000,0.249219,0.038462,0.097188,0.185641,...,104.8,54.9,15.0,39.9,0.0,0.0,0.0,1.0,2.0,0.0
90,51153901704,0.890003,0.364426,0.068152,0.090108,0.318534,0.247506,0.109693,0.112124,0.185641,...,150.0,65.4,18.1,47.2,1.0,0.0,0.0,0.0,4.0,4.0
91,51153901900,0.779273,0.262259,0.028372,0.058732,0.123004,0.230222,0.095096,0.074136,0.191795,...,119.1,59.8,11.9,47.9,0.0,1.0,0.0,0.0,12.0,6.0


In [8]:
# Write the merged tract-level table to disk, without the row index.
final_output_path = 'Final_df_NSI_0414.csv'
final_df.to_csv(final_output_path, index=False)