In [58]:
import pandas as pd 
import csv 
import geopandas as gpd
import os
from shapely.geometry import Point
from geopy.distance import geodesic
pd.set_option('display.max_columns', None)

### Feature Engineering the Target Variable (Bank Desert Status)
- This bank desert status is measured in the census tract level
- Conditions for the target variable: The bank desert status (bank desert, potential bank desert, not a bank desert) is determined by a few factors including the community type (2 miles for Urban, 5 miles for Suburban and 10 miles for Rural), if the distance between the center of a census tract (depending on the community type) and the nearest bank branch is greater than the community type mileage, the census tract is a bank desert.
##### The logic is as follows:
- For each census tract (geoid) in the shapefile:
You calculate the center (centroid) of that census tract.


- For each bank in the bank dataset:
You check if the bank's geoid matches the current census tract's geoid.
If the bank is in the same geoid, you calculate the distance between the center of the census tract and each bank location.


- Determine the bank desert status:
If there are no banks within the specified radius (based on community type), the census tract is classified as a "bank desert".
If there is one bank within the specified radius, the census tract is classified as a "potential bank desert".
If there are multiple banks within the specified radius, the census tract is classified as "not a bank desert".

In [60]:
# read census tract shapefiles data and bank data 
shp_file_path = '../data/shp_ruca_combined_final.parquet'
bank_file_path = '../data/fdic_ncua_gdf_final.parquet'

cs_shp_gdf = gpd.read_parquet(shp_file_path)
banks_gdf = gpd.read_parquet(bank_file_path)

In [61]:
cs_shp_gdf

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,GEOID,GEOIDFQ,NAME,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry,source_file,Community Type
0,01,063,060101,01063060101,1400000US01063060101,601.01,Census Tract 601.01,G5020,S,79177219,846346,32.779875,-87.936632,"POLYGON ((-87.99829 32.76851, -87.99326 32.771...",tl_2024_01_tract.shp,Urban
1,01,069,040802,01069040802,1400000US01069040802,408.02,Census Tract 408.02,G5020,S,31146753,351369,31.149447,-85.418679,"POLYGON ((-85.44941 31.15536, -85.44937 31.155...",tl_2024_01_tract.shp,Urban
2,01,069,040205,01069040205,1400000US01069040205,402.05,Census Tract 402.05,G5020,S,8597200,89063,31.231052,-85.472151,"POLYGON ((-85.50281 31.23736, -85.50261 31.238...",tl_2024_01_tract.shp,Urban
3,01,069,040203,01069040203,1400000US01069040203,402.03,Census Tract 402.03,G5020,S,14602533,0,31.261332,-85.474245,"POLYGON ((-85.50379 31.24638, -85.50281 31.247...",tl_2024_01_tract.shp,Urban
4,01,069,040801,01069040801,1400000US01069040801,408.01,Census Tract 408.01,G5020,S,20146185,217773,31.188587,-85.443952,"POLYGON ((-85.47512 31.2079, -85.47504 31.2083...",tl_2024_01_tract.shp,Urban
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72213,56,021,000200,56021000200,1400000US56021000200,2,Census Tract 2,G5020,S,7538613,0,41.124339,-104.808835,"POLYGON ((-104.85109 41.11787, -104.85084 41.1...",tl_2024_56_tract.shp,Urban
72214,56,009,956400,56009956400,1400000US56009956400,9564,Census Tract 9564,G5020,S,121226002,879080,42.796182,-105.299046,"POLYGON ((-105.42173 42.80178, -105.42172 42.8...",tl_2024_56_tract.shp,Rural
72215,56,009,956400,56009956400,1400000US56009956400,9564,Census Tract 9564,G5020,S,121226002,879080,42.796182,-105.299046,"POLYGON ((-105.42173 42.80178, -105.42172 42.8...",tl_2024_56_tract.shp,Rural
72216,56,009,956400,56009956400,1400000US56009956400,9564,Census Tract 9564,G5020,S,121226002,879080,42.796182,-105.299046,"POLYGON ((-105.42173 42.80178, -105.42172 42.8...",tl_2024_56_tract.shp,Rural


In [62]:
banks_gdf

Unnamed: 0_level_0,City,State,ZIP,Input Address,State Code,County Code,Tract Code,Block Code,Longitude,Latitude,FIPS 11,Bank Name,Geometry
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,Gloucester,MA,1930,"102 Rogers St, Gloucester, Massachusetts, 01930",25,009,221500,2005,-70.658897,42.613537,25009221500,"Santander Bank, N.A. Gloucester Branch",POINT (-70.6589 42.61354)
1,Tampa,FL,33602,"401 E Jackson St, Tampa, Florida, 33602",12,057,005101,2079,-82.456675,27.947186,12057005101,Truist Bank Truist Financial Center Branch,POINT (-82.45667 27.94719)
2,Richmond,MO,64085,"803 Wollard Blvd, Richmond, Missouri, 64085",29,177,080202,2002,-93.959037,39.269180,29177080202,Flat Branch Bank,POINT (-93.95904 39.26918)
3,Mendota,IL,61342,"706 Washington St, Mendota, Illinois, 61342",17,099,961900,4022,-89.118569,41.548092,17099961900,First State Bank,POINT (-89.11857 41.54809)
4,Burlington,CO,80807,"410 14th St, Burlington, Colorado, 80807",08,063,962100,2065,-102.267955,39.304779,08063962100,Farmers & Merchants Bank Of Colby Burlington B...,POINT (-102.26795 39.30478)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
70999,Augusta,ME,4330,"2010 N Belfast Ave, Augusta, Me, 04330",23,011,010100,3018,-69.762596,44.324040,23011010100,Capital Area,POINT (-69.7626 44.32404)
71000,Waterville,ME,4901,"222 College Ave, Waterville, Me, 04901",23,011,024101,1001,-69.614780,44.574180,23011024101,Ksw,POINT (-69.61478 44.57418)
71001,Boyle,MS,38730,"630 Gaines Hwy, Boyle, Ms, 38730",28,011,950500,2037,-90.718231,33.712482,28011950500,Shelby/Bolivar County,POINT (-90.71823 33.71248)
71002,Tampa,FL,33619,"9927 Delaney Lake Dr, Tampa, Fl, 33619",12,057,013315,1005,-82.341062,27.932052,12057013315,Grow Financial,POINT (-82.34106 27.93205)


In [63]:
# count of banks per census tract 
# FIPS 11 == GEOID
bank_counts = banks_gdf.groupby("FIPS 11").size().reset_index(name="bank_count")
bank_counts

Unnamed: 0,FIPS 11,bank_count
0,01001020200,1
1,01001020501,1
2,01001020502,4
3,01001020600,1
4,01001020700,2
...,...,...
33241,72141957500,1
33242,72147950600,1
33243,72149720400,1
33244,72151951000,1


In [64]:
# how many unique GEOIDs in the bank data are in the census shapefile 

# matching_geoid = cs_shp_gdf[cs_shp_gdf['GEOID'].isin(banks_gdf['FIPS 11'])]
# matching_geoid

matching_geoid = bank_counts[bank_counts['FIPS 11'].isin(cs_shp_gdf['GEOID'])]
matching_geoid

Unnamed: 0,FIPS 11,bank_count
1,01001020501,1
3,01001020600,1
4,01001020700,2
7,01003010704,1
11,01003010800,4
...,...,...
33172,56039967701,8
33176,56041975301,1
33178,56043000301,6
33179,56045951100,1


- 28927 GEOID in the census shapefiles dataframe exists in the FIPS 11 in banks dataframe
- 51466 GEOID in the banks dataframe exists in the census shapefile dataframe
- There are only 24262 unique GEOIDs from the bank dataframe that is available in the census shapefile data

In [66]:
# how many unique GEOIDs are in both dataframes?
# the census tract shapefile data has a total of 72218 rows but only 60742 unique GEOID, which means some GEOIDs are duplicated
cs_shp_gdf['GEOID'].nunique()

60742

In [67]:
banks_gdf['FIPS 11'].nunique()

33246

- Some of the GEOID in the census tract shapefile dataframe is duplicated, this could be causing some discrepancy in the classification of census tract bank desert status
- Each census tract from the census tract shapefile data has to be have a unique bank desert status, it can't be duplicated

In [69]:
# Drop duplicated rows based on the GEOID column
# after running the code below, we only have 60742 rows left with unique GEOID 
cs_shp_gdf = cs_shp_gdf.drop_duplicates(subset='GEOID', keep = 'first').reset_index(drop = True)

In [70]:
# feature engineer radius distance threshold based on community type in miles 

cs_shp_gdf['Radius Threshold in Miles'] = cs_shp_gdf['Community Type'].apply(lambda x: 2 if x == 'Urban'
                                                                            else 5 if x == 'Suburban'
                                                                            else 10)

In [71]:
print(cs_shp_gdf.crs)  # check the CRS of the census shapefile GeoDataFrame
print(banks_gdf.crs)    # check the CRS of the banks GeoDataFrame

{"$schema": "https://proj.org/schemas/v0.7/projjson.schema.json", "type": "GeographicCRS", "name": "NAD83", "datum": {"type": "GeodeticReferenceFrame", "name": "North American Datum 1983", "ellipsoid": {"name": "GRS 1980", "semi_major_axis": 6378137, "inverse_flattening": 298.257222101}}, "coordinate_system": {"subtype": "ellipsoidal", "axis": [{"name": "Geodetic latitude", "abbreviation": "Lat", "direction": "north", "unit": "degree"}, {"name": "Geodetic longitude", "abbreviation": "Lon", "direction": "east", "unit": "degree"}]}, "scope": "Geodesy.", "area": "North America - onshore and offshore: Canada - Alberta; British Columbia; Manitoba; New Brunswick; Newfoundland and Labrador; Northwest Territories; Nova Scotia; Nunavut; Ontario; Prince Edward Island; Quebec; Saskatchewan; Yukon. Puerto Rico. United States (USA) - Alabama; Alaska; Arizona; Arkansas; California; Colorado; Connecticut; Delaware; Florida; Georgia; Hawaii; Idaho; Illinois; Indiana; Iowa; Kansas; Kentucky; Louisiana;

In [72]:
cs_shp_gdf.set_crs(epsg=4269, inplace=True)
banks_gdf.set_crs(epsg=4269, inplace=True)

Unnamed: 0_level_0,City,State,ZIP,Input Address,State Code,County Code,Tract Code,Block Code,Longitude,Latitude,FIPS 11,Bank Name,Geometry
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,Gloucester,MA,1930,"102 Rogers St, Gloucester, Massachusetts, 01930",25,009,221500,2005,-70.658897,42.613537,25009221500,"Santander Bank, N.A. Gloucester Branch",POINT (-70.6589 42.61354)
1,Tampa,FL,33602,"401 E Jackson St, Tampa, Florida, 33602",12,057,005101,2079,-82.456675,27.947186,12057005101,Truist Bank Truist Financial Center Branch,POINT (-82.45667 27.94719)
2,Richmond,MO,64085,"803 Wollard Blvd, Richmond, Missouri, 64085",29,177,080202,2002,-93.959037,39.269180,29177080202,Flat Branch Bank,POINT (-93.95904 39.26918)
3,Mendota,IL,61342,"706 Washington St, Mendota, Illinois, 61342",17,099,961900,4022,-89.118569,41.548092,17099961900,First State Bank,POINT (-89.11857 41.54809)
4,Burlington,CO,80807,"410 14th St, Burlington, Colorado, 80807",08,063,962100,2065,-102.267955,39.304779,08063962100,Farmers & Merchants Bank Of Colby Burlington B...,POINT (-102.26795 39.30478)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
70999,Augusta,ME,4330,"2010 N Belfast Ave, Augusta, Me, 04330",23,011,010100,3018,-69.762596,44.324040,23011010100,Capital Area,POINT (-69.7626 44.32404)
71000,Waterville,ME,4901,"222 College Ave, Waterville, Me, 04901",23,011,024101,1001,-69.614780,44.574180,23011024101,Ksw,POINT (-69.61478 44.57418)
71001,Boyle,MS,38730,"630 Gaines Hwy, Boyle, Ms, 38730",28,011,950500,2037,-90.718231,33.712482,28011950500,Shelby/Bolivar County,POINT (-90.71823 33.71248)
71002,Tampa,FL,33619,"9927 Delaney Lake Dr, Tampa, Fl, 33619",12,057,013315,1005,-82.341062,27.932052,12057013315,Grow Financial,POINT (-82.34106 27.93205)


In [73]:
# set coordinate reference system of banks geodataframe to the same CRS of shapefile dataframe for a more precise distance calculation
# since our census tracts radius threshold's maximum is only around 10 miles, we can use Euclidean distance, the earth's curvature is negligible in since local areas are small enough

# set the CRS for both dataframes to be the same 
cs_shp_gdf = cs_shp_gdf.to_crs(epsg=4326)
banks_gdf = banks_gdf.to_crs(epsg=4326)

- Both dataframes, shapefiles and banks, are now using the same coordinate system. EPSG:5070 (NAD83 / Conus Albers) is a suitable U.S. projection for large-scale analyses because it minimizes distortion over the contiguous U.S.

In [75]:
cs_shp_gdf.isna().sum()

STATEFP                      0
COUNTYFP                     0
TRACTCE                      0
GEOID                        0
GEOIDFQ                      0
NAME                         0
NAMELSAD                     0
MTFCC                        0
FUNCSTAT                     0
ALAND                        0
AWATER                       0
INTPTLAT                     0
INTPTLON                     0
geometry                     0
source_file                  0
Community Type               0
Radius Threshold in Miles    0
dtype: int64

In [76]:
banks_gdf.isna().sum()

City             0
State            0
ZIP              0
Input Address    0
State Code       0
County Code      0
Tract Code       0
Block Code       0
Longitude        0
Latitude         0
FIPS 11          0
Bank Name        0
Geometry         0
dtype: int64

#### Explanation regarding the bank desert status function below
- The function iterates through the individual rows in census shapefile dataframe, we extract GEOID, Community Type, the Radius Threshold in Miles (this is based on Community Type, 2 miles for Urban, 5 miles for Suburban and 10 miles for Rural), INTPTLAT (internal point latitude) and the INTPTLON (internal point longitude)
- The GEOID contains the 11 digit FIPS code of the census tract
- The Community Type is based on the Rural Urban classification taken from the USDA website
- The Radius Threshold in Miles is the radius threshold defined by the FDIC
- INTPTLAT and INTPTLON are coordinates that represent the internal point of a geographic area, this is usually the center of the census tract but due to the census tracts having irregular shapes, the internal point may be located in a more central and populated part of the tract rather than the exact geometric center.

- After extracting the columns from the census shapefile dataframe, we filter the bank dataframe based on the GEOID from the census shapefile dataframe (this is done due to a GEOID potentially having multiple banks)
- Once we have filtered the bank dataframe using the GEOID, we create an empty list called distances_list that will contain the distance between the center of a census tract (intptlat and intptlon) and the bank branch
- Then we iterate through this filtered bank dataframe, extract the Latitude and Longitude columns, calculate the geodesic distance in miles using the latitude and longitude from the shapefile and bank dataframe and append it to the distances_list
- Once we have the distances_list of all the banks relative to the census tracts centroid based on GEOID, we classify the census tracts bank desert status based on the number of banks inside the radius threshold:
if the sum of banks within the radius is 0, that census tract is considered a 'bank desert'
if the sum of banks within the radius is 1, that census tract is considered a 'potential bank desert'
if the sum of banks within the radius is other than 0 or 1, that census tract is considered 'not a bank desert'
- After classifying the status, we assign the bank desert status to the census shapefile dataframe and return the updated dataframe containing the bank desert status variable 

In [123]:
# create a function to classify each census tracts bank desert status 

def bank_desert_status(shapefile_df, bank_df):
    shapefile_df = shapefile_df.reset_index(drop=True)  # reset index to ensure sequential indexing
    
    # loop through each row of the shapefile dataframe to obtain community type, geoid and radius threshold of each census tract 
    for index, shapefile_row in shapefile_df.iterrows():
        geoid = shapefile_row['GEOID']
        radius = shapefile_row['Radius Threshold in Miles']
        
        # census shapefile internal point latitude and longitude -- not always the pertaining to the center of the tract
        census_lat = shapefile_row['INTPTLAT']
        census_lon = shapefile_row['INTPTLON']
        
        # # Get the centroid of the census tract (already reprojected to EPSG:5070)
        # census_point = shapefile_row.geometry
        
        # filter the bank dataframe for banks in each census tract, where GEOID from shapefile_df is equal to FIPS 11 from bank_df 
        banks_in_tract = bank_df[bank_df['FIPS 11'] == geoid]

        # if banks_in_tract is empty which means the GEOID from the shapefile df does not exist in the bank_df, assign 'bank desert'
        if banks_in_tract.empty:
            shapefile_df.loc[index, 'bank_desert_status'] = 'bank desert'
            continue  # skip to next tract if no banks are found
        
        # empty list to store geodesic distances between the census centroid and bank 
        distances_list = []

        # iterate through the filtered bank dataframe, banks where GEOID == FIPS 11
        for _, bank_row in banks_in_tract.iterrows(): 
            # extract bank latitude and longitude 
            bank_lat = bank_row['Latitude']
            bank_lon = bank_row['Longitude']

            # bank_point = bank_row.Geometry
            # # calculate distance in meters
            # distance_meters = census_point.distance(bank_point)

            # # Convert to miles
            # distance_miles = distance_meters * 0.000621371
            # distances_list.append(distance_miles)
            
            # calculate geodesic distance in miles
            distance_miles = geodesic((census_lat, census_lon), (bank_lat, bank_lon)).miles
            distances_list.append(distance_miles)
        print(f"Distances list for {geoid}: {distances_list}")

        # counter for banks within the radius threshold 
        within_radius_count = sum(1 for b in distances_list if b <= radius) 
        print(f"Within radius count for {geoid}: {within_radius_count}")
        
        if not distances_list: # if not distances_list means there are no banks in that census tract 
            bank_desert_status = 'bank desert'
                
        # classify the bank desert status based on the counts
        else:
            if within_radius_count == 0:
                bank_desert_status = 'bank desert'  # no banks within the radius
            elif within_radius_count == 1:
                bank_desert_status = 'potential bank desert'  # one bank within the radius
            else:
                bank_desert_status = 'not a bank desert' # multiple banks within the radius

        # assign the bank desert status to the census shapefile dataframe
        shapefile_df.loc[index, 'bank_desert_status'] = bank_desert_status
            
    # return updated shapefiled_df with bank desert status variable                 
    return shapefile_df

In [125]:
cs_shp_with_BDS = bank_desert_status(cs_shp_gdf, banks_gdf)
cs_shp_with_BDS

Distances list for 01063060101: [5.024456405884112]
Within radius count for 01063060101: 0
Distances list for 01069040205: [0.6224069002641558]
Within radius count for 01069040205: 1
Distances list for 01069040203: [1.4851390167251817]
Within radius count for 01069040203: 1
Distances list for 01069041901: [2.6984567231097203]
Within radius count for 01069041901: 0
Distances list for 01125012101: [0.9547756710264802]
Within radius count for 01125012101: 1
Distances list for 01125010304: [1.199887474044709, 1.1361844873146503]
Within radius count for 01125010304: 2
Distances list for 01125012001: [0.5278763753574526]
Within radius count for 01125012001: 1
Distances list for 01089001902: [0.741735607772277]
Within radius count for 01089001902: 1
Distances list for 01089011021: [0.917405067393921]
Within radius count for 01089011021: 1
Distances list for 01089001901: [1.744848844748539, 1.6727875464701105, 1.6136238914517496]
Within radius count for 01089001901: 3
Distances list for 010890

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,GEOID,GEOIDFQ,NAME,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry,source_file,Community Type,Radius Threshold in Miles,bank_desert_status
0,01,063,060101,01063060101,1400000US01063060101,601.01,Census Tract 601.01,G5020,S,79177219,846346,32.779875,-87.936632,"POLYGON ((-87.99829 32.76851, -87.99326 32.771...",tl_2024_01_tract.shp,Urban,2,bank desert
1,01,069,040802,01069040802,1400000US01069040802,408.02,Census Tract 408.02,G5020,S,31146753,351369,31.149447,-85.418679,"POLYGON ((-85.44941 31.15536, -85.44937 31.155...",tl_2024_01_tract.shp,Urban,2,bank desert
2,01,069,040205,01069040205,1400000US01069040205,402.05,Census Tract 402.05,G5020,S,8597200,89063,31.231052,-85.472151,"POLYGON ((-85.50281 31.23736, -85.50261 31.238...",tl_2024_01_tract.shp,Urban,2,potential bank desert
3,01,069,040203,01069040203,1400000US01069040203,402.03,Census Tract 402.03,G5020,S,14602533,0,31.261332,-85.474245,"POLYGON ((-85.50379 31.24638, -85.50281 31.247...",tl_2024_01_tract.shp,Urban,2,potential bank desert
4,01,069,040801,01069040801,1400000US01069040801,408.01,Census Tract 408.01,G5020,S,20146185,217773,31.188587,-85.443952,"POLYGON ((-85.47512 31.2079, -85.47504 31.2083...",tl_2024_01_tract.shp,Urban,2,bank desert
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60737,56,021,000800,56021000800,1400000US56021000800,8,Census Tract 8,G5020,S,1244409,0,41.149217,-104.798831,"POLYGON ((-104.8093 41.14624, -104.80926 41.14...",tl_2024_56_tract.shp,Urban,2,bank desert
60738,56,027,957200,56027957200,1400000US56027957200,9572,Census Tract 9572,G5020,S,6801380678,4969450,43.062159,-104.468373,"POLYGON ((-104.89994 43.49964, -104.89953 43.4...",tl_2024_56_tract.shp,Rural,10,bank desert
60739,56,021,000200,56021000200,1400000US56021000200,2,Census Tract 2,G5020,S,7538613,0,41.124339,-104.808835,"POLYGON ((-104.85109 41.11787, -104.85084 41.1...",tl_2024_56_tract.shp,Urban,2,bank desert
60740,56,009,956400,56009956400,1400000US56009956400,9564,Census Tract 9564,G5020,S,121226002,879080,42.796182,-105.299046,"POLYGON ((-105.42173 42.80178, -105.42172 42.8...",tl_2024_56_tract.shp,Rural,10,not a bank desert


- The bank desert status will be used as a target variable for our binary classification model, it will be merged with the US Census Data based on GEOID
- It will also be used in creating the interactive visualization showing the different census tracts filtered by state and county, overlayed with census data 

In [88]:
# saving the census shapefile geodataframe with the bank desert status column as a parquet file
# cs_shp_with_BDS.to_parquet('shp_with_BDS.parquet')

In [126]:
cs_shp_with_BDS.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 60742 entries, 0 to 60741
Data columns (total 18 columns):
 #   Column                     Non-Null Count  Dtype   
---  ------                     --------------  -----   
 0   STATEFP                    60742 non-null  object  
 1   COUNTYFP                   60742 non-null  object  
 2   TRACTCE                    60742 non-null  object  
 3   GEOID                      60742 non-null  object  
 4   GEOIDFQ                    60742 non-null  object  
 5   NAME                       60742 non-null  object  
 6   NAMELSAD                   60742 non-null  object  
 7   MTFCC                      60742 non-null  object  
 8   FUNCSTAT                   60742 non-null  object  
 9   ALAND                      60742 non-null  int64   
 10  AWATER                     60742 non-null  int64   
 11  INTPTLAT                   60742 non-null  float64 
 12  INTPTLON                   60742 non-null  float64 
 13  geometry               

In [128]:
cs_shp_with_BDS['bank_desert_status'].value_counts()

bank_desert_status
bank desert              38657
not a bank desert        11132
potential bank desert    10953
Name: count, dtype: int64

In [129]:
cs = cs_shp_with_BDS[cs_shp_with_BDS['GEOID'] == '01063060101']
bank = banks_gdf[banks_gdf['FIPS 11'] == '01063060101']

In [130]:
cs

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,GEOID,GEOIDFQ,NAME,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry,source_file,Community Type,Radius Threshold in Miles,bank_desert_status
0,1,63,60101,1063060101,1400000US01063060101,601.01,Census Tract 601.01,G5020,S,79177219,846346,32.779875,-87.936632,"POLYGON ((-87.99829 32.76851, -87.99326 32.771...",tl_2024_01_tract.shp,Urban,2,bank desert


In [131]:
bank

Unnamed: 0_level_0,City,State,ZIP,Input Address,State Code,County Code,Tract Code,Block Code,Longitude,Latitude,FIPS 11,Bank Name,Geometry
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
54790,Eutaw,AL,35462,"302 Prairie Ave, Eutaw, Alabama, 35462",1,63,60101,2002,-87.887949,32.840092,1063060101,"Merchants & Farmers Bank Of Greene County, Ala...",POINT (-87.88795 32.84009)


In [132]:
cs_shp_with_BDS.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 60742 entries, 0 to 60741
Data columns (total 18 columns):
 #   Column                     Non-Null Count  Dtype   
---  ------                     --------------  -----   
 0   STATEFP                    60742 non-null  object  
 1   COUNTYFP                   60742 non-null  object  
 2   TRACTCE                    60742 non-null  object  
 3   GEOID                      60742 non-null  object  
 4   GEOIDFQ                    60742 non-null  object  
 5   NAME                       60742 non-null  object  
 6   NAMELSAD                   60742 non-null  object  
 7   MTFCC                      60742 non-null  object  
 8   FUNCSTAT                   60742 non-null  object  
 9   ALAND                      60742 non-null  int64   
 10  AWATER                     60742 non-null  int64   
 11  INTPTLAT                   60742 non-null  float64 
 12  INTPTLON                   60742 non-null  float64 
 13  geometry               

In [140]:
# saving the census shapefile geodataframe with the bank desert status column as a parquet file

# cs_shp_with_BDS.to_parquet('shp_with_BDS.parquet')