In [12]:
import matplotlib.pyplot as plt
from matplotlib  import rc
import pandas as pd
import geopandas as gpd
from census import Census
from us import states
import numpy as np
import os

In [13]:
# Build (once) and load the bank/tract table enriched with census attributes.
#
# The merged CSV is an on-disk cache: it is only rebuilt when the file is
# missing, so delete it to force a refresh after either input file changes.
BANK_TRACT_WITH_CENSUS_CSV = '../input_data_clean/bank_tract_clean_WITH_CENSUS.csv'

if not os.path.exists(BANK_TRACT_WITH_CENSUS_CSV):
    (
        pd.read_csv('../input_data_clean/census_clean.csv')
        .merge(pd.read_csv('../input_data_clean/bank_tract_clean.csv'),
               on="census_tract",
               how="inner",
               # one census row per tract, possibly many bank rows per tract;
               # pandas raises MergeError if the census side has duplicates
               validate="1:m")
        # index=False: the default would also write the RangeIndex, which
        # comes back as a spurious "Unnamed: 0" column when the cache is read.
        .to_csv(BANK_TRACT_WITH_CENSUS_CSV, index=False)
    )

bank_tract = pd.read_csv(BANK_TRACT_WITH_CENSUS_CSV)

In [16]:
# Download 2020 ACS 5-year tract-level estimates for Arizona and California
# and join them to the matching TIGER/Line 2020 tract geometries.

# The API key is read from the environment so the credential is not stored in
# the notebook. The key that used to be hard-coded here has been exposed and
# should be revoked and replaced.
census_api_key = os.environ.get("CENSUS_API_KEY")
if not census_api_key:
    raise RuntimeError(
        "Set the CENSUS_API_KEY environment variable to your Census API key "
        "before running this cell."
    )
c = Census(census_api_key)

# Fields requested for every tract. Later cells use B02001_002E / B01003_001E
# as the white share of the population and B06011_001E as an income measure;
# the C17002_* fields are presumably the poverty-ratio counts (TODO confirm).
acs_fields = ('NAME', 'C17002_001E', 'C17002_002E', 'C17002_003E', 'B01003_001E', 'B02001_002E', 'B06011_001E')

# Get census data for Arizona and California at the tract level in 2020
az_census = c.acs5.state_county_tract(fields=acs_fields,
                                      state_fips=states.AZ.fips,
                                      county_fips="*",
                                      tract="*",
                                      year=2020)
ca_census = c.acs5.state_county_tract(fields=acs_fields,
                                      state_fips=states.CA.fips,
                                      county_fips="*",
                                      tract="*",
                                      year=2020)

# Convert census data to Pandas DataFrames
az_df = pd.DataFrame(az_census)
ca_df = pd.DataFrame(ca_census)

# Stack the two states; ignore_index=True gives a fresh 0..n-1 index directly,
# replacing the separate in-place reset_index step.
combined_df = pd.concat([az_df, ca_df], ignore_index=True)

# Get the Arizona (FIPS 04) and California (FIPS 06) tract shapefiles.
# NOTE(review): EPSG:32617 is UTM zone 17N, which is centred on the eastern
# US; it is kept because the zoom limits used further down are expressed in
# these coordinates. Consider a projection suited to AZ/CA and update the zoom
# limits at the same time.
shape_az = gpd.read_file("https://www2.census.gov/geo/tiger/TIGER2020/TRACT/tl_2020_04_tract.zip").to_crs(epsg=32617)
shape_ca = gpd.read_file("https://www2.census.gov/geo/tiger/TIGER2020/TRACT/tl_2020_06_tract.zip").to_crs(epsg=32617)
shape_all = pd.concat([shape_az, shape_ca], ignore_index=True)

# Build the GEOID join key (state + county + tract codes, concatenated as
# strings), then drop the two components that are no longer needed.
combined_df = (
    combined_df
    .assign(GEOID=lambda d: d["state"] + d["county"] + d["tract"])
    .drop(columns=["state", "county"])
)

# Inner join: only tracts present in both the shapefiles and the API response
# are kept. Both inputs carry a NAME column, so the result has NAME_x (from
# the shapefile) and NAME_y (from the census data).
azca_merge = shape_all.merge(combined_df, on="GEOID")

# Select columns for poverty and minority analysis
# az_minority_tract = az_merge[["STATEFP", "COUNTYFP", "TRACTCE", "GEOID", "geometry", "C17002_001E", "C17002_002E", "C17002_003E", "B01003_001E", "B02001_002E",'B06011_001E']]

# Aggregate data to the county level

In [10]:
# Inspect the combined tract GeoDataFrame: column names, dtypes and non-null counts.
shape_all.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 10894 entries, 0 to 10893
Data columns (total 13 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   STATEFP   10894 non-null  object  
 1   COUNTYFP  10894 non-null  object  
 2   TRACTCE   10894 non-null  object  
 3   GEOID     10894 non-null  object  
 4   NAME      10894 non-null  object  
 5   NAMELSAD  10894 non-null  object  
 6   MTFCC     10894 non-null  object  
 7   FUNCSTAT  10894 non-null  object  
 8   ALAND     10894 non-null  int64   
 9   AWATER    10894 non-null  int64   
 10  INTPTLAT  10894 non-null  object  
 11  INTPTLON  10894 non-null  object  
 12  geometry  10894 non-null  geometry
dtypes: geometry(1), int64(2), object(10)
memory usage: 1.1+ MB


In [7]:
#'Tot.Pop', 'Tot.WhitePop', 'Tot.BlackPop ', 'Tot.AmericIndianPop', 'Tot.AsianPop', '
#Tot.NativeHawaiianPacificPop', 'Tot.OtherRaceAlonePop', 'Tot.TwoOrMoreRace', 
#'Tot.TwoOrMoreRace(Some other race)', 'Tot.Hispanic/Latino', 'Tot.NotHispanic/Latino' 
# and then without tot. it works for each tract
# with open('census_token.txt') as f: 
#     c = Census(f.read())

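# TODO: batch-generate the maps. Plan (pseudocode):
#   1. Create the list of all shading variables (and, presumably, the list of
#      all redline variables -- the original note was left unfinished).
#   2. zoom_params = {'sf': <zoom limits>, 'la': <zoom limits>, ...}
#   3. for city in ['sf', 'la', ...]:
#          city_zoom = zoom_params[city]
#          for shade in ['white_rate', ...]:
#              for redline in ['majority-minority', ...]:
#                  clear the plot
#                  draw the map, then build the output name:
#                  filename = f'plots/{city[0:3]}-{shade[0:10]}-{redline[0:10]}.csv'
#                  (NOTE: a figure should probably use an image extension such as .png)
#                  save the plot --> plots/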

In [None]:
# Attach the tract geometry and attributes from `shape_all` to each `az_tract` row.
#
# NOTE(review): the original line was an unfinished draft (invalid syntax).
# The arguments below follow its own to-do list (how, on, indicator, validate).
# "GEOID" is assumed as the join key because it is the tract identifier column
# in `shape_all` -- confirm that `az_tract` has it too. `az_tract` is not
# defined in any visible cell and must exist before this cell runs.
# Re-running the cell on its own output fails because `_merge` already exists.
az_tract = az_tract.merge(
    shape_all,
    how="left",        # keep every az_tract row, matched or not
    on="GEOID",
    indicator=True,    # adds a `_merge` column showing which rows matched
    validate="m:1",    # raise if GEOID is duplicated in shape_all
)

In [None]:
# Subset of bank_tract whose `bank` column equals "Other"
# (presumably the competitor banks, going by the variable name).
competitors_only = bank_tract.loc[bank_tract["bank"] == "Other"]

In [None]:
# az_minority_tract

In [None]:
# Working table for the poverty / minority analysis.
#
# `az_minority_tract` was never defined in this notebook (its only definition
# was commented out and referred to a frame that no longer exists), so it is
# rebuilt here from the merged AZ + CA tract frame `azca_merge`.
minority_columns = [
    "STATEFP", "COUNTYFP", "TRACTCE", "GEOID", "geometry",
    "C17002_001E", "C17002_002E", "C17002_003E",
    "B01003_001E", "B02001_002E", "B06011_001E",
]
az_minority_tract = azca_merge[minority_columns]

# Copy so that columns added later do not also modify az_minority_tract.
# Despite the name, rows are still census tracts: the county-level
# aggregation is disabled. Rows also cover California, not only Arizona.
az_minority_county = az_minority_tract.copy() #.dissolve(by='TRACTCE', aggfunc='sum')

In [None]:
# Percentage of the population that is white in each row of the table
# (B02001_002E as a share of B01003_001E). Rows are census tracts, since the
# county-level dissolve is not applied. A row with zero population yields NaN
# here and is therefore not flagged as majority-minority below.
az_minority_county["White_Rate"] = (az_minority_county["B02001_002E"]) / az_minority_county["B01003_001E"] * 100

# True where white residents make up less than half of the population.
az_minority_county['majority-minority'] = az_minority_county["White_Rate"] < 50

# The ACS reports estimates it could not compute as large negative sentinel
# values (e.g. -666666666). Treat any negative income as missing so those rows
# neither pull the 10th-percentile threshold down nor get flagged as low
# income. The raw column itself is left untouched.
income = az_minority_county["B06011_001E"]
valid_income = income.where(income >= 0)

# True where income is below the 10th percentile of all valid rows;
# rows with missing income compare as False.
az_minority_county['below_p10_income'] = valid_income < valid_income.quantile(0.1)

In [None]:
def red_line_map(df,shading_var:str,redline_var:str,optional_title= None,option_zoom_params=None):
    """Draw a choropleth of ``shading_var`` with flagged geometries outlined in red.

    Parameters
    ----------
    df
        Geo-enabled frame providing ``.plot(column=..., ax=...)`` and
        ``.boundary`` (for example a geopandas ``GeoDataFrame``).
    shading_var : str
        Column used to colour each geometry (``Greens`` colormap, with legend).
    redline_var : str
        Boolean column. Rows where it is True get a thick red outline, rows
        where it is False a thin light-grey one. Rows that are neither (for
        example missing values) get no outline.
    optional_title : str, optional
        Title for the map. When omitted, the original default title built
        from ``shading_var`` and ``redline_var`` is used.
    option_zoom_params : dict, optional
        ``{'x': [xmin, xmax], 'y': [ymin, ymax]}`` axis limits in the
        coordinate system of ``df``. When omitted, the full extent is shown.

    Returns
    -------
    None
        The map is drawn on a newly created 20x10 inch figure.
    """
    # Apply the style *before* creating the figure: style sheets only affect
    # artists created afterwards, so applying it at the end left the first map
    # in a different style from every later one. The style stays global,
    # as before.
    plt.style.use('bmh')

    fig, ax = plt.subplots(1, 1, figsize = (20, 10))

    # Plot data
    # Source: https://geopandas.readthedocs.io/en/latest/docs/user_guide/mapping.html
    df.plot(column = shading_var,
            ax = ax,
            cmap = "Greens",
            legend = True)

    # Outlines: grey first, red last, so the thin grey lines of neighbouring
    # geometries do not overdraw the red ones on shared edges.
    outline_specs = (
        (df[redline_var].eq(False), 'gainsboro', .2),
        (df[redline_var].eq(True), 'red', 2),
    )
    for mask, color, linewidth in outline_specs:
        subset = df[mask]
        # Skip empty groups (e.g. nothing is flagged) rather than asking
        # geopandas to plot an empty series.
        if not subset.empty:
            subset.boundary.plot(color=color, ax=ax, linewidth=linewidth)

    # Set title: honour the caller's title, otherwise fall back to the default
    if optional_title is None:
        optional_title = f'{shading_var} in AZ \n Red outlines are based on {redline_var}'
    ax.set_title(optional_title, fontdict = {'fontsize': 15})

    # optionally, zoom in
    if option_zoom_params:
        ax.set_xlim(option_zoom_params['x'])
        ax.set_ylim(option_zoom_params['y'])

# Some useful plots

In [None]:
# Axis limits for a zoomed-in view (presumably the Phoenix area, going by the
# name), given in the projected coordinates of the plotted data.
phx_zoom = dict(x=[-2.5e6, -2.35e6], y=[4.1e6, 4.25e6])

# White share per tract, with majority-minority tracts outlined in red.
red_line_map(
    df=az_minority_county,
    shading_var='White_Rate',
    redline_var='majority-minority',
    option_zoom_params=phx_zoom,
)