In [1]:
import matplotlib.pyplot as plt
from matplotlib  import rc
import pandas as pd
import geopandas as gpd
import numpy as np
import os
import plotly.express as px
from plotly import graph_objs as go


In [2]:
bank_tract = pd.read_csv('../input_data_clean/bank_tract_clean_WITH_CENSUS.csv')
bank_tract['which_bank'] = bank_tract['which_bank'].str.replace(' ', '')

 # | (state == 18 & county in [97])

In [3]:
# Create a copy of the original dataframe
bank_tract_new = bank_tract.copy()

# Find the census tracts that have AllOtherBanks but not BankofWest
missing_tracts = bank_tract_new[(bank_tract_new['which_bank'] == 'AllOtherBanks') & ~(bank_tract_new['census_tract'].isin(bank_tract_new[bank_tract_new['which_bank'] == 'BankofWest']['census_tract']))]

# Update which_bank column to BankofWest
missing_tracts['which_bank'] = 'BankofWest'

# Set bank specific variables to 0
start_col = bank_tract.columns.get_loc('sum_approved_loans')
bank_specific_vars = bank_tract.columns[start_col:]
missing_tracts[bank_specific_vars] = np.NaN
missing_tracts['log_num_apps'] = 0

# Append the missing rows to the new dataframe
bank_tract_new = pd.concat([bank_tract_new, missing_tracts], ignore_index=True)

# Verify that there are now equal numbers of rows for each which_bank value
bank_tract_new["which_bank"].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  missing_tracts['which_bank'] = 'BankofWest'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  missing_tracts[bank_specific_vars] = np.NaN
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  missing_tracts['log_num_apps'] = 0


BankofWest       9340
AllOtherBanks    9337
Name: which_bank, dtype: int64

In [4]:
bank_tract = bank_tract_new


In [5]:
bank_tract ['white_rate']        = (bank_tract ['WhitePop']) / bank_tract ['Tot.Pop'] * 100
bank_tract ['majorityminority'] = bank_tract ['white_rate'] < 50

# bank_tract = bank_tract.query('(state == 4 & county in [13,19]) | (state == 6 & county in [37,75, 81, 1,85])') # | (state == 18 & county in [97])
bank_tract["which_bank"].value_counts()

BankofWest       9340
AllOtherBanks    9337
Name: which_bank, dtype: int64

In [6]:
grouped = bank_tract.groupby(['census_tract', 'which_bank']) 
denial_rates = grouped['denial_rate'].mean()

bank_of_west = denial_rates.loc[(slice(None), 'BankofWest')]
all_other_banks = denial_rates.loc[(slice(None), 'AllOtherBanks')]
percent_difference = bank_of_west - all_other_banks

df_percent_difference = pd.DataFrame({'census_tract': percent_difference.index.get_level_values(0), 'percent_difference': percent_difference.values})

bank_tract = pd.merge(bank_tract, df_percent_difference, on='census_tract', how='left')


In [7]:
shape_az = gpd.read_file("https://www2.census.gov/geo/tiger/TIGER2020/TRACT/tl_2020_04_tract.zip") #.to_crs(epsg=32617)
shape_ca = gpd.read_file("https://www2.census.gov/geo/tiger/TIGER2020/TRACT/tl_2020_06_tract.zip") #.to_crs(epsg=32617)
# shape_in = gpd.read_file("https://www2.census.gov/geo/tiger/TIGER2020/TRACT/tl_2020_18_tract.zip") #.to_crs(epsg=32617)

shape_all = pd.concat([shape_az, shape_ca], ignore_index=True)
shape_all['census_tract'] = shape_all['GEOID'].astype(np.int64)

In [8]:
tract_with_shape = shape_all.merge(bank_tract, how = 'inner', on = ['census_tract'],
                                   indicator = True, validate= '1:m') 

In [9]:
concerns = tract_with_shape.query('majorityminority').copy()
# we need this for the "area of concern" red marks
concerns['lon'] = concerns['INTPTLON'].astype(float)
concerns['lat'] = concerns['INTPTLAT'].astype(float)

In [10]:
listofshading = ['percent_difference']

In [11]:
import plotly.colors as colors
colorscale = colors.sequential.Greens[::-1] + colors.sequential.Reds

for v in listofshading:

    fig = px.choropleth_mapbox(tract_with_shape.set_index('GEOID'),
                               geojson=tract_with_shape.geometry,
                               locations=tract_with_shape.index,
                               opacity=.7,
                               color=v, color_continuous_scale=colorscale,
                               center={"lat": 33.44, "lon": -112.074036},
                               mapbox_style="open-street-map",
                               zoom=5)

    fig.add_scattermapbox(
        lat = concerns['lat'],
        lon = concerns['lon'],
        mode = 'markers',
        marker_size=12,
        marker_color='red'
    )

    fig.update_layout(
        title=v,
        autosize=False,
        width=1000,
        height=1000,
        margin={"r":0,"t":0,"l":0,"b":0}
    )
    fig.write_html(f"../images/plotly/{v}.html")

In [12]:
bank_tract["which_bank"].value_counts()

BankofWest       9340
AllOtherBanks    9337
Name: which_bank, dtype: int64