# Banking Deserts
---
The below script uncovers the well-known phenomenon of [Banking Deserts](https://en.wikipedia.org/wiki/Banking_desert). The concept is simple: many neighborhoods with predominantly low-income and elderly populations tend to have inadequate coverage of banking services. This leads such communities to be  vulnerable to predatory loan and pricey check casher providers.

In this script, we retrieved and plotted data from the 2013 US Census and Google Places API to show the relationship between various socioeconomic parameters and bank count across 700 randomly selected zip codes. We used Pandas, Numpy, Matplotlib, Requests, Census API, and Google API to accomplish our task.

In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import time
from census import Census
from us import states

# Google Places API Key
gkey = "AIzaSyAQ93Lgxm0ZILQEAW0QU72_i3nM-5wgXro"

## Data Retrieval

In [2]:
# Import the census data into a pandas DataFrame
census_file = 'Census_Data.csv'
census_data = pd.read_csv(census_file)
# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Preview the data
census_pd.head()

Unnamed: 0,Zipcode,Address,Population,Median Age,Household Income,Per Capita Income,Poverty Rate
0,15081,"South Heights, PA 15081, USA",342,50.2,31500.0,22177,20.760234
1,20615,"Broomes Island, MD 20615, USA",424,43.4,114375.0,43920,5.188679
2,50201,"Nevada, IA 50201, USA",8139,40.4,56619.0,28908,7.777368
3,84020,"Draper, UT 84020, USA",42751,30.4,89922.0,33164,4.39288
4,39097,"Louise, MS 39097, USA",495,58.0,26838.0,17399,34.949495


In [3]:
# Randomly select 700 zip codes locations that have at least 100 residents
# Hint: `pd.sample()`
over_one_hundred = census_pd.loc[census_pd['Population'] >= 100]
census_sample = over_one_hundred.sample(n=700)

# Hint: `pd[pd[astype(int) > 100`]]`

# Visualize the DataFrame
census_sample.head()

Unnamed: 0,Zipcode,Address,Population,Median Age,Household Income,Per Capita Income,Poverty Rate
122,79021,"Cotton Center, TX 79021, USA",104,26.5,56250.0,17961,25.961538
499,55063,"Pine City, MN 55063, USA",9171,43.2,47556.0,22709,13.608113
234,97711,"Ashwood, OR 97711, USA",123,34.5,27188.0,12480,41.463415
319,97396,"Willamina, OR 97396, USA",2827,44.3,36848.0,18929,16.943757
34,35956,"Boaz, AL 35956, USA",8716,40.0,46844.0,19615,12.322166


In [4]:
# Create blank columns in DataFrame for lat/lng
census_sample['Lat'] = ''
census_sample['Lng'] = ''


for index, row in census_sample.head(5).iterrows():
    zip_code = row['Zipcode']
    
    # create api url
    target_url = "https://maps.googleapis.com/maps/api/geocode/json" \
    "?address=%s&key=%s" % (zip_code, gkey)
    
    try:
        # get the response
        census_resp = requests.get(target_url).json()

        # Loop through and grab the lat/lng for each of the selected zips using Google maps
        census_sample.set_value(index, "Lat", census_resp['results'][0]['geometry']['location']['lat'])
        census_sample.set_value(index, "Lng", census_resp['results'][0]['geometry']['location']['lng'])
    except:
        print("No Data Found")

# Visualize the DataFrame
census_sample.head()

Unnamed: 0,Zipcode,Address,Population,Median Age,Household Income,Per Capita Income,Poverty Rate,Lat,Lng
122,79021,"Cotton Center, TX 79021, USA",104,26.5,56250.0,17961,25.961538,33.9501,-102.0
499,55063,"Pine City, MN 55063, USA",9171,43.2,47556.0,22709,13.608113,45.8233,-92.9704
234,97711,"Ashwood, OR 97711, USA",123,34.5,27188.0,12480,41.463415,44.7135,-120.615
319,97396,"Willamina, OR 97396, USA",2827,44.3,36848.0,18929,16.943757,45.1917,-123.514
34,35956,"Boaz, AL 35956, USA",8716,40.0,46844.0,19615,12.322166,34.1586,-86.1141


In [9]:
# Create an empty column for bank count
census_sample['Bank Count'] = ''

# Re-loop through the DataFrame and run a Google Places search to get all banks in 5 mile radius (8000 meters)
# Inside the loop add the bank count to our DataFrame
for index, row in census_sample.head(5).iterrows():
    # get city and state names
    lat = row['Lat'] 
    lng = row['Lng']
    
    # create api url
    target_search = "bank"
    target_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json" \
    "?location=%s,%s&keyword=%s&radius=%s&key=%s" % (
        lat, lng, target_search, 8000, gkey)
    
    bank_data = requests.get(target_url).json()
        
    # get response data
    census_sample.set_value(index, "Bank Count", len(bank_data['results']))






# Visualize the DataFrame
census_sample.head()

Unnamed: 0,Zipcode,Address,Population,Median Age,Household Income,Per Capita Income,Poverty Rate,Lat,Lng,Bank Count
122,79021,"Cotton Center, TX 79021, USA",104,26.5,56250.0,17961,25.961538,33.9501,-102.0,0
499,55063,"Pine City, MN 55063, USA",9171,43.2,47556.0,22709,13.608113,45.8233,-92.9704,11
234,97711,"Ashwood, OR 97711, USA",123,34.5,27188.0,12480,41.463415,44.7135,-120.615,0
319,97396,"Willamina, OR 97396, USA",2827,44.3,36848.0,18929,16.943757,45.1917,-123.514,0
34,35956,"Boaz, AL 35956, USA",8716,40.0,46844.0,19615,12.322166,34.1586,-86.1141,8


## Save Graphs

In [6]:
# Save the DataFrame as a csv


## Plot Graphs

In [7]:
# Build a scatter plot for each data type 


In [8]:
# Build a scatter plot for each data type
plt.scatter(selected_zips["Bank Count"], 
            selected_zips["Median Age"],
            edgecolor="black", linewidths=1, marker="o", 
            alpha=0.8, label="Zip Codes")

# Incorporate the other graph properties
plt.title("Median Age vs. Bank Count by Zip Code")
plt.ylabel("Median Age")
plt.xlabel("Bank Count")
plt.grid(True)
plt.xlim([-2.5, 202])

# Save the figure
plt.savefig("output_analysis/Age_BankCount.png")

# Show plot
plt.show()

NameError: name 'selected_zips' is not defined

In [None]:
# Build a scatter plot for each data type
plt.scatter(selected_zips["Bank Count"], 
            selected_zips["Household Income"],
            edgecolor="black", linewidths=1, marker="o", 
            alpha=0.8, label="Zip Codes")

# Incorporate the other graph properties
plt.title("Household Income vs. Bank Count by Zip Code")
plt.ylabel("Household Income ($)")
plt.xlabel("Bank Count")
plt.grid(True)
plt.xlim([-2.5, 202])
plt.ylim([-2.5, 230000])

# Save the figure
plt.savefig("output_analysis/HouseholdIncome_BankCount.png")

# Show plot
plt.show()

In [None]:
# Build a scatter plot for each data type
plt.scatter(selected_zips["Bank Count"], 
            selected_zips["Per Capita Income"],
            edgecolor="black", linewidths=1, marker="o", 
            alpha=0.8, label="Zip Codes")

# Incorporate the other graph properties
plt.title("Per Capita Income vs. Bank Count by Zip Code")
plt.ylabel("Per Capita Income (%)")
plt.xlabel("Bank Count")
plt.grid(True)
plt.xlim([-2.5, 202])
plt.ylim([0, 165000])

# Save the figure
plt.savefig("output_analysis/PerCapitaIncome_BankCount.png")

# Show plot
plt.show()

In [None]:
# Build a scatter plot for each data type
plt.scatter(selected_zips["Bank Count"], 
            selected_zips["Poverty Rate"],
            edgecolor="black", linewidths=1, marker="o", 
            alpha=0.8, label="Zip Codes")

# Incorporate the other graph properties
plt.title("Poverty Rate vs. Bank Count by Zip Code")
plt.ylabel("Poverty Rate (%)")
plt.xlabel("Bank Count")
plt.grid(True)
plt.xlim([-2.5, 202])
plt.ylim([-2.5, 102])

# Save the figure
plt.savefig("output_analysis/PovertyRate_BankCount.png")

# Show plot
plt.show()