# Banking Deserts

![SpeedyCash](Img/cash.jpg)

The below script uncovers the well-known phenomenon of [Banking Deserts](https://en.wikipedia.org/wiki/Banking_desert). The concept is simple: many neighborhoods with predominantly low-income and elderly populations tend to have inadequate coverage of banking services. This leads such communities to be  vulnerable to predatory loan and pricey check casher providers.

For more information on "Banking Deserts" read this Atlantic Article [banking deserts](http://www.theatlantic.com/business/archive/2016/03/banking-desert-ny-fed/473436/).

* For the next 45 minutes we'll be creating a data visualization to understand how prominent the "banking desert" phenomenon truly is. In order to accomplish this, we will be utilizing the US Census and Google Geocoder and Radar APIs.



Example Visualization:
  ![Per Capita Income Example](PerCapitaIncome_BankCount.png)


In [20]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import time
import os
import json

# Google Places API Key
api_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath('__file__')))))
file_name = os.path.join(api_dir, "api_keys.json")
data = json.load(open(file_name))

gkey = data['google_places_api_key']

In [21]:
file_name = os.path.join("Resources","Census_Data.csv")
census_pd = pd.read_csv(file_name)
census_pd.head()

Unnamed: 0,Zipcode,Address,Population,Median Age,Household Income,Per Capita Income,Poverty Rate
0,15081,"South Heights, PA 15081, USA",342,50.2,31500.0,22177,20.760234
1,20615,"Broomes Island, MD 20615, USA",424,43.4,114375.0,43920,5.188679
2,50201,"Nevada, IA 50201, USA",8139,40.4,56619.0,28908,7.777368
3,84020,"Draper, UT 84020, USA",42751,30.4,89922.0,33164,4.39288
4,39097,"Louise, MS 39097, USA",495,58.0,26838.0,17399,34.949495


In [22]:
# Randomly select 700 zip codes locations that have at least 100 residents
# Hint: `pd.sample()`
# Hint: `pd[pd[astype(int) > 100`]]`

sample_data_df = census_pd[census_pd['Population'].astype(int) > 100].sample(n=10, random_state=20)

# Visualize the DataFrame
sample_data_df.head()

Unnamed: 0,Zipcode,Address,Population,Median Age,Household Income,Per Capita Income,Poverty Rate
658,26202,"Fenwick, WV 26202, USA",762,42.1,49205.0,17950,3.543307
452,60645,"Chicago, IL 60645, USA",46913,35.0,48918.0,25500,18.419201
344,90210,"Beverly Hills, CA 90210, USA",21548,46.4,132254.0,111364,7.420642
425,13068,"Freeville, NY 13068, USA",5693,44.0,66480.0,30384,7.72879
10,1922,"Byfield, MA 01922, USA",3025,44.4,109301.0,40983,2.181818


In [33]:
# Create blank columns in DataFrame for lat/lng
sample_data_df['Lat'] = ''
sample_data_df['Lng'] = ''

# Loop through and grab the lat/lng for each of the selected zips using Google maps
# Inside the loop add the lat/lng to our DataFrame
# Note: Be sure to use try/except to handle cities with missing data
row_count = 0

for index, row in sample_data_df.iterrows():
    target_url = "https://maps.googleapis.com/maps/api/geocode/json?"
    target_url += "address=" + str(row['Zipcode'])
    target_url += "&key=" + gkey
    
    #print("Now retrieve city #:" + str(row_count))
    #print(target_url)
    sample_location = requests.get(target_url).json()
    
    try:
        sample_lat = sample_location['results'][0]['geometry']['location']['lat']
        sample_lng = sample_location['results'][0]['geometry']['location']['lng']
        sample_data_df.set_value(index, "Lat", sample_lat)
        sample_data_df.set_value(index, "Lng", sample_lng)
    except:
        print("Error with city data. Skipping")
        continue
    
    row_count+=1
    
sample_data_df.head(10)

Unnamed: 0,Zipcode,Address,Population,Median Age,Household Income,Per Capita Income,Poverty Rate,Lat,Lng
658,26202,"Fenwick, WV 26202, USA",762,42.1,49205.0,17950,3.543307,38.2252,-80.6216
452,60645,"Chicago, IL 60645, USA",46913,35.0,48918.0,25500,18.419201,42.0105,-87.6926
344,90210,"Beverly Hills, CA 90210, USA",21548,46.4,132254.0,111364,7.420642,34.103,-118.41
425,13068,"Freeville, NY 13068, USA",5693,44.0,66480.0,30384,7.72879,42.5015,-76.3522
10,1922,"Byfield, MA 01922, USA",3025,44.4,109301.0,40983,2.181818,38.3035,-85.0085
578,18519,"Scranton, PA 18519, USA",4838,41.6,38990.0,24672,11.67838,41.4635,-75.6266
18,95008,"Campbell, CA 95008, USA",45187,37.3,87204.0,44023,7.395932,37.277,-121.953
576,69334,"Bayard, NE 69334, USA",2305,46.5,43575.0,21241,12.104121,41.8136,-103.313
456,14715,"Bolivar, NY 14715, USA",2763,40.2,45030.0,22257,15.635179,42.0766,-78.1792
14,81087,"Vilas, CO 81087, USA",192,26.8,34500.0,13210,2.083333,37.3507,-102.45


In [53]:
# Create an empty column for bank count
sample_data_df['Bank Count'] = ''

# Re-loop through the DataFrame and run a Google Places search to get all banks in 5 mile radius (8000 meters)
# Inside the loop add the bank count to our DataFrame

for index, row in sample_data_df.iterrows(): 
    radius = '8000'
    target_type = "bank"
    lat = row['Lat']
    lng = row['Lng']
    
    url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
    target_url = url + "?location=" + str(lat) + "," + str(lng) + "&radius=" + radius + \
    "&types=" + target_type + "&key=" + gkey
    
    bank_data = requests.get(target_url).json()
    
    sample_data_df['Bank Count'] = len(bank_data['results'])


# Visualize the DataFrame
#print(target_url)
#print(json.dumps(bank_data, indent=4))
sample_data_df.head(10)

Unnamed: 0,Zipcode,Address,Population,Median Age,Household Income,Per Capita Income,Poverty Rate,Lat,Lng,Bank Count
658,26202,"Fenwick, WV 26202, USA",762,42.1,49205.0,17950,3.543307,38.2252,-80.6216,0
452,60645,"Chicago, IL 60645, USA",46913,35.0,48918.0,25500,18.419201,42.0105,-87.6926,0
344,90210,"Beverly Hills, CA 90210, USA",21548,46.4,132254.0,111364,7.420642,34.103,-118.41,0
425,13068,"Freeville, NY 13068, USA",5693,44.0,66480.0,30384,7.72879,42.5015,-76.3522,0
10,1922,"Byfield, MA 01922, USA",3025,44.4,109301.0,40983,2.181818,38.3035,-85.0085,0
578,18519,"Scranton, PA 18519, USA",4838,41.6,38990.0,24672,11.67838,41.4635,-75.6266,0
18,95008,"Campbell, CA 95008, USA",45187,37.3,87204.0,44023,7.395932,37.277,-121.953,0
576,69334,"Bayard, NE 69334, USA",2305,46.5,43575.0,21241,12.104121,41.8136,-103.313,0
456,14715,"Bolivar, NY 14715, USA",2763,40.2,45030.0,22257,15.635179,42.0766,-78.1792,0
14,81087,"Vilas, CO 81087, USA",192,26.8,34500.0,13210,2.083333,37.3507,-102.45,0


In [12]:
# Save the DataFrame as a csv


In [13]:
# Build a scatter plot for each data type 


In [14]:
# Build a scatter plot for each data type
#plt.scatter(selected_zips["Bank Count"], 
#            selected_zips["Median Age"],
#            edgecolor="black", linewidths=1, marker="o", 
#            alpha=0.8, label="Zip Codes")

# Incorporate the other graph properties
#plt.title("Median Age vs. Bank Count by Zip Code")
#plt.ylabel("Median Age")
#plt.xlabel("Bank Count")
#plt.grid(True)
#plt.xlim([-2.5, 202])

# Save the figure
#plt.savefig("output_analysis/Age_BankCount.png")

# Show plot
#plt.show()

In [15]:
# Build a scatter plot for each data type
#plt.scatter(selected_zips["Bank Count"], 
#            selected_zips["Household Income"],
#            edgecolor="black", linewidths=1, marker="o", 
#            alpha=0.8, label="Zip Codes")

# Incorporate the other graph properties
#plt.title("Household Income vs. Bank Count by Zip Code")
#plt.ylabel("Household Income ($)")
#plt.xlabel("Bank Count")
#plt.grid(True)
#plt.xlim([-2.5, 202])
#plt.ylim([-2.5, 230000])

# Save the figure
#plt.savefig("output_analysis/HouseholdIncome_BankCount.png")

# Show plot
#plt.show()

In [16]:
# Build a scatter plot for each data type
#plt.scatter(selected_zips["Bank Count"], 
#            selected_zips["Per Capita Income"],
#            edgecolor="black", linewidths=1, marker="o", 
#            alpha=0.8, label="Zip Codes")

# Incorporate the other graph properties
#plt.title("Per Capita Income vs. Bank Count by Zip Code")
#plt.ylabel("Per Capita Income (%)")
#plt.xlabel("Bank Count")
#plt.grid(True)
#plt.xlim([-2.5, 202])
#plt.ylim([0, 165000])
#
# Save the figure
#plt.savefig("output_analysis/PerCapitaIncome_BankCount.png")

# Show plot
#plt.show()

In [17]:
# Build a scatter plot for each data type
#plt.scatter(selected_zips["Bank Count"], 
#            selected_zips["Poverty Rate"],
#            edgecolor="black", linewidths=1, marker="o", 
#            alpha=0.8, label="Zip Codes")

# Incorporate the other graph properties
#plt.title("Poverty Rate vs. Bank Count by Zip Code")
#plt.ylabel("Poverty Rate (%)")
#plt.xlabel("Bank Count")
#plt.grid(True)
#plt.xlim([-2.5, 202])
#plt.ylim([-2.5, 102])

# Save the figure
#plt.savefig("output_analysis/PovertyRate_BankCount.png")

# Show plot
#plt.show()