# PPP Loans: EDA

In [1]:
import pandas as pd
import numpy as np
import subprocess
import matplotlib.pyplot as plt
import seaborn as sns

## Q0: Load Data

1. PPP Loan Level Data - SBA
2. Unemployment Rate by County by Month - FRED
3. County FIPS crosswalk
4. County lat/long

In [21]:
def scrape_coords(url):
    
    from bs4 import BeautifulSoup
    import requests
    
    page = requests.get(url).text
    soup = BeautifulSoup(page, 'html.parser')
    
    table = soup.find_all('table')[0]
    
    coords = pd.read_html(str(table))[0]
    
    results = coords.loc[:,['FIPS','Latitude','Longitude']]
    
    results['FIPS'] = '0'+results['FIPS'].astype(str)
    
    results.to_csv('county_to_coords.csv',index=False)

In [22]:
scrape_coords('https://en.wikipedia.org/wiki/User:Michael_J/County_table')

## Q1: What is the distribution of PPP loans across the US

* Calculate at county level
* Use KDE to show spatial distribution

**STEPS**
1. Load PPP data into python
2. Remove any rows without a valid city or state

In [48]:
def add_county(data):
    
    counties = pd.read_csv("./ZIP-COUNTY-FIPS_2017-06.csv", dtype='object')
    
    data['BorrowerZip'] = data['BorrowerZip'].str[:5]
    
    data = data.merge(counties, left_on = ['BorrowerState','BorrowerZip'], right_on = ['STATE','ZIP'])
    
    data.drop(['ZIP','COUNTYNAME','STATE',
               'CLASSFP','Gender','Veteran',
               'NonProfit','Race','Ethnicity'],axis=1,inplace=True)
    
    data.rename(columns={'STCOUNTYFP': 'BorrowerFIPS'},inplace=True)
    
    return data

def add_coords(data):

    coords = pd.read_csv('./county_to_coords.csv',dtype='object')
    
    data = data.merge(coords,left_on=['BorrowerFIPS'], right_on=['FIPS'])
    
    data.drop('FIPS',axis=1,inplace=True)
    
    data['Latitude'] = (data['Latitude'].
                        str.replace("\u2014", '-')[:-1])
    
    data['Longitude'] = (data['Longitude'].
                         str.replace("\u2014", '-')[:-1])
    
    return data


def load_PPP(file_path):
        
    data = pd.read_csv(file_path, nrows=100)
    
    data = data.loc[~(data['BorrowerCity'].isin(['','N/A',np.nan])) & ~(data['BorrowerState'].isin(['','N/A',np.nan])),]
    
    data = add_county(data)
    
    data = add_coords(data)
    
    return data



PPP_loans = load_PPP("./public_150k_plus_220930.csv")

PPP_loans.head()

Unnamed: 0,LoanNumber,DateApproved,SBAOfficeCode,ProcessingMethod,BorrowerName,BorrowerAddress,BorrowerCity,BorrowerState,BorrowerZip,LoanStatusDate,...,BusinessType,OriginatingLenderLocationID,OriginatingLender,OriginatingLenderCity,OriginatingLenderState,ForgivenessAmount,ForgivenessDate,BorrowerFIPS,Latitude,Longitude
0,5502308207,08/08/2020,1084,PPP,"KAKIVIK ASSET MANAGEMENT, LLC",5015 BUSINESS PARK BLVD,ANCHORAGE,AK,99503,07/22/2021,...,Limited Liability Company(LLC),116975,Northrim Bank,ANCHORAGE,AK,9615884.57,06/11/2021,2020,+61.177549°,–149.274354°
1,3569947101,04/11/2020,1084,PPP,"I. C. E. SERVICES, INC",2606 C Street,ANCHORAGE,AK,99503,08/20/2021,...,Corporation,56102,KeyBank National Association,CLEVELAND,OH,5751925.36,07/22/2021,2020,+61.177549°,–149.274354°
2,7234997101,04/14/2020,1084,PPP,"TATITLEK TECHNOLOGIES, INC.",561 E. 36th Avenue,ANCHORAGE,AK,99503,10/21/2020,...,Corporation,56102,KeyBank National Association,CLEVELAND,OH,,,2020,+61.177549°,–149.274354°
3,2465287203,04/16/2020,1084,PPP,"YULISTA TACTICAL, LLC",5015 BUSINESS PARK BLVD Suite 3000,ANCHORAGE,AK,99503,08/20/2021,...,Limited Liability Company(LLC),56102,KeyBank National Association,CLEVELAND,OH,,,2020,+61.177549°,–149.274354°
4,6033737702,05/01/2020,1084,PPP,AHTNA SUPPORT AND TRAINING SERVICES LLC,110 W 38th Ave Ste 100,Anchorage,AK,99503,11/18/2021,...,,9551,"Bank of America, National Association",CHARLOTTE,NC,532354.01,09/20/2021,2020,+61.177549°,–149.274354°


In [83]:
PPP_loans['Longitude'].str[:-1].replace('\u2208','(())')

0     –149.274354
1     –149.274354
2     –149.274354
3     –149.274354
4     –149.274354
         ...     
82    –134.178781
83    –134.178781
84    –131.106685
85    –145.023141
86            NaN
Name: Longitude, Length: 87, dtype: object

In [77]:
a = PPP_loans.loc[0,'Longitude'][0]

str()

'–'