In [1]:
# Import Dependencies
import pandas as pd
import os

In [2]:
# Folder holding the FHA loan spreadsheets, relative to the notebook's start directory
fha_dir = '../Resources/Outside_Resources/FHA_loans'

# Change working directory to the FHA_loans folder.
# The path is relative, so a second os.chdir from inside the folder would raise
# FileNotFoundError; skip it when the cell is re-run and we are already there.
if os.path.basename(os.getcwd()) != os.path.basename(fha_dir):
    os.chdir(fha_dir)

# Absolute path of the (new) current working directory
path = os.getcwd()

# Names of every entry found in that directory
files = os.listdir(path)

In [3]:
# Find all Excel workbooks in the folder.
# Match the real '.xlsx' extension (case-insensitively) rather than the last four
# characters, and skip the '~$...' lock files Excel creates while a workbook is
# open, since pd.read_excel cannot parse those.
excel_files = [
    f for f in files
    if f.lower().endswith('.xlsx') and not f.startswith('~$')
]

In [8]:
# Read every workbook once and collect the resulting frames in a list.
# (DataFrame.append was removed in pandas 2.0 and re-copied all accumulated rows
# on each iteration; a single concat avoids both problems.)
frames = [pd.read_excel(workbook) for workbook in excel_files]

# Combine everything into one DataFrame. pd.concat raises on an empty list, so
# fall back to an empty frame when no Excel files were found.
df = pd.concat(frames) if frames else pd.DataFrame()

# Preview the combined data (loading all the Excel files takes a while: over 2 million rows)
df.head()

Unnamed: 0,Property State,Property City,Property County,Property Zip,Origination Mortgagee/Sponsor Originator,Originating Mortgagee Number,Sponsor Name,Sponsor Number,Down Payment Source,Non Profit Number,Product Type,Loan Purpose,Property Type,Interest Rate,Original Mortgage Amount,Endorsement Year,Endorsement Month
0,AK,ANCHORAGE,ANCHORAGE,99501,FIRST RATE FINANCIAL,Not Available,CALIBER HOME LOANS INC,30992,Borrower,Not Avail,Fixed Rate,Purchase,Single Family,3.875,343660,2019,4
1,AK,ANCHORAGE,ANCHORAGE,99501,RESIDENTIAL MORTGAGE LLC,12511,Not Available,Not Available,Borrower,Not Avail,Fixed Rate,Purchase,Condo,4.375,245471,2019,4
2,AK,ANCHORAGE,ANCHORAGE,99502,ALASKAUSA MORTGAGE COMPANY LLC,75641,ALASKA USA FEDERAL CREDIT UNION,59034,Borrower,Not Avail,Fixed Rate,Purchase,Condo,4.875,157102,2019,4
3,AK,ANCHORAGE,ANCHORAGE,99502,MOVEMENT MORTGAGE LLC,26264,Not Available,Not Available,Borrower,Not Avail,Fixed Rate,Purchase,Single Family,4.375,140409,2019,4
4,AK,ANCHORAGE,ANCHORAGE,99502,RESIDENTIAL MORTGAGE LLC,12511,Not Available,Not Available,Borrower,Not Avail,Fixed Rate,Purchase,Condo,4.5,191468,2019,4


In [9]:
# Keep only the loans whose property is located in Florida
is_florida = df["Property State"] == "FL"
florida_filter = df[is_florida]
florida_filter.head()

Unnamed: 0,Property State,Property City,Property County,Property Zip,Origination Mortgagee/Sponsor Originator,Originating Mortgagee Number,Sponsor Name,Sponsor Number,Down Payment Source,Non Profit Number,Product Type,Loan Purpose,Property Type,Interest Rate,Original Mortgage Amount,Endorsement Year,Endorsement Month
10404,FL,GAINESVILLE,ALACHUA,32605,AMERICAN FINANCIAL NETWORK INC,18352,Not Available,Not Available,Borrower,Not Avail,Fixed Rate,Purchase,Single Family,5.125,161029,2019,4
10405,FL,GAINESVILLE,ALACHUA,32605,FIRST GUARANTY MORTGAGE CORP,75168,Not Available,Not Available,Borrower,Not Avail,Fixed Rate,Purchase,Single Family,4.125,270019,2019,4
10406,FL,GAINESVILLE,ALACHUA,32606,FIDELITY MORTGAGE SERVICES INC,Not Available,FREEDOM MORTGAGE CORPORATION,75159,Relative,Not Avail,Fixed Rate,Purchase,Condo,3.875,93279,2019,4
10407,FL,GAINESVILLE,ALACHUA,32606,ADVISORS MORTGAGE GROUP LLC,15483,Not Available,Not Available,Relative,Not Avail,Fixed Rate,Purchase,Single Family,4.375,96662,2019,4
10408,FL,GAINESVILLE,ALACHUA,32606,FIRST GUARANTY MORTGAGE CORP,75168,Not Available,Not Available,Relative,Not Avail,Fixed Rate,Purchase,Single Family,4.375,228779,2019,4


In [10]:
# Count the distinct zip codes that appear among the Florida loans
fl_zip_count = florida_filter['Property Zip'].nunique()
fl_zip_count

913

In [11]:
# Number of FHA loans per Florida zip code, across every endorsement year loaded
zip_total_count = florida_filter.groupby('Property Zip')['Property State'].count()
zip_total_count

Property Zip
32003    174
32008     28
32009     36
32011    139
32024    148
        ... 
34987    356
34990    114
34994     49
34996     21
34997    292
Name: Property State, Length: 913, dtype: int64

In [12]:
# Loan counts per endorsement year and per endorsement month, kept for later reference.
# Note the irregular whitespace in both source column names (double space / trailing space).
year = florida_filter.groupby('Endorsement  Year')['Property State'].count()
month = florida_filter.groupby('Endorsement Month ')['Property State'].count()
print(year, "-------------------------------------", month, sep="\n")

Endorsement  Year
2019    73425
2020    85410
2021    26998
Name: Property State, dtype: int64
-------------------------------------
Endorsement Month 
1     20669
2     15639
3     19096
4     20088
5     13020
6     13078
7     15309
8     15314
9     13373
10    13830
11    12369
12    14048
Name: Property State, dtype: int64


In [13]:
# # Separate the DataFrame into 2 separate years
# fl_2019 = df[(df["Property State"] == "FL") & (df['Endorsement  Year'] == 2019)]
# fl_2020 = df[(df["Property State"] == "FL") & (df['Endorsement  Year'] == 2020)]

# # Create new DataFrames for each year
# df2019 = fl_2019.groupby(['Property Zip', 'Endorsement Month ']).size().reset_index(name='FHA_2019')
# df2020 = fl_2020.groupby(['Property Zip', 'Endorsement Month ']).size().reset_index(name='FHA_2020')

In [14]:
# # Merge the two DataFrames back together with separate counts per month
# merge_col = df2020.columns.difference(df2019.columns)
# total_counts = pd.merge(df2019, df2020[merge_col], left_index=True, right_index=True)
# total_counts.head()

In [15]:
# Build the table destined for SQL: one row per (zip code, month, year) with the
# number of Florida FHA loans endorsed in 2019, 2020 or 2021.
fl_rows = df[df["Property State"].eq("FL") & df['Endorsement  Year'].isin([2019, 2020, 2021])]
sql_table = (
    fl_rows
    .groupby(['Property Zip', 'Endorsement Month ', 'Endorsement  Year'])
    .size()
    .reset_index(name='fha')
    .rename(columns={'Property Zip': 'zipcode', 'Endorsement Month ': 'month', 'Endorsement  Year': 'year'})
    .sort_values(['zipcode', 'year', 'month'])
    .reset_index(drop=True)
)
# Fix the column order expected by the database table
sql_table = sql_table[['zipcode', 'month', 'year', 'fha']]
# sql_table.head()
sql_table['zipcode'].nunique()

913

In [16]:
# # Create engine for sql db
# from sqlalchemy import create_engine
# e = create_engine('sqlite:///../sqlite.db')

# # Push above DataFrame into sqlite
# total_counts.to_sql(name='fha_loans', con=e)

import sqlite3
from sqlite3 import Error

# Identify database path (resolved against the current working directory,
# which an earlier cell switched to the FHA_loans folder)
db_path = os.path.abspath('./../../../sqlite.db')

# Connect to SQLite Database
conn = sqlite3.connect(db_path)
try:
    cursor = conn.cursor()
    # Drop the table if it exists
    cursor.execute("DROP TABLE IF EXISTS fha_loans")

    # Create the table from the aggregated DataFrame
    sql_table.to_sql('fha_loans', conn, if_exists='replace', index=False)
finally:
    # Always close the connection, even if the export above raises,
    # so the database file is not left locked by a leaked handle
    conn.close()