# DEPENDENCIES AND SETUP

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
from scipy.stats import linregress
from pprint import pprint

# Import API key

# Hide warning messages in notebook.
# NOTE(review): 'ignore' with no category/module filter silences every warning
# for the rest of the session, including pandas/numpy deprecation and runtime
# warnings (e.g. divide-by-zero). Comment this out while debugging.
import warnings
warnings.filterwarnings('ignore')



# LOAD RESEARCHED DATA

In [2]:
# File to load: Zillow median price per square foot by zip code,
# one column per month (1996-04 through 2019-08).
zillow = "Research_data/Zip_Residential_Zillow_.csv"

# Read the Zillow CSV into a DataFrame (one row per zip code).
zillow_df = pd.read_csv(zillow, encoding="utf-8")

# Preview the first rows to confirm the file parsed as expected.
zillow_df.head()

Unnamed: 0,RegionID,RegionName,City,State,Metro,CountyName,SizeRank,1996-04,1996-05,1996-06,...,2018-11,2018-12,2019-01,2019-02,2019-03,2019-04,2019-05,2019-06,2019-07,2019-08
0,61639,10025,New York,NY,New York-Newark-Jersey City,New York County,1,200.0,200.0,201.0,...,1316,1304,1291,1289,1288,1275,1261,1256,1247,1240
1,84654,60657,Chicago,IL,Chicago-Naperville-Elgin,Cook County,2,156.0,157.0,157.0,...,478,479,481,483,488,493,493,488,481,477
2,61637,10023,New York,NY,New York-Newark-Jersey City,New York County,3,359.0,359.0,359.0,...,1582,1571,1557,1542,1522,1500,1488,1487,1478,1469
3,91982,77494,Katy,TX,Houston-The Woodlands-Sugar Land,Harris County,4,67.0,68.0,68.0,...,113,114,114,114,114,114,114,114,113,112
4,84616,60614,Chicago,IL,Chicago-Naperville-Elgin,Cook County,5,199.0,200.0,201.0,...,525,527,529,532,534,534,531,523,515,509


In [3]:
# Format the DataFrame and check the NJ rows for consistency.
#
# "RegionName" holds the zip code, so rename it accordingly.
zillow_df = zillow_df.rename(columns={"RegionName": "zip_code"})

# Zip codes appear to be parsed as numbers, which drops leading zeros
# (e.g. Hoboken's 07030 becomes 7030), so left-pad them back to five characters.
# astype(str) + str.zfill is used instead of "{:05}".format because it gives the
# same result on numeric input and is also safe to re-run once the column
# already holds strings.
zillow_df["zip_code"] = zillow_df["zip_code"].astype(str).str.zfill(5)

# New Jersey zip codes start with 0, which makes the state a convenient
# spot-check for the padding above.
checknj = zillow_df[zillow_df["State"] == "NJ"]

checknj


Unnamed: 0,RegionID,zip_code,City,State,Metro,CountyName,SizeRank,1996-04,1996-05,1996-06,...,2018-11,2018-12,2019-01,2019-02,2019-03,2019-04,2019-05,2019-06,2019-07,2019-08
63,60545,07030,Hoboken,NJ,New York-Newark-Jersey City,Hudson County,64,158.0,158.0,158.0,...,728,727,727,727,727,726,723,720,718,718
79,61148,08701,Lakewood,NJ,New York-Newark-Jersey City,Ocean County,80,60.0,60.0,60.0,...,164,165,165,165,165,165,165,166,166,166
172,60639,07302,Jersey City,NJ,New York-Newark-Jersey City,Hudson County,173,113.0,114.0,114.0,...,801,800,795,790,788,785,780,776,776,775
186,61169,08753,Toms River,NJ,New York-Newark-Jersey City,Ocean County,187,74.0,74.0,74.0,...,165,165,165,165,165,163,163,164,165,165
212,60518,07002,Bayonne,NJ,New York-Newark-Jersey City,Hudson County,213,76.0,76.0,76.0,...,203,203,204,204,205,208,209,209,209,209
238,60599,07093,West New York,NJ,New York-Newark-Jersey City,Hudson County,239,82.0,81.0,81.0,...,288,291,293,293,294,296,298,299,300,302
282,61200,08831,Monroe,NJ,New York-Newark-Jersey City,Middlesex County,283,95.0,95.0,94.0,...,187,188,190,191,191,191,191,189,189,188
334,60594,07087,Union City,NJ,New York-Newark-Jersey City,Hudson County,335,60.0,60.0,60.0,...,266,268,269,269,269,265,260,259,260,261
541,60560,07047,North Bergen,NJ,New York-Newark-Jersey City,Hudson County,542,73.0,73.0,74.0,...,237,239,240,241,243,243,242,242,243,245
545,61175,08759,Manchester,NJ,New York-Newark-Jersey City,Ocean County,546,65.0,65.0,65.0,...,116,116,116,116,116,115,114,114,114,114


In [4]:
# Collect the column labels as a plain Python list; their positions are used
# later to address the monthly price columns by index.
columns = zillow_df.columns.tolist()

# Report how many columns there are, then show the labels themselves.
print(len(columns))
columns

288


['RegionID',
 'zip_code',
 'City',
 'State',
 'Metro',
 'CountyName',
 'SizeRank',
 '1996-04',
 '1996-05',
 '1996-06',
 '1996-07',
 '1996-08',
 '1996-09',
 '1996-10',
 '1996-11',
 '1996-12',
 '1997-01',
 '1997-02',
 '1997-03',
 '1997-04',
 '1997-05',
 '1997-06',
 '1997-07',
 '1997-08',
 '1997-09',
 '1997-10',
 '1997-11',
 '1997-12',
 '1998-01',
 '1998-02',
 '1998-03',
 '1998-04',
 '1998-05',
 '1998-06',
 '1998-07',
 '1998-08',
 '1998-09',
 '1998-10',
 '1998-11',
 '1998-12',
 '1999-01',
 '1999-02',
 '1999-03',
 '1999-04',
 '1999-05',
 '1999-06',
 '1999-07',
 '1999-08',
 '1999-09',
 '1999-10',
 '1999-11',
 '1999-12',
 '2000-01',
 '2000-02',
 '2000-03',
 '2000-04',
 '2000-05',
 '2000-06',
 '2000-07',
 '2000-08',
 '2000-09',
 '2000-10',
 '2000-11',
 '2000-12',
 '2001-01',
 '2001-02',
 '2001-03',
 '2001-04',
 '2001-05',
 '2001-06',
 '2001-07',
 '2001-08',
 '2001-09',
 '2001-10',
 '2001-11',
 '2001-12',
 '2002-01',
 '2002-02',
 '2002-03',
 '2002-04',
 '2002-05',
 '2002-06',
 '2002-07',
 '200

# Analyzing columns
#with 288 columns we have: index 0 to 287
#we will use only date columns, so columns 7 to 287

#before crisis: columns 7 to 126
#after crisis: columns  168 to 287
#crisis: columns 127 to 167

In [None]:
# Month-over-month price ratios for every zip code.
#
# Result: y maps each zip code (column position 1) to a list with one ratio per
# date column from position 8 onward, i.e. price[month] / price[previous month].
# The first date column (position 7) has no predecessor and yields no ratio,
# so each list has (number of date columns - 1) entries: 280 for 288 columns.
#
# Changes from the original nested loop:
#   * every row is included; the loop iterated over zillow_df.iloc[1:] and so
#     silently skipped the first data row;
#   * the division is done once over the whole frame, not cell by cell with
#     .iloc inside iterrows(), which also reused the index *label* as a
#     *position* and was only correct for a default RangeIndex;
#   * the unused "vcalc = 0" branch and the stray "vcalc" expression are gone.
ZIP_COL = 1          # position of the zip_code column
FIRST_DATE_COL = 7   # positions 0-6 are identifiers; 7 onward are monthly prices

prices = zillow_df.iloc[:, FIRST_DATE_COL:]

# Divide each month by the month before it. The denominator is passed as a bare
# array so pandas divides position-by-position and does not align on labels.
ratios = prices.iloc[:, 1:].div(prices.iloc[:, :-1].to_numpy())

# If a zip code occurs more than once, the last row wins, as in the original.
y = dict(zip(zillow_df.iloc[:, ZIP_COL], ratios.to_numpy().tolist()))

# Show only the number of zip codes processed; displaying y itself would dump
# one long list per zip code into the notebook output.
len(y)