# DEPENDENCIES AND SETUP

In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
from scipy.stats import linregress
from pprint import pprint



# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')



# LOAD RESEACHED DATA

In [3]:
# File to Load 
zillow = "Research_data/Zip_Residential_Zillow_.csv"
_
# Read the Zillow csv with Zip Codes and Medians price per Sq foot. (from 1996 to 2019)
zillow_df = pd.read_csv(zillow, encoding="utf-8")
zillow_df = zillow_df.fillna(0)
zillow_df.head()

Unnamed: 0,RegionID,RegionName,City,State,Metro,CountyName,SizeRank,1996-04,1996-05,1996-06,...,2018-11,2018-12,2019-01,2019-02,2019-03,2019-04,2019-05,2019-06,2019-07,2019-08
0,61639,10025,New York,NY,New York-Newark-Jersey City,New York County,1,200.0,200.0,201.0,...,1316,1304,1291,1289,1288,1275,1261,1256,1247,1240
1,84654,60657,Chicago,IL,Chicago-Naperville-Elgin,Cook County,2,156.0,157.0,157.0,...,478,479,481,483,488,493,493,488,481,477
2,61637,10023,New York,NY,New York-Newark-Jersey City,New York County,3,359.0,359.0,359.0,...,1582,1571,1557,1542,1522,1500,1488,1487,1478,1469
3,91982,77494,Katy,TX,Houston-The Woodlands-Sugar Land,Harris County,4,67.0,68.0,68.0,...,113,114,114,114,114,114,114,114,113,112
4,84616,60614,Chicago,IL,Chicago-Naperville-Elgin,Cook County,5,199.0,200.0,201.0,...,525,527,529,532,534,534,531,523,515,509


In [4]:
# Format the Data Frame and check the NJ state for consistency
zillow_df = zillow_df.rename(columns ={"RegionName" : "zip_code"})
zillow_df["zip_code"] = zillow_df.zip_code.map("{:05}".format)
checknj = zillow_df[(zillow_df["State"] == "NJ")]

checknj


Unnamed: 0,RegionID,zip_code,City,State,Metro,CountyName,SizeRank,1996-04,1996-05,1996-06,...,2018-11,2018-12,2019-01,2019-02,2019-03,2019-04,2019-05,2019-06,2019-07,2019-08
63,60545,07030,Hoboken,NJ,New York-Newark-Jersey City,Hudson County,64,158.0,158.0,158.0,...,728,727,727,727,727,726,723,720,718,718
79,61148,08701,Lakewood,NJ,New York-Newark-Jersey City,Ocean County,80,60.0,60.0,60.0,...,164,165,165,165,165,165,165,166,166,166
172,60639,07302,Jersey City,NJ,New York-Newark-Jersey City,Hudson County,173,113.0,114.0,114.0,...,801,800,795,790,788,785,780,776,776,775
186,61169,08753,Toms River,NJ,New York-Newark-Jersey City,Ocean County,187,74.0,74.0,74.0,...,165,165,165,165,165,163,163,164,165,165
212,60518,07002,Bayonne,NJ,New York-Newark-Jersey City,Hudson County,213,76.0,76.0,76.0,...,203,203,204,204,205,208,209,209,209,209
238,60599,07093,West New York,NJ,New York-Newark-Jersey City,Hudson County,239,82.0,81.0,81.0,...,288,291,293,293,294,296,298,299,300,302
282,61200,08831,Monroe,NJ,New York-Newark-Jersey City,Middlesex County,283,95.0,95.0,94.0,...,187,188,190,191,191,191,191,189,189,188
334,60594,07087,Union City,NJ,New York-Newark-Jersey City,Hudson County,335,60.0,60.0,60.0,...,266,268,269,269,269,265,260,259,260,261
541,60560,07047,North Bergen,NJ,New York-Newark-Jersey City,Hudson County,542,73.0,73.0,74.0,...,237,239,240,241,243,243,242,242,243,245
545,61175,08759,Manchester,NJ,New York-Newark-Jersey City,Ocean County,546,65.0,65.0,65.0,...,116,116,116,116,116,115,114,114,114,114


In [5]:
# getting column names

columns = list(zillow_df.columns.values)
print(len(columns))
columns

288


['RegionID',
 'zip_code',
 'City',
 'State',
 'Metro',
 'CountyName',
 'SizeRank',
 '1996-04',
 '1996-05',
 '1996-06',
 '1996-07',
 '1996-08',
 '1996-09',
 '1996-10',
 '1996-11',
 '1996-12',
 '1997-01',
 '1997-02',
 '1997-03',
 '1997-04',
 '1997-05',
 '1997-06',
 '1997-07',
 '1997-08',
 '1997-09',
 '1997-10',
 '1997-11',
 '1997-12',
 '1998-01',
 '1998-02',
 '1998-03',
 '1998-04',
 '1998-05',
 '1998-06',
 '1998-07',
 '1998-08',
 '1998-09',
 '1998-10',
 '1998-11',
 '1998-12',
 '1999-01',
 '1999-02',
 '1999-03',
 '1999-04',
 '1999-05',
 '1999-06',
 '1999-07',
 '1999-08',
 '1999-09',
 '1999-10',
 '1999-11',
 '1999-12',
 '2000-01',
 '2000-02',
 '2000-03',
 '2000-04',
 '2000-05',
 '2000-06',
 '2000-07',
 '2000-08',
 '2000-09',
 '2000-10',
 '2000-11',
 '2000-12',
 '2001-01',
 '2001-02',
 '2001-03',
 '2001-04',
 '2001-05',
 '2001-06',
 '2001-07',
 '2001-08',
 '2001-09',
 '2001-10',
 '2001-11',
 '2001-12',
 '2002-01',
 '2002-02',
 '2002-03',
 '2002-04',
 '2002-05',
 '2002-06',
 '2002-07',
 '200

In [6]:

columns.remove('RegionID')
columns.remove('zip_code')
columns.remove('City')
columns.remove('State')
columns.remove('Metro')
columns.remove('CountyName')
columns.remove('SizeRank')
columns

['1996-04',
 '1996-05',
 '1996-06',
 '1996-07',
 '1996-08',
 '1996-09',
 '1996-10',
 '1996-11',
 '1996-12',
 '1997-01',
 '1997-02',
 '1997-03',
 '1997-04',
 '1997-05',
 '1997-06',
 '1997-07',
 '1997-08',
 '1997-09',
 '1997-10',
 '1997-11',
 '1997-12',
 '1998-01',
 '1998-02',
 '1998-03',
 '1998-04',
 '1998-05',
 '1998-06',
 '1998-07',
 '1998-08',
 '1998-09',
 '1998-10',
 '1998-11',
 '1998-12',
 '1999-01',
 '1999-02',
 '1999-03',
 '1999-04',
 '1999-05',
 '1999-06',
 '1999-07',
 '1999-08',
 '1999-09',
 '1999-10',
 '1999-11',
 '1999-12',
 '2000-01',
 '2000-02',
 '2000-03',
 '2000-04',
 '2000-05',
 '2000-06',
 '2000-07',
 '2000-08',
 '2000-09',
 '2000-10',
 '2000-11',
 '2000-12',
 '2001-01',
 '2001-02',
 '2001-03',
 '2001-04',
 '2001-05',
 '2001-06',
 '2001-07',
 '2001-08',
 '2001-09',
 '2001-10',
 '2001-11',
 '2001-12',
 '2002-01',
 '2002-02',
 '2002-03',
 '2002-04',
 '2002-05',
 '2002-06',
 '2002-07',
 '2002-08',
 '2002-09',
 '2002-10',
 '2002-11',
 '2002-12',
 '2003-01',
 '2003-02',
 '20

# Analyzing columns
#with 288 coumns we have: index 0 to 287
#we will use only date columns, so columns 7 to 287

#before crisis: columns 7 to 126
#after crisis: columns  168 to 287
#crisis: columns 127 to 167

In [11]:
# Iterations through rows and columns

y = {}
for i, r in zillow_df.iloc[1:].iterrows():
    y[zillow_df.iloc[i,1]] = []

    for j in range (7, 288):
        if j == 7:
            vcalc = 0
            y[zillow_df.iloc[i,1]].append(vcalc)
        else:
            vcalc = ((zillow_df.iloc[i,j]/zillow_df.iloc[i,j-1]))
            y[zillow_df.iloc[i,1]].append(vcalc)

y       

{'60657': [0,
  1.0064102564102564,
  1.0,
  1.0063694267515924,
  1.0,
  1.0,
  1.0063291139240507,
  1.0,
  1.0062893081761006,
  1.0,
  1.00625,
  1.0,
  1.0062111801242235,
  1.0,
  1.0061728395061729,
  1.0,
  1.0122699386503067,
  1.006060606060606,
  1.0060240963855422,
  1.0119760479041917,
  1.0118343195266273,
  1.0175438596491229,
  1.0114942528735633,
  1.0113636363636365,
  1.0168539325842696,
  1.011049723756906,
  1.0163934426229508,
  1.0161290322580645,
  1.0158730158730158,
  1.015625,
  1.0153846153846153,
  1.0101010101010102,
  1.015,
  1.019704433497537,
  1.0144927536231885,
  1.0142857142857142,
  1.0140845070422535,
  1.0138888888888888,
  1.0136986301369864,
  1.0135135135135136,
  1.008888888888889,
  1.013215859030837,
  1.0130434782608695,
  1.0128755364806867,
  1.0127118644067796,
  1.0167364016736402,
  1.0123456790123457,
  1.0121951219512195,
  1.0120481927710843,
  1.0119047619047619,
  1.0156862745098039,
  1.0115830115830116,
  1.0076335877862594,
 

In [12]:
percent_df = pd.DataFrame.from_dict(y, orient="index", columns = columns)
percent_df

Unnamed: 0,1996-04,1996-05,1996-06,1996-07,1996-08,1996-09,1996-10,1996-11,1996-12,1997-01,...,2018-11,2018-12,2019-01,2019-02,2019-03,2019-04,2019-05,2019-06,2019-07,2019-08
60657,0,1.006410,1.000000,1.006369,1.000000,1.000000,1.006329,1.000000,1.006289,1.000000,...,1.000000,1.002092,1.004175,1.004158,1.010352,1.010246,1.000000,0.989858,0.985656,0.991684
10023,0,1.000000,1.000000,1.000000,1.002786,1.000000,1.002778,1.000000,1.005540,1.002755,...,0.993095,0.993047,0.991088,0.990366,0.987030,0.985545,0.992000,0.999328,0.993948,0.993911
77494,0,1.014925,1.000000,0.985294,1.000000,1.000000,0.985075,0.984848,1.000000,0.984615,...,1.000000,1.008850,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.991228,0.991150
60614,0,1.005025,1.005000,1.000000,1.000000,1.000000,1.000000,0.995025,1.000000,1.000000,...,1.001908,1.003810,1.003795,1.005671,1.003759,1.000000,0.994382,0.984934,0.984704,0.988350
77449,0,1.000000,1.000000,1.000000,1.000000,1.020408,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.000000,1.010753,1.010638,1.000000,0.989474,1.000000,1.010638,1.010526,1.010417
79936,0,1.000000,1.018519,0.981818,1.000000,1.000000,1.018519,1.000000,1.000000,0.981818,...,1.000000,1.011236,1.000000,1.011111,1.000000,1.000000,1.000000,1.000000,1.010989,1.000000
77084,0,1.020408,0.980000,1.000000,1.000000,1.020408,1.000000,1.000000,1.000000,0.980000,...,1.011364,1.011236,1.011111,1.000000,1.000000,0.989011,1.000000,1.011111,1.010989,1.010870
10002,0,1.000000,1.000000,1.000000,1.000000,0.996296,1.000000,1.003717,1.003704,1.003690,...,0.990845,0.990050,0.992821,1.000723,0.997832,0.988414,0.984615,0.985863,0.988679,0.995420
10467,0,1.011628,1.000000,1.000000,1.000000,1.000000,0.988506,1.000000,1.000000,1.011628,...,1.003731,1.007435,1.007380,1.010989,1.014493,1.010714,1.000000,1.000000,1.003534,1.007042
11226,0,1.000000,1.000000,1.000000,1.000000,1.010526,1.000000,1.010417,1.010309,1.010204,...,1.001712,1.000000,0.998291,1.006849,1.008503,1.003373,1.000000,0.998319,0.994949,0.998308


In [13]:

percent_df = percent_df.fillna(0)


In [14]:

sum_zipcode = np.sum(percent_df, axis=1
                    )
sum_zipcode = pd.DataFrame(sum_zipcode)
sum_zipcode = sum_zipcode.sort_values(by =[0], ascending=False)

Greatest10 = sum_zipcode.head(10)
Lowest10 = sum_zipcode.tail(10)

print(Greatest10)
print(Lowest10)

         0
43832  inf
38922  inf
55341  inf
35816  inf
67672  inf
27959  inf
60071  inf
39735  inf
55312  inf
65248  inf
                0
47842  279.898991
46226  279.891247
44137  279.887034
61038  279.879455
61065  279.879415
78380  279.873536
60466  279.819028
61008  279.796059
61012  279.756888
61104  279.706824


In [25]:
percent_df.to_csv("Research_data/percent_output.csv", encoding='utf-8')