# Joining Hurricane and Housing Dataframes 

In [1]:
#Importing libraries needed
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import pyplot
%matplotlib inline
import numpy as np
import seaborn as sns

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides pandas deprecation/FutureWarning
# messages — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')
# Never truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Joining Housing Values with Hurricanes 
To run logistic regression on our data, we first need to combine the datasets. We use the pandas `DataFrame.join` method to join the housing dataset onto the hurricane dataset, matching rows on `City` and `HurricaneName`.

Documentation can be found here: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.join.html

In [2]:
# Read the hurricane observations from disk and preview the first rows.
hurricane = pd.read_csv(filepath_or_buffer=r'data\hurricane_clean.csv')
hurricane.head(n=5)

Unnamed: 0,DATE,AWND,WSF2,WSF5,HurricaneName,City
0,8/14/2004,5.82,13.0,15.0,1,Apalachicola
1,7/10/2005,19.46,30.0,34.9,2,Apalachicola
2,7/11/2005,17.0,32.0,38.0,2,Apalachicola
3,10/7/2016,10.74,21.9,27.1,3,Apalachicola
4,10/8/2016,8.05,15.0,21.9,3,Apalachicola


In [3]:
# Swap the numeric hurricane codes for short text labels. The codes are cast to
# strings first so they match the dictionary keys; any code that is not a key
# of the mapping becomes NaN (Series.map does not keep unmatched values).
name_by_code = {'1': 'c', '2': 'd', '3': 'ma', '4':'ir', '5':'mi'}
code_as_text = hurricane['HurricaneName'].astype(str)
hurricane['HurricaneName'] = code_as_text.map(name_by_code)

In [4]:
# Preview the frame again to confirm the HurricaneName labels were applied.
hurricane.head(n=5)

Unnamed: 0,DATE,AWND,WSF2,WSF5,HurricaneName,City
0,8/14/2004,5.82,13.0,15.0,c,Apalachicola
1,7/10/2005,19.46,30.0,34.9,d,Apalachicola
2,7/11/2005,17.0,32.0,38.0,d,Apalachicola
3,10/7/2016,10.74,21.9,27.1,ma,Apalachicola
4,10/8/2016,8.05,15.0,21.9,ma,Apalachicola


In [5]:
# Write the relabelled hurricane frame to disk without the row index;
# the join sections below read this file back in.
hurricane.to_csv(path_or_buf=r'data\hurricane_name.csv', index=False)

## Joining 1 year before and after hurricane 

In [6]:
# Load the relabelled hurricane data and the 1-year housing data.
hurricane1 = pd.read_csv(filepath_or_buffer=r'data\hurricane_name.csv')
housing1 = pd.read_csv(filepath_or_buffer=r'data\housing_1year.csv')

In [7]:
# Preview the hurricane frame used for the 1-year join.
hurricane1.head(n=5)

Unnamed: 0,DATE,AWND,WSF2,WSF5,HurricaneName,City
0,8/14/2004,5.82,13.0,15.0,c,Apalachicola
1,7/10/2005,19.46,30.0,34.9,d,Apalachicola
2,7/11/2005,17.0,32.0,38.0,d,Apalachicola
3,10/7/2016,10.74,21.9,27.1,ma,Apalachicola
4,10/8/2016,8.05,15.0,21.9,ma,Apalachicola


In [8]:
# Preview the 1-year housing frame.
housing1.head(n=5)

Unnamed: 0,City,HurricaneName,SizeRank,b,a,percent,bool
0,Jacksonville,c,12,115043.8176,147424.4069,28.14631,0
1,Orlando,c,16,146810.0341,206367.0165,40.567379,0
2,Miami,c,20,182874.7001,274300.9334,49.993921,0
3,Tampa,c,50,125603.8709,176823.2995,40.778543,0
4,Saint Petersburg,c,84,106546.6985,151807.2415,42.479536,0


In [9]:
# Index both frames by (City, HurricaneName), in place, so that DataFrame.join
# can align them on those two keys.
join_keys = ['City', 'HurricaneName']
for frame in (hurricane1, housing1):
    frame.set_index(join_keys, inplace=True)

In [10]:
# Inner-join the housing columns onto the hurricane rows via the shared
# (City, HurricaneName) index; keys missing from either frame are dropped.
df1 = hurricane1.join(other=housing1, how='inner')

In [11]:
# Move City and HurricaneName out of the index and back into regular columns
# (in place), then preview the joined frame.
df1.reset_index(drop=False, inplace=True)
df1.head(n=5)

Unnamed: 0,City,HurricaneName,DATE,AWND,WSF2,WSF5,SizeRank,b,a,percent,bool
0,Apalachicola,c,8/14/2004,5.82,13.0,15.0,12877,84666.67437,128305.1212,51.541468,1
1,Apalachicola,d,7/10/2005,19.46,30.0,34.9,12877,100185.4978,148513.775,48.238795,0
2,Apalachicola,d,7/11/2005,17.0,32.0,38.0,12877,100185.4978,148513.775,48.238795,0
3,Apalachicola,ir,9/11/2017,20.8,36.9,48.1,12877,124189.2028,155516.923,25.2258,1
4,Apalachicola,ir,9/10/2017,19.01,32.0,44.1,12877,124189.2028,155516.923,25.2258,1


In [12]:
# Write the joined 1-year frame (df1) to disk without the row index.
df1.to_csv(path_or_buf=r'data\oneyear.csv', index=False)

## Joining 6 months before and after hurricane 

In [13]:
# Load the relabelled hurricane data and the 6-month housing data.
hurricane6 = pd.read_csv(filepath_or_buffer=r'data\hurricane_name.csv')
housing6 = pd.read_csv(filepath_or_buffer=r'data\housing_6months.csv')

In [14]:
# Preview the hurricane frame loaded for the 6-month section.
hurricane6.head(n=5)

Unnamed: 0,DATE,AWND,WSF2,WSF5,HurricaneName,City
0,8/14/2004,5.82,13.0,15.0,c,Apalachicola
1,7/10/2005,19.46,30.0,34.9,d,Apalachicola
2,7/11/2005,17.0,32.0,38.0,d,Apalachicola
3,10/7/2016,10.74,21.9,27.1,ma,Apalachicola
4,10/8/2016,8.05,15.0,21.9,ma,Apalachicola


In [15]:
# Preview the 6-month housing frame.
housing6.head(n=5)

Unnamed: 0,City,HurricaneName,SizeRank,b,a,percent,bool
0,Jacksonville,c,12,120287.1799,136338.2043,13.343919,0
1,Orlando,c,16,153628.1167,178133.799,15.9513,0
2,Miami,c,20,196585.3564,242294.981,23.251795,1
3,Tampa,c,50,134130.4031,158405.6253,18.098225,0
4,Saint Petersburg,c,84,112809.7002,134746.595,19.44593,0


In [16]:
# Index both frames by (City, HurricaneName), in place, so that DataFrame.join
# can align them on those two keys.
join_keys = ['City', 'HurricaneName']
for frame in (hurricane6, housing6):
    frame.set_index(join_keys, inplace=True)

In [17]:
# Inner-join the 6-month housing columns onto this section's own hurricane
# frame (hurricane6) via the shared (City, HurricaneName) index; keys missing
# from either frame are dropped. Joining from hurricane6 rather than hurricane1
# keeps this section independent of the state left behind by the 1-year section.
df6 = hurricane6.join(housing6, how='inner')

In [18]:
# Move City and HurricaneName out of the index and back into regular columns
# (in place), then preview the joined frame.
df6.reset_index(drop=False, inplace=True)
df6.head(n=5)

Unnamed: 0,City,HurricaneName,DATE,AWND,WSF2,WSF5,SizeRank,b,a,percent,bool
0,Apalachicola,c,8/14/2004,5.82,13.0,15.0,12877,91915.39335,114408.1491,24.471152,1
1,Apalachicola,d,7/10/2005,19.46,30.0,34.9,12877,112287.8968,140227.0087,24.881677,0
2,Apalachicola,d,7/11/2005,17.0,32.0,38.0,12877,112287.8968,140227.0087,24.881677,0
3,Apalachicola,ir,9/11/2017,20.8,36.9,48.1,12877,127923.4267,142053.1799,11.045477,1
4,Apalachicola,ir,9/10/2017,19.01,32.0,44.1,12877,127923.4267,142053.1799,11.045477,1


In [19]:
# Write the joined 6-month frame (df6) to disk without the row index.
df6.to_csv(path_or_buf=r'data\sixmonths.csv', index=False)

## Joining 3 months before and after hurricane 

In [20]:
# Load the relabelled hurricane data and the 3-month housing data.
hurricane3 = pd.read_csv(filepath_or_buffer=r'data\hurricane_name.csv')
housing3 = pd.read_csv(filepath_or_buffer=r'data\housing_3months.csv')

In [21]:
# Preview the hurricane frame loaded for the 3-month section.
hurricane3.head(n=5)

Unnamed: 0,DATE,AWND,WSF2,WSF5,HurricaneName,City
0,8/14/2004,5.82,13.0,15.0,c,Apalachicola
1,7/10/2005,19.46,30.0,34.9,d,Apalachicola
2,7/11/2005,17.0,32.0,38.0,d,Apalachicola
3,10/7/2016,10.74,21.9,27.1,ma,Apalachicola
4,10/8/2016,8.05,15.0,21.9,ma,Apalachicola


In [22]:
# Preview the 3-month housing frame.
housing3.head(n=5)

Unnamed: 0,City,HurricaneName,SizeRank,b,a,percent,bool
0,Jacksonville,c,12,123724.9765,131992.8624,6.682471,0
1,Orlando,c,16,157942.1114,170731.8666,8.097749,0
2,Miami,c,20,205284.1728,228670.425,11.392136,0
3,Tampa,c,50,138894.6936,152272.3797,9.631531,0
4,Saint Petersburg,c,84,116680.584,128696.3694,10.298016,0


In [23]:
# Index both frames by (City, HurricaneName), in place, so that DataFrame.join
# can align them on those two keys.
join_keys = ['City', 'HurricaneName']
for frame in (hurricane3, housing3):
    frame.set_index(join_keys, inplace=True)

In [24]:
# Inner-join the 3-month housing columns onto this section's own hurricane
# frame (hurricane3) via the shared (City, HurricaneName) index; keys missing
# from either frame are dropped. Joining from hurricane3 rather than hurricane1
# keeps this section independent of the state left behind by the 1-year section.
df3 = hurricane3.join(housing3, how='inner')

In [25]:
# Move City and HurricaneName out of the index and back into regular columns
# (in place), then preview the joined frame.
df3.reset_index(drop=False, inplace=True)
df3.head(n=5)

Unnamed: 0,City,HurricaneName,DATE,AWND,WSF2,WSF5,SizeRank,b,a,percent,bool
0,Apalachicola,c,8/14/2004,5.82,13.0,15.0,12877,97342.4766,107842.5207,10.786703,0
1,Apalachicola,d,7/10/2005,19.46,30.0,34.9,12877,118373.8681,134187.4471,13.359012,0
2,Apalachicola,d,7/11/2005,17.0,32.0,38.0,12877,118373.8681,134187.4471,13.359012,0
3,Apalachicola,ir,9/11/2017,20.8,36.9,48.1,12877,130949.8639,139956.6082,6.87801,1
4,Apalachicola,ir,9/10/2017,19.01,32.0,44.1,12877,130949.8639,139956.6082,6.87801,1


In [26]:
# Write the joined 3-month frame (df3) to disk without the row index.
df3.to_csv(path_or_buf=r'data\threemonths.csv', index=False)

## Joining 1 year before and after hurricane (top tier)

In [27]:
# Load the relabelled hurricane data and the top-tier 1-year housing data.
hurricanet = pd.read_csv(filepath_or_buffer=r'data\hurricane_name.csv')
housingt = pd.read_csv(filepath_or_buffer=r'data\toptier1year.csv')

In [28]:
# Preview the hurricane frame loaded for the top-tier section.
hurricanet.head(n=5)

Unnamed: 0,DATE,AWND,WSF2,WSF5,HurricaneName,City
0,8/14/2004,5.82,13.0,15.0,c,Apalachicola
1,7/10/2005,19.46,30.0,34.9,d,Apalachicola
2,7/11/2005,17.0,32.0,38.0,d,Apalachicola
3,10/7/2016,10.74,21.9,27.1,ma,Apalachicola
4,10/8/2016,8.05,15.0,21.9,ma,Apalachicola


In [29]:
# Preview the top-tier housing frame.
housingt.head(n=5)

Unnamed: 0,City,HurricaneName,SizeRank,b,a,percent,bool
0,Jacksonville,c,12,210023.628,269327.0727,28.236559,0
1,Orlando,c,16,257310.7912,363486.4125,41.263571,0
2,Miami,c,20,410383.7472,599733.7569,46.139744,0
3,Tampa,c,50,297085.4976,410355.6034,38.127107,0
4,Saint Petersburg,c,84,232929.2555,329604.9426,41.504313,0


In [30]:
# Index both frames by (City, HurricaneName), in place, so that DataFrame.join
# can align them on those two keys.
join_keys = ['City', 'HurricaneName']
for frame in (hurricanet, housingt):
    frame.set_index(join_keys, inplace=True)

In [31]:
# Inner-join the top-tier housing columns onto this section's own hurricane
# frame (hurricanet) via the shared (City, HurricaneName) index; keys missing
# from either frame are dropped. Joining from hurricanet rather than hurricane1
# keeps this section independent of the state left behind by the 1-year section.
dft = hurricanet.join(housingt, how='inner')

In [32]:
# Move City and HurricaneName out of the index and back into regular columns
# (in place), then preview the joined frame.
dft.reset_index(drop=False, inplace=True)
dft.head(n=5)

Unnamed: 0,City,HurricaneName,DATE,AWND,WSF2,WSF5,SizeRank,b,a,percent,bool
0,Apalachicola,c,8/14/2004,5.82,13.0,15.0,12877,205983.955,309761.0651,50.381162,1
1,Apalachicola,d,7/10/2005,19.46,30.0,34.9,12877,241873.3344,359529.9991,48.643917,0
2,Apalachicola,d,7/11/2005,17.0,32.0,38.0,12877,241873.3344,359529.9991,48.643917,0
3,Apalachicola,ir,9/11/2017,20.8,36.9,48.1,12877,250774.4322,297047.2899,18.451984,1
4,Apalachicola,ir,9/10/2017,19.01,32.0,44.1,12877,250774.4322,297047.2899,18.451984,1


In [33]:
# Write the joined top-tier frame (dft) to disk without the row index.
dft.to_csv(path_or_buf=r'data\top.csv', index=False)

## Joining 1 year before and after hurricane (bottom tier)

In [34]:
# Load the relabelled hurricane data and the bottom-tier 1-year housing data.
hurricaneb = pd.read_csv(filepath_or_buffer=r'data\hurricane_name.csv')
housingb = pd.read_csv(filepath_or_buffer=r'data\bottomtier1year.csv')

In [35]:
# Preview the hurricane frame loaded for the bottom-tier section.
hurricaneb.head(n=5)

Unnamed: 0,DATE,AWND,WSF2,WSF5,HurricaneName,City
0,8/14/2004,5.82,13.0,15.0,c,Apalachicola
1,7/10/2005,19.46,30.0,34.9,d,Apalachicola
2,7/11/2005,17.0,32.0,38.0,d,Apalachicola
3,10/7/2016,10.74,21.9,27.1,ma,Apalachicola
4,10/8/2016,8.05,15.0,21.9,ma,Apalachicola


In [36]:
# Preview the bottom-tier housing frame.
housingb.head(n=5)

Unnamed: 0,City,HurricaneName,SizeRank,b,a,percent,bool
0,Jacksonville,c,12,44567.92638,57229.67105,28.409993,0
1,Orlando,c,16,72303.0376,102573.8266,41.866552,0
2,Miami,c,20,80952.39604,120303.6121,48.610317,1
3,Tampa,c,50,48166.95487,69551.37222,44.396449,0
4,Saint Petersburg,c,84,45122.90669,65981.61823,46.226436,0


In [37]:
# Index both frames by (City, HurricaneName), in place, so that DataFrame.join
# can align them on those two keys.
join_keys = ['City', 'HurricaneName']
for frame in (hurricaneb, housingb):
    frame.set_index(join_keys, inplace=True)

In [38]:
# Inner-join the bottom-tier housing columns onto this section's own hurricane
# frame (hurricaneb) via the shared (City, HurricaneName) index; keys missing
# from either frame are dropped. Joining from hurricaneb rather than hurricane1
# keeps this section independent of the state left behind by the 1-year section.
dfb = hurricaneb.join(housingb, how='inner')

In [39]:
# Move City and HurricaneName out of the index and back into regular columns
# (in place), then preview the joined frame.
dfb.reset_index(drop=False, inplace=True)
dfb.head(n=5)

Unnamed: 0,City,HurricaneName,DATE,AWND,WSF2,WSF5,SizeRank,b,a,percent,bool
0,Apalachicola,c,8/14/2004,5.82,13.0,15.0,12877,30464.93986,46467.43038,52.527563,1
1,Apalachicola,d,7/10/2005,19.46,30.0,34.9,12877,35905.97635,54321.88969,51.289271,0
2,Apalachicola,d,7/11/2005,17.0,32.0,38.0,12877,35905.97635,54321.88969,51.289271,0
3,Apalachicola,ir,9/11/2017,20.8,36.9,48.1,12877,48530.44212,72702.66809,49.808378,1
4,Apalachicola,ir,9/10/2017,19.01,32.0,44.1,12877,48530.44212,72702.66809,49.808378,1


In [40]:
# Write the joined bottom-tier frame (dfb) to disk without the row index.
dfb.to_csv(path_or_buf=r'data\bottom.csv', index=False)

## Checking Crosstabs

### 1 year before and after

In [41]:
# Summary statistics of the 1-year joined frame, restricted to rows where bool == 0.
df1.loc[df1['bool'].eq(0)].describe()

Unnamed: 0,AWND,WSF2,WSF5,SizeRank,b,a,percent,bool
count,315.0,315.0,315.0,315.0,315.0,315.0,315.0,315.0
mean,13.923651,27.633016,36.967302,921.720635,176305.827707,215575.671253,24.342529,0.0
std,6.862933,12.701339,17.514614,2463.998533,55621.738065,61110.580623,15.350324,0.0
min,2.91,0.0,0.0,12.0,36778.69082,52569.71289,-2.064778,0.0
25%,8.835,19.0,25.1,67.0,137006.4578,175130.2995,13.835094,0.0
50%,12.75,23.9,32.0,158.0,166146.4842,206989.2568,18.1339,0.0
75%,17.0,31.55,40.9,390.0,205063.2212,249221.0568,37.194415,0.0
max,40.26,79.0,104.9,12877.0,308451.5687,348632.7151,56.873456,0.0


In [42]:
# Summary statistics of the 1-year joined frame, restricted to rows where bool == 1.
df1.loc[df1['bool'].eq(1)].describe()

Unnamed: 0,AWND,WSF2,WSF5,SizeRank,b,a,percent,bool
count,78.0,78.0,78.0,78.0,78.0,78.0,78.0,78.0
mean,14.324487,28.012821,38.187179,2308.923077,138046.00355,189122.095159,36.822694,1.0
std,7.559851,12.593519,18.356487,3912.835302,29055.244728,48273.967519,17.179685,0.0
min,4.92,6.9,8.1,16.0,56478.52674,69933.92327,17.483798,1.0
25%,8.95,19.9,25.1,110.0,123669.8468,156820.042225,23.304809,1.0
50%,12.97,23.9,33.1,636.5,141354.14565,182591.82145,28.6325,1.0
75%,17.45,31.775,44.1,1644.0,160734.512125,214262.8752,56.745,1.0
max,40.71,70.9,89.0,12877.0,185239.9829,295959.7052,76.822653,1.0


### 6 months before and after

In [43]:
# Summary statistics of the 6-month joined frame, restricted to rows where bool == 0.
df6.loc[df6['bool'].eq(0)].describe()

Unnamed: 0,AWND,WSF2,WSF5,SizeRank,b,a,percent,bool
count,295.0,295.0,295.0,295.0,295.0,295.0,295.0,295.0
mean,13.984847,27.912881,37.398983,889.637288,185142.673781,204674.482058,11.303943,0.0
std,6.894346,12.900748,17.750983,2364.811855,57702.534399,60232.824588,7.149602,0.0
min,4.03,0.0,0.0,12.0,47433.41348,55475.02634,-0.304135,0.0
25%,8.835,19.0,25.1,89.0,143236.4984,158405.6253,5.995205,0.0
50%,12.3,23.9,32.0,190.0,175538.384,194821.1826,9.733574,0.0
75%,17.0,32.0,42.5,547.0,224035.0595,240455.6542,16.040337,0.0
max,38.92,79.0,104.9,12877.0,328091.1098,346404.3781,28.550268,0.0


In [44]:
# Summary statistics of the 6-month joined frame, restricted to rows where bool == 1.
df6.loc[df6['bool'].eq(1)].describe()

Unnamed: 0,AWND,WSF2,WSF5,SizeRank,b,a,percent,bool
count,98.0,98.0,98.0,98.0,98.0,98.0,98.0,98.0
mean,14.058469,27.092857,36.638776,2122.397959,155507.419096,184628.009394,18.543131,1.0
std,7.339515,11.969474,17.492571,3865.849021,39176.779164,51224.920943,9.395508,0.0
min,2.91,6.9,8.1,12.0,38971.28538,48205.34273,8.02659,1.0
25%,8.95,19.9,25.1,50.0,131778.2043,154504.62885,11.041181,1.0
50%,12.97,23.9,33.1,529.0,170958.3782,187886.4279,12.872659,1.0
75%,17.3925,31.1,40.0,1410.0,179526.5171,207349.9328,29.445031,1.0
max,40.71,70.9,91.0,12877.0,237636.1255,316361.4918,34.925821,1.0


### 3 months before and after

In [45]:
# Summary statistics of the 3-month joined frame, restricted to rows where bool == 0.
df3.loc[df3['bool'].eq(0)].describe()

Unnamed: 0,AWND,WSF2,WSF5,SizeRank,b,a,percent,bool
count,309.0,309.0,309.0,309.0,309.0,309.0,309.0,309.0
mean,13.851521,27.538835,36.972492,999.553398,186746.25898,196911.73731,5.830512,0.0
std,6.774603,12.62463,17.352553,2644.472397,58908.830319,60087.82584,3.657482,0.0
min,4.03,0.0,0.0,12.0,48954.04127,55730.32546,0.383902,0.0
25%,8.95,19.9,25.1,50.0,146363.727,153459.7703,3.504553,0.0
50%,12.3,23.9,32.0,158.0,175495.7879,184179.9267,4.761422,0.0
75%,16.55,31.1,40.9,704.0,224942.2132,229576.9998,7.60889,0.0
max,38.92,79.0,104.9,12877.0,335151.388,344055.505,15.496296,0.0


In [46]:
# Summary statistics of the 3-month joined frame, restricted to rows where bool == 1.
df3.loc[df3['bool'].eq(1)].describe()

Unnamed: 0,AWND,WSF2,WSF5,SizeRank,b,a,percent,bool
count,84.0,84.0,84.0,84.0,84.0,84.0,84.0,84.0
mean,14.56119,28.332143,38.080952,1923.52381,167857.963374,186960.505673,11.025801,1.0
std,7.785572,12.868989,18.865028,3462.854117,40412.32795,49042.308234,4.80177,0.0
min,2.91,6.9,8.1,16.0,39935.07507,45273.54677,5.408594,1.0
25%,8.2225,19.45,25.1,190.0,141649.499525,157339.88855,6.588842,1.0
50%,13.2,25.5,34.0,529.0,173379.8641,187906.1747,8.518905,1.0
75%,18.68,33.1,44.325,1410.0,190990.3724,215248.7958,16.177124,1.0
max,40.71,70.9,91.0,12877.0,258764.6282,306883.0758,19.54962,1.0


### 1 year before and after (top)

In [47]:
# Summary statistics of the top-tier joined frame, restricted to rows where bool == 0.
dft.loc[dft['bool'].eq(0)].describe()

Unnamed: 0,AWND,WSF2,WSF5,SizeRank,b,a,percent,bool
count,326.0,326.0,326.0,326.0,326.0,326.0,326.0,326.0
mean,14.17773,27.883436,37.412577,884.43865,323776.961287,379171.174568,18.462695,0.0
std,7.019189,12.747729,17.610425,2423.644893,115778.531729,132832.915844,15.69494,0.0
min,2.91,0.0,0.0,12.0,93659.63384,128379.3721,-3.18999,0.0
25%,8.7775,19.9,25.1,50.0,239925.33915,282507.9206,8.551174,0.0
50%,12.75,24.5,33.1,158.0,308086.6322,360198.59905,11.830917,0.0
75%,17.45,32.0,41.8,390.0,368644.0059,431304.8193,32.812471,0.0
max,40.26,79.0,104.9,12877.0,657614.9833,806155.7195,53.813962,0.0


In [48]:
# Summary statistics of the top-tier joined frame, restricted to rows where bool == 1.
dft.loc[dft['bool'].eq(1)].describe()

Unnamed: 0,AWND,WSF2,WSF5,SizeRank,b,a,percent,bool
count,75.0,75.0,75.0,75.0,75.0,75.0,75.0,75.0
mean,13.325733,26.58,36.013333,2439.346667,248972.888023,338098.721553,33.895865,1.0
std,6.738287,12.326921,17.864523,3956.606304,56187.80443,110951.437484,19.884905,0.0
min,4.92,6.9,8.1,16.0,128379.3721,148090.129,12.36308,1.0
25%,8.95,18.1,25.1,299.0,214738.3114,262361.2039,15.448414,1.0
50%,12.08,23.0,32.0,744.0,240315.8699,297047.2899,22.286392,1.0
75%,15.1,30.55,40.0,2210.0,282496.4548,426042.7441,55.300691,1.0
max,40.71,70.9,89.0,12877.0,389971.1956,615189.1515,72.934085,1.0


### 1 year before and after (bottom)

In [49]:
# Summary statistics of the bottom-tier joined frame, restricted to rows where bool == 0.
dfb.loc[dfb['bool'].eq(0)].describe()

Unnamed: 0,AWND,WSF2,WSF5,SizeRank,b,a,percent,bool
count,216.0,216.0,216.0,216.0,216.0,216.0,216.0,216.0
mean,13.425324,27.025463,36.302778,683.115741,100513.558828,125175.636082,26.814756,0.0
std,6.714788,12.379324,17.152435,1801.205651,35203.040004,38762.319107,11.06946,0.0
min,4.03,10.1,13.0,12.0,35905.97635,54321.88969,7.647854,0.0
25%,8.5,18.1,25.1,106.0,71509.84332,94463.549265,17.953706,0.0
50%,12.08,23.0,31.55,190.0,96071.43336,129002.3846,24.529904,0.0
75%,16.33,30.275,40.9,704.0,129002.3846,154413.6125,32.274806,0.0
max,40.26,79.0,104.9,12877.0,171065.7092,207257.2684,52.350181,0.0


In [50]:
# Summary statistics of the bottom-tier joined frame, restricted to rows where bool == 1.
dfb.loc[dfb['bool'].eq(1)].describe()

Unnamed: 0,AWND,WSF2,WSF5,SizeRank,b,a,percent,bool
count,183.0,183.0,183.0,183.0,183.0,183.0,183.0,183.0
mean,14.798197,28.721858,40.063388,1006.704918,73045.943163,105950.667654,45.901956,1.0
std,7.039191,13.15888,26.368396,2426.726,18054.767695,25590.056822,12.353831,0.0
min,4.7,0.0,0.0,12.0,30464.93986,46467.43038,26.955859,1.0
25%,9.17,19.9,25.1,20.0,60023.16325,86019.586375,34.650076,1.0
50%,13.42,25.9,35.1,190.0,71558.43393,104717.9638,44.538904,1.0
75%,18.68,33.1,44.1,744.0,85433.3146,124345.488,55.723078,1.0
max,40.71,70.9,293.9,12877.0,109456.3301,167437.6376,87.186053,1.0
