# Joining Hurricane and Housing Dataframes 

In [8]:
#Importing libraries needed
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import pyplot
%matplotlib inline
import numpy as np
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)

# Joining Housing Values with Hurricanes 
To run logistic regression on our data, we first need to join the datasets. We will use the pandas `DataFrame.join` method, joining the housing dataset onto the hurricane dataset.

Documentation can be found here: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.join.html

In [15]:
# Load the cleaned hurricane observations (DATE, AWND, WSF2, WSF5,
# HurricaneName, City) and preview the first rows.
# The path uses forward slashes so it resolves on Windows, macOS and Linux;
# a backslash is only a directory separator on Windows.
hurricane = pd.read_csv('data/hurricane_clean.csv')
hurricane.head()

Unnamed: 0,DATE,AWND,WSF2,WSF5,HurricaneName,City
0,8/14/2004,5.82,13.0,15.0,1,Apalachicola
1,7/10/2005,19.46,30.0,34.9,2,Apalachicola
2,7/11/2005,17.0,32.0,38.0,2,Apalachicola
3,10/7/2016,10.74,21.9,27.1,3,Apalachicola
4,10/8/2016,8.05,15.0,21.9,3,Apalachicola


In [18]:
# Translate the numeric hurricane identifiers into the short letter codes
# ('c', 'd', 'ma', 'ir', 'mi') so HurricaneName can serve as a join key
# against the housing table, which identifies hurricanes by those codes.
# NOTE(review): the codes presumably abbreviate the storm names — confirm.
hurricane_codes = {'1': 'c', '2': 'd', '3': 'ma', '4': 'ir', '5': 'mi'}
raw_names = hurricane['HurricaneName'].astype(str)
# Values that are already letter codes are kept unchanged. Without this
# guard, re-running the cell would turn every row into NaN, because .map()
# returns NaN for any value that is not a key of the dictionary.
already_coded = raw_names.isin(list(hurricane_codes.values()))
hurricane['HurricaneName'] = raw_names.map(hurricane_codes).mask(already_coded, raw_names)

In [19]:
# Preview the first five rows to confirm HurricaneName now holds letter codes.
hurricane.head(n=5)

Unnamed: 0,DATE,AWND,WSF2,WSF5,HurricaneName,City
0,8/14/2004,5.82,13.0,15.0,c,Apalachicola
1,7/10/2005,19.46,30.0,34.9,d,Apalachicola
2,7/11/2005,17.0,32.0,38.0,d,Apalachicola
3,10/7/2016,10.74,21.9,27.1,ma,Apalachicola
4,10/8/2016,8.05,15.0,21.9,ma,Apalachicola


In [20]:
# Save the hurricane dataframe with letter-coded HurricaneName values.
# index=False keeps the positional index out of the file so it is not read
# back as an extra unnamed column. Forward slashes keep the path portable
# across operating systems.
hurricane.to_csv('data/hurricane_name.csv', index=False)

## Joining 1 year before and after hurricane 

In [48]:
# Open the two inputs for the one-year window: the hurricane observations
# with letter-coded HurricaneName, and the one-year housing values.
# Forward slashes keep the paths portable across operating systems.
hurricane1 = pd.read_csv('data/hurricane_name.csv')
housing1 = pd.read_csv('data/housing_1year.csv')

In [49]:
# Preview the first five hurricane rows before joining.
hurricane1.head(n=5)

Unnamed: 0,DATE,AWND,WSF2,WSF5,HurricaneName,City
0,8/14/2004,5.82,13.0,15.0,c,Apalachicola
1,7/10/2005,19.46,30.0,34.9,d,Apalachicola
2,7/11/2005,17.0,32.0,38.0,d,Apalachicola
3,10/7/2016,10.74,21.9,27.1,ma,Apalachicola
4,10/8/2016,8.05,15.0,21.9,ma,Apalachicola


In [50]:
# Preview the first five housing rows before joining.
housing1.head(n=5)

Unnamed: 0,City,HurricaneName,SizeRank,b,a,percent,bool
0,Jacksonville,c,12,115043.8176,147424.4069,28.14631,0
1,Orlando,c,16,146810.0341,206367.0165,40.567379,0
2,Miami,c,20,182874.7001,274300.9334,49.993921,0
3,Tampa,c,50,125603.8709,176823.2995,40.778543,0
4,Saint Petersburg,c,84,106546.6985,151807.2415,42.479536,0


In [51]:
# Index both frames by (City, HurricaneName): .join() aligns rows on the
# index, so both columns must be in the index of each frame to act as the
# join key.
hurricane1, housing1 = (
    frame.set_index(['City', 'HurricaneName'])
    for frame in (hurricane1, housing1)
)

In [52]:
# Attach the housing columns to each hurricane observation. The inner join
# keeps only (City, HurricaneName) pairs that are present in both frames.
df1 = hurricane1.join(other=housing1, how='inner')

In [53]:
# Turn the join keys back into ordinary columns, then preview the result.
df1 = df1.reset_index()
df1.head(n=5)

Unnamed: 0,City,HurricaneName,DATE,AWND,WSF2,WSF5,SizeRank,b,a,percent,bool
0,Apalachicola,c,8/14/2004,5.82,13.0,15.0,12877,84666.67437,128305.1212,51.541468,1
1,Apalachicola,d,7/10/2005,19.46,30.0,34.9,12877,100185.4978,148513.775,48.238795,0
2,Apalachicola,d,7/11/2005,17.0,32.0,38.0,12877,100185.4978,148513.775,48.238795,0
3,Apalachicola,ir,9/11/2017,20.8,36.9,48.1,12877,124189.2028,155516.923,25.2258,1
4,Apalachicola,ir,9/10/2017,19.01,32.0,44.1,12877,124189.2028,155516.923,25.2258,1


In [54]:
# Save the joined one-year dataframe (df1).
# index=False keeps the positional index out of the file; forward slashes
# keep the path portable across operating systems.
df1.to_csv('data/oneyear.csv', index=False)

## Joining 6 months before and after hurricane 

In [64]:
# Open the two inputs for the six-month window: the cleaned hurricane
# observations and the six-month housing values.
# Forward slashes keep the paths portable across operating systems.
# NOTE(review): this reads hurricane_clean.csv, where HurricaneName is the
# numeric identifier rather than the letter code — confirm that is intended.
hurricane6 = pd.read_csv('data/hurricane_clean.csv')
housing6 = pd.read_csv('data/housing_6months.csv')

In [65]:
# Index both frames by City: .join() aligns rows on the index, so City must
# be the index of each frame to act as the join key.
hurricane6, housing6 = (
    frame.set_index(['City'])
    for frame in (hurricane6, housing6)
)

In [66]:
# Attach the housing columns to each hurricane observation. The inner join
# keeps only cities that are present in both frames.
df6 = hurricane6.join(other=housing6, how='inner')

In [67]:
# Turn City back into an ordinary column, then preview the result.
df6 = df6.reset_index()
df6.head(n=5)

Unnamed: 0,City,DATE,AWND,WSF2,WSF5,HurricaneName,SizeRank,bc,ac,bd,...,percentc,percentd,percentma,percentir,percentmi,boolc,boold,boolma,boolir,boolmi
0,Apalachicola,8/14/2004,5.82,13.0,15.0,1,12877,91915.39335,114408.1491,112287.8968,...,24.471152,24.881677,3.115506,11.045477,11.724797,1,0,0,1,1
1,Apalachicola,7/10/2005,19.46,30.0,34.9,2,12877,91915.39335,114408.1491,112287.8968,...,24.471152,24.881677,3.115506,11.045477,11.724797,1,0,0,1,1
2,Apalachicola,7/11/2005,17.0,32.0,38.0,2,12877,91915.39335,114408.1491,112287.8968,...,24.471152,24.881677,3.115506,11.045477,11.724797,1,0,0,1,1
3,Apalachicola,10/7/2016,10.74,21.9,27.1,3,12877,91915.39335,114408.1491,112287.8968,...,24.471152,24.881677,3.115506,11.045477,11.724797,1,0,0,1,1
4,Apalachicola,10/8/2016,8.05,15.0,21.9,3,12877,91915.39335,114408.1491,112287.8968,...,24.471152,24.881677,3.115506,11.045477,11.724797,1,0,0,1,1


In [68]:
# Save the joined six-month dataframe (df6).
# index=False keeps the positional index out of the file; forward slashes
# keep the path portable across operating systems.
df6.to_csv('data/sixmonths.csv', index=False)

## Joining 3 months before and after hurricane 

In [69]:
# Open the two inputs for the three-month window: the cleaned hurricane
# observations and the three-month housing values.
# Forward slashes keep the paths portable across operating systems.
# NOTE(review): this reads hurricane_clean.csv, where HurricaneName is the
# numeric identifier rather than the letter code — confirm that is intended.
hurricane3 = pd.read_csv('data/hurricane_clean.csv')
housing3 = pd.read_csv('data/housing_3months.csv')

In [70]:
# Index both frames by City: .join() aligns rows on the index, so City must
# be the index of each frame to act as the join key.
hurricane3, housing3 = (
    frame.set_index(['City'])
    for frame in (hurricane3, housing3)
)

In [71]:
# Attach the housing columns to each hurricane observation. The inner join
# keeps only cities that are present in both frames.
df3 = hurricane3.join(other=housing3, how='inner')

In [72]:
# Turn City back into an ordinary column, then preview the result.
df3 = df3.reset_index()
df3.head(n=5)

Unnamed: 0,City,DATE,AWND,WSF2,WSF5,HurricaneName,SizeRank,bc,ac,bd,...,percentc,percentd,percentma,percentir,percentmi,boolc,boold,boolma,boolir,boolmi
0,Apalachicola,8/14/2004,5.82,13.0,15.0,1,12877,97342.4766,107842.5207,118373.8681,...,10.786703,13.359012,2.213214,6.87801,7.389938,0,0,0,1,1
1,Apalachicola,7/10/2005,19.46,30.0,34.9,2,12877,97342.4766,107842.5207,118373.8681,...,10.786703,13.359012,2.213214,6.87801,7.389938,0,0,0,1,1
2,Apalachicola,7/11/2005,17.0,32.0,38.0,2,12877,97342.4766,107842.5207,118373.8681,...,10.786703,13.359012,2.213214,6.87801,7.389938,0,0,0,1,1
3,Apalachicola,10/7/2016,10.74,21.9,27.1,3,12877,97342.4766,107842.5207,118373.8681,...,10.786703,13.359012,2.213214,6.87801,7.389938,0,0,0,1,1
4,Apalachicola,10/8/2016,8.05,15.0,21.9,3,12877,97342.4766,107842.5207,118373.8681,...,10.786703,13.359012,2.213214,6.87801,7.389938,0,0,0,1,1


In [73]:
# Save the joined three-month dataframe (df3).
# index=False keeps the positional index out of the file; forward slashes
# keep the path portable across operating systems.
df3.to_csv('data/threemonths.csv', index=False)

## Joining top tier housing before and after hurricane 

In [74]:
# Open the two inputs for the top-tier analysis: the cleaned hurricane
# observations and the one-year top-tier housing values.
# Forward slashes keep the paths portable across operating systems.
# NOTE(review): this reads hurricane_clean.csv, where HurricaneName is the
# numeric identifier rather than the letter code — confirm that is intended.
hurricanet = pd.read_csv('data/hurricane_clean.csv')
housingt = pd.read_csv('data/toptier1year.csv')

In [75]:
# Index both frames by City: .join() aligns rows on the index, so City must
# be the index of each frame to act as the join key.
hurricanet, housingt = (
    frame.set_index(['City'])
    for frame in (hurricanet, housingt)
)

In [76]:
# Attach the top-tier housing columns to each hurricane observation. The
# inner join keeps only cities that are present in both frames.
dft = hurricanet.join(other=housingt, how='inner')

In [77]:
# Turn City back into an ordinary column, then preview the result.
dft = dft.reset_index()
dft.head(n=5)

Unnamed: 0,City,DATE,AWND,WSF2,WSF5,HurricaneName,SizeRank,bc,ac,bd,...,percentc,percentd,percentma,percentir,percentmi,boolc,boold,boolma,boolir,boolmi
0,Apalachicola,8/14/2004,5.82,13.0,15.0,1,12877,205983.955,309761.0651,241873.3344,...,50.381162,48.643917,8.237653,18.451984,15.893959,1,0,0,1,1
1,Apalachicola,7/10/2005,19.46,30.0,34.9,2,12877,205983.955,309761.0651,241873.3344,...,50.381162,48.643917,8.237653,18.451984,15.893959,1,0,0,1,1
2,Apalachicola,7/11/2005,17.0,32.0,38.0,2,12877,205983.955,309761.0651,241873.3344,...,50.381162,48.643917,8.237653,18.451984,15.893959,1,0,0,1,1
3,Apalachicola,10/7/2016,10.74,21.9,27.1,3,12877,205983.955,309761.0651,241873.3344,...,50.381162,48.643917,8.237653,18.451984,15.893959,1,0,0,1,1
4,Apalachicola,10/8/2016,8.05,15.0,21.9,3,12877,205983.955,309761.0651,241873.3344,...,50.381162,48.643917,8.237653,18.451984,15.893959,1,0,0,1,1


In [78]:
# Save the joined top-tier dataframe (dft).
# index=False keeps the positional index out of the file; forward slashes
# keep the path portable across operating systems.
dft.to_csv('data/top.csv', index=False)

## Joining bottom tier before and after hurricane 

In [79]:
# Open the two inputs for the bottom-tier analysis: the cleaned hurricane
# observations and the one-year bottom-tier housing values.
# Forward slashes keep the paths portable across operating systems.
# NOTE(review): this reads hurricane_clean.csv, where HurricaneName is the
# numeric identifier rather than the letter code — confirm that is intended.
hurricaneb = pd.read_csv('data/hurricane_clean.csv')
housingb = pd.read_csv('data/bottomtier1year.csv')

In [80]:
# Index both frames by City: .join() aligns rows on the index, so City must
# be the index of each frame to act as the join key.
hurricaneb, housingb = (
    frame.set_index(['City'])
    for frame in (hurricaneb, housingb)
)

In [81]:
# Attach the bottom-tier housing columns to each hurricane observation. The
# inner join keeps only cities that are present in both frames.
dfb = hurricaneb.join(other=housingb, how='inner')

In [82]:
# Turn City back into an ordinary column, then preview the result.
dfb = dfb.reset_index()
dfb.head(n=5)

Unnamed: 0,City,DATE,AWND,WSF2,WSF5,HurricaneName,SizeRank,bc,ac,bd,...,percentc,percentd,percentma,percentir,percentmi,boolc,boold,boolma,boolir,boolmi
0,Apalachicola,8/14/2004,5.82,13.0,15.0,1,12877,30464.93986,46467.43038,35905.97635,...,52.527563,51.289271,24.954656,49.808378,38.192255,0,0,0,1,1
1,Apalachicola,7/10/2005,19.46,30.0,34.9,2,12877,30464.93986,46467.43038,35905.97635,...,52.527563,51.289271,24.954656,49.808378,38.192255,0,0,0,1,1
2,Apalachicola,7/11/2005,17.0,32.0,38.0,2,12877,30464.93986,46467.43038,35905.97635,...,52.527563,51.289271,24.954656,49.808378,38.192255,0,0,0,1,1
3,Apalachicola,10/7/2016,10.74,21.9,27.1,3,12877,30464.93986,46467.43038,35905.97635,...,52.527563,51.289271,24.954656,49.808378,38.192255,0,0,0,1,1
4,Apalachicola,10/8/2016,8.05,15.0,21.9,3,12877,30464.93986,46467.43038,35905.97635,...,52.527563,51.289271,24.954656,49.808378,38.192255,0,0,0,1,1


In [83]:
# Save the joined bottom-tier dataframe (dfb).
# index=False keeps the positional index out of the file; forward slashes
# keep the path portable across operating systems.
dfb.to_csv('data/bottom.csv', index=False)