## Data Preparation: Notebook shows how you prepare your data and explains why by including…  
- Instructions or code needed to get and prepare the raw data for analysis  
- Code comments and text to explain what your data preparation code does  
- Valid justifications for why the steps you took are appropriate for the problem you are solving  

# Joining Hurricane and Housing Dataframes 

In [1]:
#Importing libraries needed
#pandas loads the csv files and performs the joins below
import pandas as pd
#NOTE(review): pyplot is imported twice, once as plt and once as pyplot
import matplotlib.pyplot as plt
from matplotlib import pyplot
#render matplotlib figures inline in the notebook output
%matplotlib inline
import numpy as np
import seaborn as sns

#silence every warning message for the rest of the session
import warnings
warnings.filterwarnings('ignore')
#None removes the column limit so wide dataframes are displayed in full
pd.set_option('display.max_columns', None)

## Obtaining Data 

In [2]:
#opening dataframes: one housing file per price tier plus the cleaned hurricane readings
#forward slashes keep the relative paths valid on Windows, macOS and Linux alike
#(a backslash is only a path separator on Windows)
bottom = pd.read_csv('data/bottom_housing.csv')
middle = pd.read_csv('data/middle_housing.csv')
top = pd.read_csv('data/top_housing.csv')
hurricane = pd.read_csv('data/hurricane_clean.csv')

In [3]:
#previewing the first five rows of the bottom tier housing data
bottom.head(n=5)

Unnamed: 0,City,HurricaneName,SizeRank,b,a,percent,bool
0,Jacksonville,c,12,46528.30349,52803.3279,13.486467,0.0
1,Orlando,c,16,75863.27537,88560.22345,16.73662,0.0
2,Miami,c,20,86752.55847,106338.7075,22.577028,0.0
3,Tampa,c,50,51585.4106,61309.89329,18.851227,0.0
4,Saint Petersburg,c,84,47796.13229,57880.05754,21.097785,0.0


In [4]:
#previewing the first five rows of the middle tier housing data
middle.head(n=5)

Unnamed: 0,City,HurricaneName,SizeRank,b,a,percent,bool
0,Jacksonville,c,12,120287.1799,136338.2043,13.343919,0.0
1,Orlando,c,16,153628.1167,178133.799,15.9513,0.0
2,Miami,c,20,196585.3564,242294.981,23.251795,1.0
3,Tampa,c,50,134130.4031,158405.6253,18.098225,0.0
4,Saint Petersburg,c,84,112809.7002,134746.595,19.44593,0.0


In [5]:
#previewing the first five rows of the top tier housing data
top.head(n=5)

Unnamed: 0,City,HurricaneName,SizeRank,b,a,percent,bool
0,Jacksonville,c,12,219711.2386,249137.7656,13.393273,0.0
1,Orlando,c,16,268593.099,311622.3096,16.020222,0.0
2,Miami,c,20,438933.0461,531656.5319,21.124745,0.0
3,Tampa,c,50,314461.3881,368173.9723,17.08082,0.0
4,Saint Petersburg,c,84,245925.9512,292718.6866,19.027165,0.0


In [6]:
#previewing the first five rows of the hurricane readings
hurricane.head(n=5)

Unnamed: 0,DATE,AWND,WSF2,HurricaneName,City
0,8/14/2004,5.82,13.0,1,Apalachicola
1,7/10/2005,19.46,30.0,2,Apalachicola
2,7/11/2005,17.0,32.0,2,Apalachicola
3,10/7/2016,10.74,21.9,3,Apalachicola
4,10/8/2016,8.05,15.0,3,Apalachicola


In [7]:
#counting the missing values in each hurricane column
hurricane.isnull().sum()

DATE             0
AWND             0
WSF2             0
HurricaneName    0
City             0
dtype: int64

# Joining Housing Values with Hurricanes 
In order to do logistic regression on our data, we need to join the datasets. We will use the pandas `DataFrame.join` method to attach the housing values to each hurricane observation, matching rows on `City` and `HurricaneName`.

Documentation can be found here: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.join.html

In [8]:
#translating the numeric hurricane codes into the short names used by the housing dataframes
#.replace() leaves values that are not keys of the dict untouched, so this cell is safe to re-run
#(.map() would turn every already-translated name into NaN on a second run)
hurricane_codes = {'1': 'c', '2': 'd', '3': 'ma', '4': 'ir', '5': 'mi'}
hurricane['HurricaneName'] = hurricane['HurricaneName'].astype(str).replace(hurricane_codes)

In [9]:
#checking that HurricaneName now holds the short names instead of the numeric codes
hurricane.head(n=5)

Unnamed: 0,DATE,AWND,WSF2,HurricaneName,City
0,8/14/2004,5.82,13.0,c,Apalachicola
1,7/10/2005,19.46,30.0,d,Apalachicola
2,7/11/2005,17.0,32.0,d,Apalachicola
3,10/7/2016,10.74,21.9,ma,Apalachicola
4,10/8/2016,8.05,15.0,ma,Apalachicola


In [10]:
#saving the renamed hurricane data (index=False leaves the row index out of the file)
#forward slash keeps the relative path valid on Windows, macOS and Linux alike
hurricane.to_csv('data/hurricane_name.csv', index=False)

In [11]:
#re-opening the saved hurricane data (forward slash keeps the path cross-platform)
#City and HurricaneName become the index because .join() matches rows on the index;
#chaining set_index() builds the indexed frame in one step instead of mutating it in place
hurricane = pd.read_csv('data/hurricane_name.csv').set_index(['City', 'HurricaneName'])

### Writing a Function
This function joins one tier of housing data with the hurricane data on `City` and `HurricaneName`, so the same steps can be reused for the bottom, middle, and top tiers.

In [12]:
def join(df, weather=None):
    """Inner-join a housing dataframe onto the hurricane readings.

    Parameters
    ----------
    df : pandas.DataFrame
        Housing data containing 'City' and 'HurricaneName' columns.
        It is not modified by this function.
    weather : pandas.DataFrame, optional
        Hurricane readings indexed by ['City', 'HurricaneName'].
        Defaults to the notebook-level `hurricane` dataframe.

    Returns
    -------
    pandas.DataFrame
        The rows whose (City, HurricaneName) pair appears in both frames,
        with 'City' and 'HurricaneName' restored as ordinary columns,
        followed by the hurricane columns and then the housing columns.
    """
    keys = ['City', 'HurricaneName']
    if weather is None:
        weather = hurricane
    #set_index returns a new frame, so the caller's dataframe keeps its columns
    #and a cell that calls join() can be re-run without raising a KeyError
    housing = df.set_index(keys)
    #joining the dataframes; 'inner' keeps only the keys present in both
    joined = weather.join(housing, how='inner')
    #resetting the index turns City and HurricaneName back into columns
    return joined.reset_index()

## Joining Bottom Tier Home Values with Hurricane Data

In [13]:
#attaching the hurricane readings to the bottom tier home values
bottom_hurricane = join(df=bottom)
#displaying the joined dataframe
bottom_hurricane

Unnamed: 0,City,HurricaneName,DATE,AWND,WSF2,SizeRank,b,a,percent,bool
0,Apalachicola,c,8/14/2004,5.82,13.0,12877,33025.67901,41196.08834,24.739565,1.0
1,Apalachicola,d,7/10/2005,19.46,30.0,12877,40369.46137,50891.84431,26.065205,0.0
2,Apalachicola,d,7/11/2005,17.00,32.0,12877,40369.46137,50891.84431,26.065205,0.0
3,Apalachicola,ma,10/7/2016,10.74,21.9,12877,48548.85811,52980.36723,9.127937,0.0
4,Apalachicola,ma,10/8/2016,8.05,15.0,12877,48548.85811,52980.36723,9.127937,0.0
...,...,...,...,...,...,...,...,...,...,...
294,Winter Haven,ma,10/7/2016,13.42,25.9,395,69267.39054,80862.30360,16.739353,1.0
295,Winter Haven,ma,10/8/2016,8.72,18.1,395,69267.39054,80862.30360,16.739353,1.0
296,Winter Haven,mi,10/11/2018,7.38,16.1,395,94132.64063,110238.51310,17.109764,1.0
297,Winter Haven,mi,10/10/2018,13.20,25.1,395,94132.64063,110238.51310,17.109764,1.0


In [14]:
#counting the missing values in each column of the joined bottom tier data
bottom_hurricane.isnull().sum()

City             0
HurricaneName    0
DATE             0
AWND             0
WSF2             0
SizeRank         0
b                0
a                0
percent          0
bool             0
dtype: int64

## Joining Middle Tier Home Values with Hurricane Data 

In [15]:
#attaching the hurricane readings to the middle tier home values
middle_hurricane = join(df=middle)
#displaying the joined dataframe
middle_hurricane

Unnamed: 0,City,HurricaneName,DATE,AWND,WSF2,SizeRank,b,a,percent,bool
0,Apalachicola,c,8/14/2004,5.82,13.0,12877,91915.39335,114408.1491,24.471152,1.0
1,Apalachicola,d,7/10/2005,19.46,30.0,12877,112287.89680,140227.0087,24.881677,0.0
2,Apalachicola,d,7/11/2005,17.00,32.0,12877,112287.89680,140227.0087,24.881677,0.0
3,Apalachicola,ma,10/7/2016,10.74,21.9,12877,124938.95560,128831.4366,3.115506,0.0
4,Apalachicola,ma,10/8/2016,8.05,15.0,12877,124938.95560,128831.4366,3.115506,0.0
...,...,...,...,...,...,...,...,...,...,...
291,West Palm Beach,ma,10/7/2016,19.69,31.1,158,197641.97590,216511.5252,9.547339,0.0
292,West Palm Beach,ma,10/8/2016,10.51,17.0,158,197641.97590,216511.5252,9.547339,0.0
293,West Palm Beach,mi,10/11/2018,6.93,17.0,158,235358.13210,250692.0721,6.515152,0.0
294,West Palm Beach,mi,10/10/2018,12.75,25.1,158,235358.13210,250692.0721,6.515152,0.0


## Joining Top Tier Home Values with Hurricane Data

In [16]:
#attaching the hurricane readings to the top tier home values
top_hurricane = join(df=top)
#displaying the joined dataframe
top_hurricane

Unnamed: 0,City,HurricaneName,DATE,AWND,WSF2,SizeRank,b,a,percent,bool
0,Apalachicola,c,8/14/2004,5.82,13.0,12877,221794.3003,275297.2698,24.122788,1.0
1,Apalachicola,d,7/10/2005,19.46,30.0,12877,269923.5452,339439.8016,25.754054,0.0
2,Apalachicola,d,7/11/2005,17.00,32.0,12877,269923.5452,339439.8016,25.754054,0.0
3,Apalachicola,ma,10/7/2016,10.74,21.9,12877,255217.2827,259168.4086,1.548142,0.0
4,Apalachicola,ma,10/8/2016,8.05,15.0,12877,255217.2827,259168.4086,1.548142,0.0
...,...,...,...,...,...,...,...,...,...,...
296,West Palm Beach,ma,10/7/2016,19.69,31.1,158,375111.3104,393070.8116,4.787779,0.0
297,West Palm Beach,ma,10/8/2016,10.51,17.0,158,375111.3104,393070.8116,4.787779,0.0
298,West Palm Beach,mi,10/11/2018,6.93,17.0,158,414414.1815,434088.9600,4.747612,0.0
299,West Palm Beach,mi,10/10/2018,12.75,25.1,158,414414.1815,434088.9600,4.747612,0.0


### Saving the Datasets

In [17]:
#saving each joined dataframe to its own csv file
#index=False leaves the row index out of the file; forward slashes keep the
#relative paths valid on Windows, macOS and Linux alike
#bottom tier home values joined with the hurricane readings
bottom_hurricane.to_csv('data/bottom_hurricane.csv', index=False)
#middle tier home values joined with the hurricane readings
middle_hurricane.to_csv('data/middle_hurricane.csv', index=False)
#top tier home values joined with the hurricane readings
top_hurricane.to_csv('data/top_hurricane.csv', index=False)