In [None]:
#build and save logistic regression models for each dataset
#only use one set of before and after per model 
#run the crosstabs between increase and decrease 

# Obtaining Data

In [70]:
#Importing libraries needed
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import pyplot
%matplotlib inline
import numpy as np
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

In [71]:
# Load the five datasets used in this notebook.
# Forward slashes are used so the relative paths resolve on Windows, macOS and
# Linux alike; a backslash is only treated as a path separator on Windows.
one = pd.read_csv('data/oneyear.csv')
six = pd.read_csv('data/sixmonths.csv')
three = pd.read_csv('data/threemonths.csv')
top = pd.read_csv('data/top.csv')
bottom = pd.read_csv('data/bottom.csv')

# Logistic Regression

In [81]:
# For our modeling steps
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import log_loss

# For demonstrative purposes
from scipy.special import logit, expit

In [73]:
# Single LogisticRegression estimator with a fixed random_state (56).
# NOTE: this same object is refit in each hurricane section below, so after any
# later fit it no longer holds the coefficients of an earlier hurricane.
logreg = LogisticRegression(
    random_state=56,
)

### Modeling 1 Year Before and After

Let's model each hurricane using the one-year before-and-after dataset.

#### Modeling hurricane Charley

In [116]:
# Keep only the rows for hurricane code 'c' (Charley).
# A city can appear more than once; sort by AWND descending so that
# drop_duplicates keeps the row with the highest wind speed for each city,
# then restore the original row order.
one_c = (
    one.loc[one['HurricaneName'] == 'c']
    .sort_values(by='AWND', ascending=False)
    .drop_duplicates(subset='City', keep='first')
    .sort_index()
)
one_c.head()

Unnamed: 0,City,HurricaneName,DATE,AWND,WSF2,WSF5,SizeRank,b,a,percent,bool
0,Apalachicola,c,8/14/2004,5.82,13.0,15.0,12877,84666.67437,128305.1212,51.541468,1
10,Brooksville,c,8/13/2004,7.38,13.0,15.0,1410,112191.494,165069.9124,47.132288,0
21,Clearwater,c,8/13/2004,9.62,25.1,32.0,233,126715.5981,175387.9455,38.410699,0
33,Cross City,c,8/13/2004,4.47,14.1,17.9,11954,36778.69082,52569.71289,42.935248,0
46,Daytona Beach,c,8/14/2004,8.28,8.1,8.1,744,100013.2474,149991.548,49.971681,0


In [117]:
# y is the prediction target; X holds the feature columns.
y = one_c['bool']
# Remove the target itself, the identifier columns and the 'a' / 'percent'
# columns.
# NOTE(review): 'a' and 'percent' look like the after-event values the target
# is derived from -- confirm.
X = one_c.drop(['bool', 'City', 'HurricaneName', 'DATE', 'a', 'percent'], axis=1)
# 'Unnamed: 0' is the row-number column pandas creates when the CSV was saved
# with its index; it is not a real feature, so keep it out of the model.
# errors='ignore' makes this a no-op if the column is not present.
X = X.drop(columns='Unnamed: 0', errors='ignore')

In [118]:
# Fit the shared estimator on Charley's rows and predict for those same rows.
# These are in-sample predictions: no held-out split is used in this cell.
one_c_r = logreg.fit(X, y).predict(X)

In [119]:
# Dictionary that collects each model's predictions, keyed by hurricane and
# time window; start it off with the Charley one-year result.
model_dict = {'charley_1year': one_c_r}
print(model_dict)


{'charley_1year': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0], dtype=int64)}


#### Modeling hurricane Dennis

In [120]:
# Keep only the rows for hurricane code 'd' (Dennis).
# A city can appear more than once; sort by AWND descending so that
# drop_duplicates keeps the row with the highest wind speed for each city,
# then restore the original row order.
one_d = (
    one.loc[one['HurricaneName'] == 'd']
    .sort_values(by='AWND', ascending=False)
    .drop_duplicates(subset='City', keep='first')
    .sort_index()
)
one_d.head()

Unnamed: 0,City,HurricaneName,DATE,AWND,WSF2,WSF5,SizeRank,b,a,percent,bool
1,Apalachicola,d,7/10/2005,19.46,30.0,34.9,12877,100185.4978,148513.775,48.238795,0
12,Brooksville,d,7/10/2005,13.42,28.0,42.9,1410,123020.4111,197060.2214,60.18498,1
23,Clearwater,d,7/10/2005,20.13,36.0,40.9,233,141068.6094,205845.4045,45.918646,0
35,Cross City,d,7/10/2005,11.41,25.9,38.0,11954,40945.00696,61390.49091,49.93401,0
47,Daytona Beach,d,7/10/2005,8.95,21.9,29.1,744,115929.5759,183952.4139,58.676,1


In [121]:
# y is the prediction target; X holds the feature columns.
y = one_d['bool']
# Remove the target itself, the identifier columns and the 'a' / 'percent'
# columns.
# NOTE(review): 'a' and 'percent' look like the after-event values the target
# is derived from -- confirm.
X = one_d.drop(['bool', 'City', 'HurricaneName', 'DATE', 'a', 'percent'], axis=1)
# 'Unnamed: 0' is the row-number column pandas creates when the CSV was saved
# with its index; it is not a real feature, so keep it out of the model.
# errors='ignore' makes this a no-op if the column is not present.
X = X.drop(columns='Unnamed: 0', errors='ignore')

In [122]:
# Refit the shared estimator on Dennis's rows and predict for those same rows.
# These are in-sample predictions: no held-out split is used in this cell.
one_d_r = logreg.fit(X, y).predict(X)

In [123]:
# Add the Dennis one-year predictions to the existing results dictionary.
model_dict.update({'dennis_1year': one_d_r})

#### Modeling hurricane Matthew

In [124]:
# Keep only the rows for hurricane code 'ma' (Matthew).
# A city can appear more than once; sort by AWND descending so that
# drop_duplicates keeps the row with the highest wind speed for each city,
# then restore the original row order.
one_ma = (
    one.loc[one['HurricaneName'] == 'ma']
    .sort_values(by='AWND', ascending=False)
    .drop_duplicates(subset='City', keep='first')
    .sort_index()
)
one_ma.head()

Unnamed: 0,City,HurricaneName,DATE,AWND,WSF2,WSF5,SizeRank,b,a,percent,bool
6,Apalachicola,ma,10/7/2016,10.74,21.9,27.1,12877,122030.2594,138967.7141,13.879717,0
16,Brooksville,ma,10/7/2016,19.69,30.0,38.9,1410,124037.6722,155561.7068,25.414887,1
28,Clearwater,ma,10/7/2016,20.58,31.1,38.9,233,153405.0845,191761.4726,25.003336,1
40,Cross City,ma,10/7/2016,16.33,27.1,38.9,11954,57669.26893,56478.52674,-2.064778,0
52,Daytona Beach,ma,10/7/2016,27.51,52.1,70.9,744,113228.9592,141843.4794,25.27138,1


In [125]:
# y is the prediction target; X holds the feature columns.
y = one_ma['bool']
# Remove the target itself, the identifier columns and the 'a' / 'percent'
# columns.
# NOTE(review): 'a' and 'percent' look like the after-event values the target
# is derived from -- confirm.
X = one_ma.drop(['bool', 'City', 'HurricaneName', 'DATE', 'a', 'percent'], axis=1)
# 'Unnamed: 0' is the row-number column pandas creates when the CSV was saved
# with its index; it is not a real feature, so keep it out of the model.
# errors='ignore' makes this a no-op if the column is not present.
X = X.drop(columns='Unnamed: 0', errors='ignore')

In [126]:
# Refit the shared estimator on Matthew's rows and predict for those same rows.
# These are in-sample predictions: no held-out split is used in this cell.
one_ma_r = logreg.fit(X, y).predict(X)

In [127]:
# Add the Matthew one-year predictions to the existing results dictionary.
model_dict.update({'matthew_1year': one_ma_r})

#### Modeling hurricane Irma

In [128]:
# Keep only the rows for hurricane code 'ir' (Irma).
# A city can appear more than once; sort by AWND descending so that
# drop_duplicates keeps the row with the highest wind speed for each city,
# then restore the original row order.
one_i = (
    one.loc[one['HurricaneName'] == 'ir']
    .sort_values(by='AWND', ascending=False)
    .drop_duplicates(subset='City', keep='first')
    .sort_index()
)
one_i.head()

Unnamed: 0,City,HurricaneName,DATE,AWND,WSF2,WSF5,SizeRank,b,a,percent,bool
3,Apalachicola,ir,9/11/2017,20.8,36.9,48.1,12877,124189.2028,155516.923,25.2258,1
14,Brooksville,ir,9/10/2017,20.8,42.9,64.0,1410,138365.8972,168975.2349,22.122025,1
26,Clearwater,ir,9/10/2017,25.5,49.0,72.9,233,171026.8981,206989.2568,21.027312,0
37,Cross City,ir,9/11/2017,35.12,40.9,55.9,11954,54022.76784,61224.84277,13.331555,0
50,Daytona Beach,ir,9/10/2017,24.61,53.9,78.1,744,125481.4038,154724.6047,23.304809,1


In [129]:
# y is the prediction target; X holds the feature columns.
y = one_i['bool']
# Remove the target itself, the identifier columns and the 'a' / 'percent'
# columns.
# NOTE(review): 'a' and 'percent' look like the after-event values the target
# is derived from -- confirm.
X = one_i.drop(['bool', 'City', 'HurricaneName', 'DATE', 'a', 'percent'], axis=1)
# 'Unnamed: 0' is the row-number column pandas creates when the CSV was saved
# with its index; it is not a real feature, so keep it out of the model.
# errors='ignore' makes this a no-op if the column is not present.
X = X.drop(columns='Unnamed: 0', errors='ignore')

In [130]:
# Refit the shared estimator on Irma's rows and predict for those same rows.
# These are in-sample predictions: no held-out split is used in this cell.
one_i_r = logreg.fit(X, y).predict(X)

In [131]:
# Add the Irma one-year predictions to the existing results dictionary.
model_dict.update({'irma_1year': one_i_r})

#### Modeling hurricane Michael

In [132]:
# Keep only the rows for hurricane code 'mi' (Michael).
# A city can appear more than once; sort by AWND descending so that
# drop_duplicates keeps the row with the highest wind speed for each city,
# then restore the original row order.
one_mi = (
    one.loc[one['HurricaneName'] == 'mi']
    .sort_values(by='AWND', ascending=False)
    .drop_duplicates(subset='City', keep='first')
    .sort_index()
)
one_mi.head()

Unnamed: 0,City,HurricaneName,DATE,AWND,WSF2,WSF5,SizeRank,b,a,percent,bool
8,Apalachicola,mi,10/10/2018,29.75,63.1,89.0,12877,138967.7141,167685.6852,20.665211,1
19,Brooksville,mi,10/10/2018,13.2,25.1,36.0,1410,155561.7068,176917.6445,13.728274,0
31,Clearwater,mi,10/10/2018,18.34,30.0,38.9,233,191761.4726,220337.6162,14.901921,0
43,Cross City,mi,10/10/2018,14.99,23.0,35.1,11954,56478.52674,69933.92327,23.823916,1
56,Daytona Beach,mi,10/9/2018,10.51,23.0,32.0,744,141843.4794,167309.8879,17.95388,1


In [133]:
# y is the prediction target; X holds the feature columns.
y = one_mi['bool']
# Remove the target itself, the identifier columns and the 'a' / 'percent'
# columns.
# NOTE(review): 'a' and 'percent' look like the after-event values the target
# is derived from -- confirm.
X = one_mi.drop(['bool', 'City', 'HurricaneName', 'DATE', 'a', 'percent'], axis=1)
# 'Unnamed: 0' is the row-number column pandas creates when the CSV was saved
# with its index; it is not a real feature, so keep it out of the model.
# errors='ignore' makes this a no-op if the column is not present.
X = X.drop(columns='Unnamed: 0', errors='ignore')

In [134]:
# Refit the shared estimator on Michael's rows and predict for those same rows.
# These are in-sample predictions: no held-out split is used in this cell.
one_mi_r = logreg.fit(X, y).predict(X)

In [135]:
# Add the Michael one-year predictions to the existing results dictionary.
model_dict.update({'michael_1year': one_mi_r})

In [136]:
import json

# Pretty-print all stored predictions, sorted by key. The values are NumPy
# arrays, which json cannot serialise natively, so default=str renders each
# array as its string form.
print(
    json.dumps(
        model_dict,
        default=str,
        sort_keys=True,
        indent=4,
    )
)

{
    "charley_1year": "[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]",
    "dennis_1year": "[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]",
    "irma_1year": "[0 0 0 1 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]",
    "matthew_1year": "[0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]",
    "michael_1year": "[1 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]"
}
