In [176]:
import pandas as pd
import numpy as np

In [177]:
data = pd.DataFrame({
    'Name': [
        'Irma',
        'Ian',
        'Wilma',
        'Frances',
        'Jeanne',
        'Charley',
        'Ivan',
        'Michael'
    ],
    'IHP': [
        1020976908.08,
        917127545.87,
        342259581.97,
        411815685.98,
        398618731.65,
        208970753.97,
        164509853.53,
        149572302.29
    ],
    'Year': [
        '2017-09-10',
        '2022-09-29',
        '2005-10-24',
        '2004-09-04',
        '2004-09-26',
        '2004-08-13',
        '2004-09-16',
        '2018-10-11'
    ],
    'Wind speed kt': [
        100,
        125,
        105,
        90,
        105,
        130,
        105,
        140
    ],
    'Affected Counties': [
        67,
        77,
        20,
        67,
        53,
        67,
        45,
        18
    ]
}, columns=['Name', 'IHP', 'Year', 'Wind speed kt', 'Affected Counties', 'Rainfall', 'Building Code'])
data.set_index('Name', inplace=True)

In [178]:
declarations = pd.read_csv('gatorcain_data/declarations_clean_data.csv')

# Get disaster Number

In [179]:
declarations = declarations[declarations['declarationType'] == 'DR']
declarations.set_index('disasterNumber', inplace=True)
declarations['Affected Counties'] = declarations['designatedArea'].str.count(',') + 1
declarations = declarations.drop('declarationType', axis=1)
declarations

Unnamed: 0_level_0,declarationDate,declarationTitle,designatedArea,Affected Counties
disasterNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1539,2004-08-13 00:00:00+00:00,TROPICAL STORM BONNIE AND HURRICANE CHARLEY,"Franklin (County),Wakulla (County),Hendry (Cou...",67
1545,2004-09-04 00:00:00+00:00,HURRICANE FRANCES,"Sarasota (County),Nassau (County),Okeechobee (...",67
1551,2004-09-16 00:00:00+00:00,HURRICANE IVAN,"Jackson (County),Wakulla (County),Washington (...",45
1561,2004-09-26 00:00:00+00:00,HURRICANE JEANNE,"Marion (County),Hernando (County),Palm Beach (...",53
1595,2005-07-10 00:00:00+00:00,HURRICANE DENNIS,"Calhoun (County),Gadsden (County),Wakulla (Cou...",20
1602,2005-08-28 00:00:00+00:00,HURRICANE KATRINA,"Santa Rosa (County),Escambia (County),Miami-Da...",11
1609,2005-10-24 00:00:00+00:00,HURRICANE WILMA,"Brevard (County),Hendry (County),DeSoto (Count...",20
1806,2008-10-27 00:00:00+00:00,HURRICANE GUSTAV,"Santa Rosa (County),Gulf (County),Okaloosa (Co...",6
4084,2012-10-18 00:00:00+00:00,HURRICANE ISAAC,"Escambia (County),St. Lucie (County),Okaloosa ...",12
4280,2016-09-28 00:00:00+00:00,HURRICANE HERMINE,"Marion (County),Levy (County),Gadsden (County)...",26


# Getting The number of claims per zip

In [180]:
dtypes = {
    'incidentType': np.str_, 'county': np.str_, 'damagedStateAbbreviation': np.str_, 'damagedZipCode':np.str_,
    'declarationsDate': np.datetime64, 'disasterNumber': np.int32, 'ihpAmount': np.float64
}

claims = pd.read_csv('gatorcain_data/hurricane_FL_data.csv',
                     dtype=dtypes,
                     parse_dates=['declarationDate']
)
claims.head()

Unnamed: 0,incidentType,declarationDate,disasterNumber,county,damagedStateAbbreviation,damagedZipCode,ihpAmount
0,Hurricane,2022-09-29 00:00:00+00:00,4673,Brevard (County),FL,32935,0.0
1,Hurricane,2022-09-29 00:00:00+00:00,4673,Palm Beach (County),FL,33470,1000.0
2,Hurricane,2022-09-29 00:00:00+00:00,4673,Volusia (County),FL,32725,0.0
3,Hurricane,2022-09-29 00:00:00+00:00,4673,Lee (County),FL,33903,0.0
4,Hurricane,2022-09-29 00:00:00+00:00,4673,Polk (County),FL,33881,700.0


In [181]:
# Set IHP Amount
data.loc['Irma', 'IHP'] = claims.query("disasterNumber == 4337 or disasterNumber == 4341")["ihpAmount"].sum().round(2)
data.loc['Ian', 'IHP'] = claims.query("disasterNumber == 4673 or disasterNumber == 4675")["ihpAmount"].sum().round(2)
data.loc['Wilma', 'IHP'] = claims.query("disasterNumber == 1609")["ihpAmount"].sum().round(2)
data.loc['Frances', 'IHP'] = claims.query("disasterNumber == 1545")["ihpAmount"].sum().round(2)
data.loc['Jeanne', 'IHP'] = claims.query("disasterNumber == 1561")["ihpAmount"].sum().round(2)
data.loc['Charley', 'IHP'] = claims.query("disasterNumber == 1539")["ihpAmount"].sum().round(2)
data.loc['Ivan', 'IHP'] = claims.query("disasterNumber == 1551")["ihpAmount"].sum().round(2)
data.loc['Michael', 'IHP'] = claims.query("disasterNumber == 4399")["ihpAmount"].sum().round(2)
print(data['IHP'].astype(str))

Name
Irma       1020976908.08
Ian         968508915.93
Wilma       342259581.97
Frances     411815685.98
Jeanne      398618731.65
Charley     208970753.97
Ivan        164509853.53
Michael     149572302.29
Name: IHP, dtype: object


In [182]:
# Clean zip codes columns
damagedZipCodes = claims['damagedZipCode']
damagedZipCodes = damagedZipCodes.astype(str).str.replace("[^0-9]", "0")
damagedZipCodes = damagedZipCodes.astype(int)
claims['damagedZipCode'] = damagedZipCodes

  damagedZipCodes = damagedZipCodes.astype(str).str.replace("[^0-9]", "0")


In [183]:
#Get Zip Codes
florida_zip_codes = [x for x in claims['damagedZipCode'].unique().astype(int) if 32000 <= x <= 34999]
florida_zip_codes = sorted(florida_zip_codes)

In [184]:
# Create Dataframe
re_data = pd.DataFrame(columns=['Irma', 'Ian', 'Wilma', 'Frances', 'Jeanne', 'Charley', 'Ivan', 'Michael'], index=florida_zip_codes)
re_data.tail()

Unnamed: 0,Irma,Ian,Wilma,Frances,Jeanne,Charley,Ivan,Michael
34994,,,,,,,,
34995,,,,,,,,
34996,,,,,,,,
34997,,,,,,,,
34998,,,,,,,,


In [185]:
# Populate array
re_data['Irma'] = claims.query("disasterNumber == 4337 or disasterNumber == 4341")['damagedZipCode'].value_counts()
re_data['Ian'] = claims.query("disasterNumber == 4673 or disasterNumber == 4675")['damagedZipCode'].value_counts()
re_data['Wilma'] = claims.query("disasterNumber == 1609")['damagedZipCode'].value_counts()
re_data['Frances'] = claims.query("disasterNumber == 1545")['damagedZipCode'].value_counts()
re_data['Jeanne'] = claims.query("disasterNumber == 1561")['damagedZipCode'].value_counts()
re_data['Charley'] = claims.query("disasterNumber == 1539")['damagedZipCode'].value_counts()
re_data['Ivan'] = claims.query("disasterNumber == 1551")['damagedZipCode'].value_counts()
re_data['Michael'] = claims.query("disasterNumber == 1561")['damagedZipCode'].value_counts()

In [193]:
re_data.fillna(0, inplace=True)
print(re_data.head())
# re_data.to_csv('claims_by_zipcode.csv')

         Irma  Ian  Wilma  Frances  Jeanne  Charley  Ivan  Michael
32002     0.0  0.0    0.0      0.0     1.0      0.0   0.0      1.0
32003  1285.0  1.0    0.0    133.0   138.0      0.0   8.0    138.0
32004     0.0  2.0    0.0      0.0     0.0      0.0   0.0      0.0
32007    10.0  1.0    0.0     35.0    19.0      0.0   0.0     19.0
32008   687.0  0.0    0.0    147.0   190.0      2.0   0.0    190.0


# Performing regression

In [194]:
from sklearn.linear_model import LinearRegression

# Convert the index to a column
re_data['zipcode'] = re_data.index

# Use the zipcode column as the independent variable and the rest of the columns as the dependent variable
x = re_data[['zipcode']]
y = re_data.drop(columns=['zipcode'])

# Fit the model
model = LinearRegression().fit(x, y)

# Predict the number of claims for a new zipcode
new_zipcode = 32000
prediction = model.predict([[new_zipcode]])
print(prediction)

[[1388.36336194  197.91713759  339.60061059  211.42105127  101.92773028
    88.45956491  205.56342967  101.92773028]]


