In [None]:
import pandas as pd
import numpy as np

In [None]:
df = pd.read_csv('/content/final_project_FEMA.csv')

In [None]:
#Check the first few rows of the data
df.head()

Unnamed: 0,specialNeeds,damagedCity,damagedStateAbbreviation,roofDamage,foundationDamage,disasterNumber,householdComposition,destroyed,residenceType,repairAmount,tsaEligible,grossIncome,waterLevel
0,0,SARASOTA,FL,0,0,4337,1,0,Condo,0.0,0,0.0,0.0
1,0,SULPHUR,LA,0,0,4559,4,0,Apartment,0.0,0,55000.0,0.0
2,0,HOUSTON,TX,0,0,4332,1,0,Apartment,0.0,1,23000.0,0.0
3,0,RICHMOND,TX,0,0,4332,1,0,House/Duplex,0.0,0,80000.0,0.0
4,0,OVIEDO,FL,0,0,4337,4,0,House/Duplex,0.0,0,0.0,0.0


In [None]:
#Get info on column types and rows
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4457134 entries, 0 to 4457133
Data columns (total 13 columns):
 #   Column                    Dtype  
---  ------                    -----  
 0   specialNeeds              int64  
 1   damagedCity               object 
 2   damagedStateAbbreviation  object 
 3   roofDamage                int64  
 4   foundationDamage          int64  
 5   disasterNumber            int64  
 6   householdComposition      int64  
 7   destroyed                 int64  
 8   residenceType             object 
 9   repairAmount              float64
 10  tsaEligible               int64  
 11  grossIncome               float64
 12  waterLevel                float64
dtypes: float64(3), int64(7), object(3)
memory usage: 442.1+ MB


In [None]:
#Summary stats for numerical columns
df.describe()

Unnamed: 0,specialNeeds,roofDamage,foundationDamage,disasterNumber,householdComposition,destroyed,repairAmount,tsaEligible,grossIncome,waterLevel
count,4457134.0,4457134.0,4457134.0,4457134.0,4457134.0,4457134.0,4457134.0,4457134.0,4457134.0,4457134.0
mean,0.03383295,0.03646873,0.005457094,4398.283,2.256037,0.001803177,361.0063,0.3857961,48132.83,0.8398361
std,0.180799,0.1874534,0.07367032,108.7934,1.458243,0.04242553,2188.7,0.4867828,555660.4,5.49764
min,0.0,0.0,0.0,4332.0,1.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,4337.0,1.0,0.0,0.0,0.0,3936.0,0.0
50%,0.0,0.0,0.0,4337.0,2.0,0.0,0.0,0.0,20000.0,0.0
75%,0.0,0.0,0.0,4393.0,3.0,0.0,0.0,1.0,40000.0,0.0
max,1.0,1.0,1.0,4611.0,29.0,1.0,66600.0,1.0,100000000.0,600.0


In [None]:
#Check for missing values
df.isnull().sum()

Unnamed: 0,0
specialNeeds,0
damagedCity,0
damagedStateAbbreviation,0
roofDamage,0
foundationDamage,0
disasterNumber,0
householdComposition,0
destroyed,0
residenceType,0
repairAmount,0


In [None]:
# Drop rows with any missing values
df = df.dropna()


### Descriptive Analysis ###



In [None]:
# Total cases where TSA was used
tsa_cases_by_state = df[df['tsaEligible'] == 1].groupby('damagedStateAbbreviation').size()
print(tsa_cases_by_state)

damagedStateAbbreviation
CA         1
FL    620821
GA         1
LA    110741
MD         1
NC      4364
PR    721230
TX    262383
VI         3
dtype: int64


In [None]:
#Average household income where TSA was used
income_tsa_used = df[df['tsaEligible'] == 1]['grossIncome'].mean()
print(income_tsa_used)

43569.614727733206


In [None]:
#Average houshold income where TSA was not used
income_tsa_not_used = df[df['tsaEligible'] == 0]['grossIncome'].mean()
print(income_tsa_not_used)

50999.09234804786


In [None]:
#Average family size where TSA was used
family_size_tsa_used = df[df['tsaEligible'] == 1]['householdComposition'].mean()
print(family_size_tsa_used)

2.219124245076459


In [None]:
#Average familt size where TSA was not used
family_size_tsa_not_used = df[df['tsaEligible'] == 0]['householdComposition'].mean()
print(family_size_tsa_not_used)

2.279223433466455


In [None]:
#Assessing if water level was a factor in TSA approval
water_level_tsa = df.groupby('tsaEligible')['waterLevel'].size()
print(water_level_tsa)

tsaEligible
0    2737589
1    1719545
Name: waterLevel, dtype: int64


In [None]:
#Average cost of repairs where TSA was used
repairs_cost_tsa_used = df[df['tsaEligible'] == 1]['repairAmount'].mean()
print(repairs_cost_tsa_used)

579.8480307813984


In [None]:
#Average cost of repairs where TSA was not used
repairs_cost_tsa_not_used = df[df['tsaEligible'] == 0]['repairAmount'].mean()
print(repairs_cost_tsa_not_used)

223.54662928949514


In [None]:
#Count of TSA uses by residence type
tsa_by_residence_type = df.groupby('residenceType').size()
print(tsa_by_residence_type)

residenceType
Apartment                    861144
Assisted Living Facility       5789
Boat                           3010
College Dorm                   1547
Condo                        124421
Correctional Facility           434
House/Duplex                2744981
Military Housing               2217
Mobile Home                  321032
Other                        177973
Townhouse                    186895
Travel Trailer                27478
Unknown                         213
dtype: int64


In [None]:
#Average foundation damage by TSA eligibility
foundation_damage_by_tsa = df.groupby('tsaEligible')['foundationDamage'].mean()
print(foundation_damage_by_tsa)

tsaEligible
0    0.003868
1    0.007988
Name: foundationDamage, dtype: float64


In [None]:
#Average roof damage by TSA eligibility
roof_damage_by_tsa = df.groupby('tsaEligible')['roofDamage'].mean()
print(roof_damage_by_tsa)

tsaEligible
0    0.021370
1    0.060507
Name: roofDamage, dtype: float64


### Disaster-Specific Analysis ###

In [None]:
#Hurricane Irma cases
hurricane_irma = df[df['disasterNumber'] == 4337]

#Total TSA cases for hurricane Irma
irma_tsa_cases = hurricane_irma['tsaEligible'].sum()
print(irma_tsa_cases)

#Average repair cost for hurrican Irma
irma_repair_cost = hurricane_irma['repairAmount'].mean()
print(irma_repair_cost)

615112
71.33926395344069


In [None]:
#Hurricane Maria cases
hurricane_maria = df[df['disasterNumber'] == 4339]

#Total TSA cases for hurricane Maria
maria_tsa_cases = hurricane_maria['tsaEligible'].sum()
print(maria_tsa_cases)

#Average repair cost for hurricane Maria
maria_repair_cost = hurricane_maria['repairAmount'].mean()
print(maria_repair_cost)

721232
491.19281078378606


In [None]:
#Hurricane Harvey cases
hurricane_harvey = df[df['disasterNumber'] == 4332]

#Total TSA cases for hurricane Harvey
harvey_tsa_cases = hurricane_harvey['tsaEligible'].sum()
print(harvey_tsa_cases)

#Average repair cost for hurricane Harvey
harvey_repair_cost = hurricane_harvey['repairAmount'].mean()
print(harvey_repair_cost)

262385
1003.5423508401957


In [None]:
#Hurricane Ida cases
hurricane_ida = df[df['disasterNumber'] == 4611]

#Total TSA cases for hurricane Ida
ida_tsa_cases = hurricane_ida['tsaEligible'].sum()
print(ida_tsa_cases)

#Average repair cost for hurricane Ida
ida_repair_cost = hurricane_ida['repairAmount'].mean()
print(ida_repair_cost)

110740
446.57724570253197


In [None]:
#Texas winter storm cases
texas_winter_storm = df[df['disasterNumber'] ==  4586]

#Total TSA cases for Texas Winter Storm
texas_storm_tsa_cases = texas_winter_storm['tsaEligible'].sum()
print(texas_winter_storm)

#Average repair cost for Texas Winter Storm
texas_storm_repair_cost = texas_winter_storm['repairAmount'].mean()
print(texas_storm_repair_cost)

         specialNeeds     damagedCity damagedStateAbbreviation  roofDamage  \
10                  0         HOUSTON                       TX           0   
52                  1         HOUSTON                       TX           0   
53                  0   GRAND PRAIRIE                       TX           0   
92                  0         GARLAND                       TX           0   
105                 0      SUGAR LAND                       TX           0   
...               ...             ...                      ...         ...   
4457101             0         CYPRESS                       TX           0   
4457102             0  CORPUS CHRISTI                       TX           0   
4457111             1         HOUSTON                       TX           0   
4457112             0       ARLINGTON                       TX           0   
4457132             0          AUSTIN                       TX           0   

         foundationDamage  disasterNumber  householdComposition

In [None]:
#Hurricane Laura cases
hurricane_laura = df[df['disasterNumber'] == 4559]

#Total TSA cases for hurricane Laura
laura_tsa_cases = hurricane_laura['tsaEligible'].sum()
print(laura_tsa_cases)

#Average repair cost for hurricane Laura
laura_repair_cost = hurricane_laura['repairAmount'].mean()
print(laura_repair_cost)

0
402.75644986484014


In [None]:
#Hurricane Florence cases
hurricane_florence = df[df['disasterNumber'] == 4393]

#Total TSA cases for hurricane Florence
florence_tsa_cases = hurricane_florence['tsaEligible'].sum()
print(florence_tsa_cases)

#Average repair cost for hurricane Florence
florence_repair_cost = hurricane_florence['repairAmount'].mean()
print(florence_repair_cost)

4364
502.82649339595554


In [None]:
#Hurricane Micheal cases
hurricane_michael = df[df['disasterNumber'] ==  4399]

#Total TSA cases for hurricane Micheal
michael_tsa_cases = hurricane_michael['tsaEligible'].sum()
print(michael_tsa_cases)

#Average repair cost for hurricane Micheal
michael_repair_cost = hurricane_michael['repairAmount'].mean()
print(michael_repair_cost)

5712
573.8608488987819
