# HavocMapper Analysis and Visuals
## By Multiple Collaborators
---

### Imports and bringing in cleaned CSV file

In [None]:
# Dependencies
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the refined FEMA disasters CSV. The collaborator sections below each
# take their own copy of this frame.
fema_disasters = pd.read_csv('Assets/CSVs/femaDisasters_clean.csv')

# Load the summary CSV file.
disasters_summary = pd.read_csv('Assets/CSVs/disasters_summary.csv')

## Alex's Section

In [None]:
# Private copy for this section so edits never touch the shared frame.
disasters_alex = fema_disasters.copy()

In [None]:
# Row count for every (incident type, state, declaration title) combination.
combo_keys = ['incidentType', 'state', 'declarationTitle']
combo_sizes = disasters_alex.groupby(combo_keys).size()
grouped_data = combo_sizes.reset_index(name='count')
grouped_data.head(15)

In [None]:
# Total rows per state, largest first.
disaster_by_state = (
    disasters_alex.groupby('state')
    .size()
    .reset_index(name='total_disasters')
    .sort_values(by='total_disasters', ascending=False)
)
disaster_by_state

In [None]:
# Rows per (state, incident type) pair, most frequent pairs first.
disaster_by_type = (
    disasters_alex.groupby(['state', 'incidentType'])
    .size()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
)
disaster_by_type.head(25)

In [None]:
# Bar chart of the ten states with the most disaster rows.
top_states = disaster_by_state.head(10)
plt.figure(figsize=(12, 6))
plt.bar(top_states['state'], top_states['total_disasters'], color='skyblue')
plt.xlabel('State')
plt.ylabel('Total Disasters')
plt.title('Top 10 States with the Most Natural Disasters')
plt.xticks(rotation=45)
plt.tight_layout()
# Save BEFORE showing: after plt.show() the current figure is gone, so a later
# savefig() writes an empty image.
plt.savefig('Assets/Figures/Alex/Top10_States_MostDisasters')
plt.show()

In [None]:
# Collapse the per-state counts into one total per incident type and chart the
# ten most frequent types.
top_disaster_types = disaster_by_type.groupby('incidentType')['count'].sum().reset_index()
top_disaster_types = top_disaster_types.sort_values(by='count', ascending=False).head(10)
plt.figure(figsize=(12, 6))
plt.bar(top_disaster_types['incidentType'], top_disaster_types['count'], color='orange')
plt.xlabel('Disaster Type')
plt.ylabel('Count')
plt.title('Top 10 Most Frequent Disaster Types')
plt.xticks(rotation=45)
plt.tight_layout()
# Save BEFORE showing: after plt.show() the current figure is gone, so a later
# savefig() writes an empty image.
plt.savefig('Assets/Figures/Alex/Top10_MostFrequent_Disasters')
plt.show()

In [None]:
# For each state keep the single (state, incidentType) row with the highest
# count, then order those rows from largest to smallest count.
top_row_labels = disaster_by_type.groupby('state')['count'].idxmax()
most_common_disasters = disaster_by_type.loc[top_row_labels].sort_values(
    by='count', ascending=False
)
most_common_disasters


In [None]:
# Chart the ten largest "most common disaster per state" rows and label each
# bar with the incident type it represents.
top_states_disasters = most_common_disasters.head(10)
states = top_states_disasters['state']
disasters = top_states_disasters['incidentType']
counts = top_states_disasters['count']

plt.figure(figsize=(12, 6))
plt.bar(states, counts, color='violet')
# Bars sit at x = 0..n-1 in row order, so the enumerate position is the bar's x.
for position, (disaster, count) in enumerate(zip(disasters, counts)):
    plt.text(position, count + 0.5, disaster, ha='center', fontsize=8)
plt.xlabel('State')
plt.ylabel('Number of Disasters')
plt.title('Most Common Natural Disasters in Each Top State', fontsize=16)
# Single tick-formatting call (the earlier rotation=45 call was overridden).
plt.xticks(rotation=60, fontsize=12)
plt.yticks(fontsize=12)
# Lay out after the final tick formatting so rotated labels are not clipped.
plt.tight_layout()
# Save BEFORE showing: after plt.show() the current figure is gone, so a later
# savefig() writes an empty image.
plt.savefig('Assets/Figures/Alex/MostCommonDisasters_byTopStates')
plt.show()

In [None]:
# Most and least frequent incident types, with their row counts.
incident_counts = disasters_alex['incidentType'].value_counts()
max_disaster_type, min_disaster_type = incident_counts.idxmax(), incident_counts.idxmin()
max_disasters, min_disasters = incident_counts.max(), incident_counts.min()
print(f"Maximum disasters: {max_disasters} ({max_disaster_type})")
print(f"Minimum disasters: {min_disasters} ({min_disaster_type})")

In [None]:
# Count the rows whose state code is TX.
is_texas = disasters_alex['state'] == 'TX'
texas_disasters = disasters_alex[is_texas]
num_texas_disasters = len(texas_disasters)
print(f"Texas had {num_texas_disasters} natural disasters.")

In [None]:
# Parse declaration dates; unparseable values become NaT.
disasters_alex['declarationDate'] = pd.to_datetime(disasters_alex['declarationDate'], errors='coerce')
# Drop rows without a usable date. .copy() makes `data` an independent frame so
# the column assignments below are not chained assignments on a slice.
data = disasters_alex.dropna(subset=['declarationDate']).copy()
# Take the month from the filtered frame itself, so the column is built from
# exactly the rows kept above and stays an integer dtype (no NaN padding).
data['Month'] = data['declarationDate'].dt.month
def assign_season(month):
    """Map a month number (1-12) to its meteorological season name.

    Returns 'Winter', 'Spring', 'Summer' or 'Fall'; any value that is not one
    of the twelve month numbers yields None.
    """
    season_table = (
        ((12, 1, 2), 'Winter'),
        ((3, 4, 5), 'Spring'),
        ((6, 7, 8), 'Summer'),
        ((9, 10, 11), 'Fall'),
    )
    for season_months, season_name in season_table:
        if month in season_months:
            return season_name
    return None
# Tag every row with its season, then tally rows per month and per season.
data['Season'] = data['Month'].apply(assign_season)
monthly_counts = data['Month'].value_counts().sort_index()
seasonal_counts = data['Season'].value_counts()
for heading, tally in (
    ("Disaster Occurrences by Month:", monthly_counts),
    ("\nDisaster Occurrences by Season:", seasonal_counts),
):
    print(heading)
    print(tally)

In [None]:
# State with the most rows whose incident type is Hurricane.
is_hurricane = data['incidentType'] == 'Hurricane'
hurricane_counts = data[is_hurricane].groupby('state').size()
most_hurricanes_state, most_hurricanes_count = hurricane_counts.idxmax(), hurricane_counts.max()
print(f"The state with the most hurricanes is {most_hurricanes_state} with {most_hurricanes_count} hurricanes.")


In [None]:
# State with the most rows whose incident type is Severe Storm.
is_severe_storm = data['incidentType'] == 'Severe Storm'
severe_storm_counts = data[is_severe_storm].groupby('state').size()
most_severe_storms_state, most_severe_storms_count = severe_storm_counts.idxmax(), severe_storm_counts.max()
print(f"The state with the most severe storms is {most_severe_storms_state} with {most_severe_storms_count} severe storms.")


## Avenika's Section

In [None]:
# Private copy for this section so edits never touch the shared frame.
disasters_avenika = fema_disasters.copy()

In [None]:
# Smallest declarationDate value. Despite the variable name this is the whole
# column value, not just a year.
# NOTE(review): the column has not been converted with pd.to_datetime in this
# section yet, so this is presumably a comparison of raw strings - confirm the
# stored format sorts chronologically.
earliest_declaration_year = disasters_avenika["declarationDate"].min()
earliest_declaration_year

In [None]:
# Largest declarationDate value. Despite the variable name this is the whole
# column value, not just a year.
# NOTE(review): the column has not been converted with pd.to_datetime in this
# section yet, so this is presumably a comparison of raw strings - confirm the
# stored format sorts chronologically.
latest_declaration_year = disasters_avenika["declarationDate"].max()
latest_declaration_year

In [None]:
# Number of rows with a non-null incidentType (Series.count skips missing values).
total_of_natural_disaster = disasters_avenika["incidentType"].count()
total_of_natural_disaster

In [None]:
# Counts of declaration types
# (DR = Major Disaster, EM = Emergency Declaration, FM = Fire Management)
# For more info see README (in progress)
disasters_avenika['declarationType'].value_counts()

In [None]:
# Number of rows per incident type, most frequent first.
disasters_avenika['incidentType'].value_counts()

In [None]:
# Validate there are no NaNs: count() reports non-null values per column, so
# any column with a smaller count than the others has missing values.
disasters_avenika.count()

In [None]:
# Number of rows per state (a tally, not a filter), most frequent first.
disasters_avenika['state'].value_counts()

In [None]:
# Number of rows per designated area (a tally, not a filter), most frequent first.
disasters_avenika['designatedArea'].value_counts()

In [None]:
# Extract declaration_year from declarationDate.
# Builds a small helper frame `df` holding the parsed date and its year.
declarationDate = disasters_avenika["declarationDate"]
df = pd.DataFrame(declarationDate)
parsed_dates = pd.to_datetime(df["declarationDate"])
df["declarationDate"] = parsed_dates
df["declaration_year"] = parsed_dates.dt.year
print(df)

# NOTE: a yearly count and trend plot was sketched here but left disabled.

In [None]:
# Pull the year column out of the helper frame and preview the first rows.
declaration_year = df["declaration_year"]
declaration_year.head()

In [None]:
# Add declaration_year to the disasters_avenika data set (values are aligned
# on the row index shared with the helper frame `df`).
disasters_avenika["declaration_year"] = df["declaration_year"]
disasters_avenika.head()

In [None]:
# Count rows per (declaration_year, incidentType) pair.
# Despite the variable name this covers every incident type, not only severe
# storms; the severe-storm slice is taken from it in a later cell.

sever_storms_by_year = disasters_avenika.groupby("declaration_year")["incidentType"].value_counts().sort_index()
sever_storms_by_year

In [None]:
# Number of rows per incident type (used to check the exact type labels).
disasters_avenika["incidentType"].value_counts()

In [None]:
# Severe Storm trend by year.
# Count rows per (declaration_year, incidentType) pair for every incident type.
sever_storms_by_year = disasters_avenika.groupby("declaration_year")["incidentType"].value_counts().sort_index()

# Take the 'Severe Storm' cross-section: a Series indexed by year. Years with
# no severe storms are absent from it rather than zero.
severe_storms = sever_storms_by_year.xs("Severe Storm", level="incidentType")

# Plot the yearly counts
severe_storms.plot(kind='line', figsize=(10, 6), color='b', marker='o')

# Title and axis labels (y label previously read "Severe Storm Storms")
plt.title('Trend of Severe Storm by Year')
plt.xlabel('Year')
plt.ylabel('Number of Severe Storms')
plt.grid(True)

# Show the plot
plt.show()

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) of the yearly Severe Storm counts
sever_storm_data = severe_storms.describe()
sever_storm_data

In [None]:
# Hurricane trend by year.
# Rows per (declaration_year, incidentType) pair, ordered by year.
hurricane_by_year = (
    disasters_avenika.groupby("declaration_year")["incidentType"]
    .value_counts()
    .sort_index()
)

# Keep only the 'Hurricane' cross-section: one count per year.
hurricane_data = hurricane_by_year.xs("Hurricane", level="incidentType")

# Line chart of the yearly counts.
hurricane_data.plot.line(figsize=(10, 6), color='b', marker='o')

plt.title('Trend of Hurricanes by Year')
plt.xlabel('Year')
plt.ylabel('Number of Hurricanes')
plt.grid(True)

plt.show()

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) of the yearly Hurricane counts
hurricane_describe = hurricane_data.describe()
hurricane_describe

In [None]:
# Flood trend by year.
# Rows per (declaration_year, incidentType) pair, ordered by year.
flood_by_year = (
    disasters_avenika.groupby("declaration_year")["incidentType"]
    .value_counts()
    .sort_index()
)

# Keep only the 'Flood' cross-section: one count per year.
flood_data = flood_by_year.xs("Flood", level="incidentType")

# Line chart of the yearly counts.
flood_data.plot.line(figsize=(10, 6), color='b', marker='o')

plt.title('Trend of Floods by Year')
plt.xlabel('Year')
plt.ylabel('Number of Floods')
plt.grid(True)

plt.show()

In [None]:
# Summary statistics of the yearly Flood counts.
# Describe the Flood-only series (flood_data), not flood_by_year, which holds
# counts for every incident type. The result gets its own name so flood_data
# keeps the yearly series and this cell can be re-run safely.
flood_describe = flood_data.describe()
flood_describe

## Claudia's Section

In [None]:
# Private copy for this section so edits never touch the shared frame.
disasters_claudia = fema_disasters.copy()

In [None]:
# Preview the first five rows of the data
disasters_claudia.head()

In [None]:
# Parse both date columns so they can be subtracted.
for date_column in ('declarationDate', 'incidentBeginDate'):
    disasters_claudia[date_column] = pd.to_datetime(disasters_claudia[date_column])

# Whole days between the start of the incident and its declaration.
declare_delay = disasters_claudia['declarationDate'] - disasters_claudia['incidentBeginDate']
disasters_claudia['time_to_declare'] = declare_delay.dt.days

# Summary statistics for time_to_declare
disasters_claudia['time_to_declare'].describe()

In [None]:
# Program-declared flag columns, summed per column.
# NOTE(review): presumably 0/1 (or boolean) flags, so each sum is the number of
# rows with that program declared - confirm against the CSV.
program_cols = ['ihProgramDeclared', 'iaProgramDeclared', 'paProgramDeclared', 'hmProgramDeclared']
disasters_claudia[program_cols].sum()

In [None]:
# Row-wise sum of the program columns, stored as a new column.
disasters_claudia['total_programs'] = disasters_claudia[program_cols].sum(axis = 1)

In [None]:
# Columns used for the correlation matrix in the next cell (same list as above)
program_cols = ['ihProgramDeclared', 'iaProgramDeclared', 'paProgramDeclared', 'hmProgramDeclared']
program_cols

In [None]:
# Correlation matrix: pairwise correlation between the program columns
disasters_claudia[program_cols].corr()

In [None]:
# Rows per state, most frequent first.
state_disaster_count = disasters_claudia['state'].value_counts()

# Bar chart of the per-state totals.
state_axes = state_disaster_count.plot.bar(figsize=(10, 6), title="Disaster Count by State")
state_axes.set_xlabel("State")
state_axes.set_ylabel("Number of Disasters")
plt.show()

In [None]:
# Store the declaration year as its own column.
disasters_claudia['declarationYear'] = disasters_claudia['declarationDate'].dt.year

# Rows per year, in chronological order.
yearly_disasters = disasters_claudia['declarationYear'].value_counts().sort_index()
years, totals = yearly_disasters.index, yearly_disasters.values

# Line chart of the yearly totals.
plt.plot(years, totals, marker='o')
plt.title("Number of Disasters Over Time")
plt.xlabel("Year")
plt.ylabel("Count of Disasters")
plt.grid()
plt.show()

In [None]:
# Filter disasters declared in calendar years 2000 through 2010 (inclusive).
disasters_claudia['declarationDate'] = pd.to_datetime(disasters_claudia['declarationDate'])
# Compare on the year instead of date strings: a "<= '2010-12-31'" bound means
# midnight, which would drop any 2010-12-31 row carrying a later time of day.
declared_year = disasters_claudia['declarationDate'].dt.year
# .copy() makes the result an independent frame, so a later cell can add
# columns to it without a chained assignment on a slice.
disasters_2000_2010 = disasters_claudia[declared_year.between(2000, 2010)].copy()

# Display the filtered dataset
disasters_2000_2010[['incidentType', 'state', 'declarationDate']]


In [None]:
# Extract year from declarationDate.
# assign() returns a new frame, which avoids setting a column on what may be a
# filtered slice of disasters_claudia.
disasters_2000_2010 = disasters_2000_2010.assign(
    year=disasters_2000_2010['declarationDate'].dt.year
)

# Count disasters by year, in chronological order
yearly_disasters = disasters_2000_2010['year'].value_counts().sort_index()

# Plot the results
plt.figure(figsize=(12, 6))
plt.plot(yearly_disasters.index, yearly_disasters.values, marker='o', linestyle='-', label="Total Disasters")
plt.title("Number of Disasters (2000-2010)")
plt.xlabel("Year")
plt.ylabel("Number of Disasters")
plt.grid()
plt.legend()
plt.show()

In [None]:
# Filter disasters declared in 2005.
# .copy() makes the result an independent frame, so a later cell can add
# columns to it without a chained assignment on a slice.
disasters_2005 = disasters_claudia[disasters_claudia['declarationDate'].dt.year == 2005].copy()

# Display the dataset for 2005
disasters_2005[['incidentType', 'state', 'declarationDate']]

In [None]:
# Count 2005 rows per state, most frequent first
state_count_2005 = disasters_2005['state'].value_counts()

# Display results
print(state_count_2005)


In [None]:
# Extract the month from the declaration date.
# assign() returns a new frame, which avoids setting a column on what may be a
# filtered slice of disasters_claudia.
disasters_2005 = disasters_2005.assign(month=disasters_2005['declarationDate'].dt.month)

# Count disasters by month, in calendar order
monthly_disasters_2005 = disasters_2005['month'].value_counts().sort_index()

# Display results
monthly_disasters_2005


Above we see that September 2005 had the highest spike of natural disasters of any month that year.

In [None]:
# Keep only the 2005 rows whose incident type is "Hurricane"
# (swap the literal to inspect another incident type)
hurricane_2005 = disasters_2005[disasters_2005['incidentType'] == 'Hurricane']

# Display state and declaration date for those rows
hurricane_2005[['state', 'declarationDate']]


In [None]:
# Rows per declaration year, in chronological order.
yearly_disasters = disasters_claudia['declarationYear'].value_counts().sort_index()

# Year-over-year change expressed in percent (the first year has no
# predecessor, so its value is NaN).
yearly_disasters_percentage_change = yearly_disasters.pct_change().mul(100)

# Put counts and percentage change side by side in one table.
yearly_disasters_summary = yearly_disasters.to_frame('Disaster Count')
yearly_disasters_summary['Percentage Change (%)'] = yearly_disasters_percentage_change

# Display the summary
yearly_disasters_summary


In [None]:
plt.figure(figsize=(10, 6))
plt.bar(yearly_disasters.index, yearly_disasters.values, alpha=0.7, label="Disaster Count")
plt.plot(yearly_disasters.index, yearly_disasters_percentage_change, marker='o', color='orange', label="Percentage Change (%)")
plt.title("Year-over-Year Changes in Disasters")
plt.xlabel("Year")
plt.ylabel("Disaster Count and Percentage Change")
plt.axhline(0, color='gray', linestyle='--', linewidth=0.8)
plt.legend()
plt.grid()
plt.show()

In [None]:
# Work on the percentage-change column once.
pct_changes = yearly_disasters_summary['Percentage Change (%)']

# Year with the biggest jump, and the size of that jump.
max_change_year = pct_changes.idxmax()
max_change_value = pct_changes.max()

# Mean change; pandas skips the leading NaN by default.
average_change = pct_changes.mean()

# Print results
print(f"Year with the largest percentage change: {max_change_year} ({max_change_value:.2f}%)")
print(f"Average percentage change: {average_change:.2f}%")

Above we can see that the number of natural disasters declared by FEMA has increased significantly over time.

In [None]:
# Mean days-to-declare per state, shortest first.
time_by_state = disasters_claudia.groupby('state')['time_to_declare'].mean().sort_values()
time_by_state

In [None]:
# Mean days-to-declare per incident type, shortest first.
time_by_incident = disasters_claudia.groupby('incidentType')['time_to_declare'].mean().sort_values()
time_by_incident

In [None]:
# One point per row: state on x, days-to-declare on y.
state_labels = disasters_claudia['state']
days_to_declare = disasters_claudia['time_to_declare']

plt.figure(figsize=(12, 6))
plt.scatter(state_labels, days_to_declare, alpha=0.5)
plt.title("Time to Declare by State")
plt.xlabel("State")
plt.ylabel("Days to Declare")
plt.xticks(rotation=90)
plt.show()

In [None]:
# cycle_time = whole days from incident start to declaration.
declare_lag = disasters_claudia['declarationDate'] - disasters_claudia['incidentBeginDate']
disasters_claudia['cycle_time'] = declare_lag.dt.days

# Mean cycle time per state, shortest first.
cycle_time_by_state = (
    disasters_claudia.groupby('state')['cycle_time']
    .mean()
    .sort_values()
)
cycle_time_by_state

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) of cycle time, one row
# per incident type
incident_type_analysis = disasters_claudia.groupby('incidentType')['cycle_time'].describe()

# Display the result
print(incident_type_analysis)

In [None]:
# Average cycle time by incident type, shortest first
average_cycle_time = disasters_claudia.groupby('incidentType')['cycle_time'].mean().sort_values()

# Display the result
print(average_cycle_time)

In [None]:
# Bar chart of the mean cycle time per incident type.
plt.figure(figsize=(12, 6))
average_cycle_time.plot.bar(color='skyblue')
plt.title('Average Cycle Time by Incident Type')
plt.xlabel('Incident Type')
plt.ylabel('Average Cycle Time (days)')
plt.xticks(rotation=45)
# Horizontal guide lines only, kept faint so the bars stay dominant.
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

In [None]:
# Box plot of the cycle-time distribution for each incident type.
plt.figure(figsize=(12, 6))
box_axes = sns.boxplot(x='incidentType', y='cycle_time', data=disasters_claudia, palette='Set3')
box_axes.set_title('Cycle Time Distribution by Incident Type')
box_axes.set_xlabel('Incident Type')
box_axes.set_ylabel('Cycle Time (days)')
plt.xticks(rotation=45)
# Horizontal guide lines only, kept faint so the boxes stay dominant.
box_axes.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

In [None]:
# Sort the data by declarationDate, earliest first (returns a new frame)
disasters_claudia_sorted = disasters_claudia.sort_values(by='declarationDate')

# Display the key columns of the sorted dataset
disasters_claudia_sorted[['incidentType', 'state', 'declarationDate']]

In [None]:
# Keep only the rows whose incident type is 'Fire'
# (swap the literal to inspect another incident type)
specific_incident = disasters_claudia_sorted[disasters_claudia_sorted['incidentType'] == 'Fire']

# Display states and dates for the specific incident type
print(specific_incident[['state', 'declarationDate']])

In [None]:
# Rows per state, most frequent first.
state_disaster_count = disasters_claudia['state'].value_counts()

# The busiest state and how many rows it has.
most_disaster_state, most_disaster_count = state_disaster_count.idxmax(), state_disaster_count.max()
print(f"The state with the most disasters is {most_disaster_state} with {most_disaster_count} disasters.")

In [None]:
# Rows belonging to the state with the most disasters.
in_top_state = disasters_claudia['state'] == most_disaster_state
state_with_most_disasters = disasters_claudia[in_top_state]

# Tally incident types within that state.
incident_type_count = state_with_most_disasters['incidentType'].value_counts()

# Most frequent incident type there, with its tally.
most_common_incident, most_common_count = incident_type_count.idxmax(), incident_type_count.max()
print(f"The most common incident type in {most_disaster_state} is {most_common_incident} with {most_common_count} occurrences.")

In [None]:
# State x incident-type table of row counts (0 where a pair never occurs)
state_incident_summary = disasters_claudia.groupby(['state', 'incidentType']).size().unstack(fill_value=0)

# Most common incident type for each state, and how often it occurs
most_common_by_state = state_incident_summary.idxmax(axis=1)
most_common_count_by_state = state_incident_summary.max(axis=1)

# Combine results into a DataFrame; the three Series are aligned on state.
# rename_axis names the index 'State' before reset_index, so the resulting
# column is always called 'State'. Renaming a column called 'index' only works
# when the combined index has no name, and silently does nothing otherwise.
state_summary = pd.DataFrame({
    'Most Common Incident Type': most_common_by_state,
    'Count of Most Common Type': most_common_count_by_state,
    'Total Disasters': state_disaster_count
}).rename_axis('State').reset_index()

# Display the full summary
print(state_summary)

## Neel's Section