## 1. Load and Prepare the Data

### 1.1. Load the data

In [5]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime as dt
import tensorflow as tf
import urllib.request, json
import os

from math import sqrt
from sklearn.metrics import mean_squared_error
from pandas_datareader import data
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from statsmodels.tsa.arima.model import ARIMA

2025-01-14 15:54:37.073210: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-14 15:54:37.154006: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-14 15:54:37.207438: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1736870077.278690    4469 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1736870077.298445    4469 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-14 15:54:37.453390: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

In [6]:
# Read the disaster declarations CSV into the working dataframe
df = pd.read_csv(filepath_or_buffer='../data/us_disaster_declarations.csv')

### 1.2. Date Encoding

In [7]:
# Parse 'incident_begin_date' (values formatted like '%Y-%m-%dT%H:%M:%SZ')
# into a new datetime column, 'incident_dtm'
df['incident_dtm'] = pd.to_datetime(
    df['incident_begin_date'],
    format='%Y-%m-%dT%H:%M:%SZ',
)

# Preview the parsed dates
df['incident_dtm'].head()

0   1953-05-02
1   1953-05-15
2   1953-05-29
3   1953-06-02
4   1953-06-06
Name: incident_dtm, dtype: datetime64[ns]

In [8]:
# Narrow the dataframe to the parsed incident date, the disaster type and the state
subset_df = df[
    ['incident_dtm', 'incident_type', 'state']
]

# Preview the subset
subset_df.head()

Unnamed: 0,incident_dtm,incident_type,state
0,1953-05-02,Tornado,GA
1,1953-05-15,Tornado,TX
2,1953-05-29,Flood,LA
3,1953-06-02,Tornado,MI
4,1953-06-06,Flood,MT


### 1.3. Disaster Encoding

In [9]:
# One-hot encode 'incident_type': one integer 0/1 column per disaster type
disaster_dummies = pd.get_dummies(data=subset_df['incident_type'], dtype=int)

# Preview the dummy columns
disaster_dummies.head()

Unnamed: 0,Biological,Chemical,Coastal Storm,Dam/Levee Break,Drought,Earthquake,Fire,Fishing Losses,Flood,Freezing,...,Severe Storm,Snowstorm,Terrorist,Tornado,Toxic Substances,Tropical Storm,Tsunami,Typhoon,Volcanic Eruption,Winter Storm
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [10]:
# Combine disaster dummies and subset_df, leaving out the raw 'incident_type' column.
# The drop also removes any dummy columns left over from a previous run of this
# cell (errors='ignore' skips names that are not present), so re-running the cell
# gives the same frame instead of duplicating columns or raising a KeyError.
subset_df = pd.concat(
    [
        subset_df
        .drop(columns=['incident_type', *disaster_dummies.columns], errors='ignore')
        .reset_index(drop=True),
        disaster_dummies.reset_index(drop=True),
    ],
    axis=1,
)

subset_df.head()

Unnamed: 0,incident_dtm,state,Biological,Chemical,Coastal Storm,Dam/Levee Break,Drought,Earthquake,Fire,Fishing Losses,...,Severe Storm,Snowstorm,Terrorist,Tornado,Toxic Substances,Tropical Storm,Tsunami,Typhoon,Volcanic Eruption,Winter Storm
0,1953-05-02,GA,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
1,1953-05-15,TX,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
2,1953-05-29,LA,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1953-06-02,MI,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,1953-06-06,MT,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### 1.4. Time Axis Regularization/Resampling

In [11]:
def sum_months(group: pd.DataFrame) -> pd.DataFrame:
    '''Takes a yearly groupby object and sums features over months'''

    group=group.resample('ME').sum()

    return group

def resample_months(group: pd.DataFrame) -> pd.DataFrame:
    '''Takes working dataframe and resamples frequency to months.
    Returns updated dataframe'''

    # Set 'incident_dtm' as datetime axis
    group=group.set_index('incident_dtm')

    # Sum disasters in each month by year; removes duplicates where there was more than one disaster in a month
    group=group.groupby(group.index.year, group_keys=False).apply(sum_months)

    # Resample to monthly frequency
    group=group.resample('D').asfreq()

    # Fill missing values with 0
    group=group.fillna(0)

    # Convert everything to int
    group=group.astype(bool)

    # Reset the index, preserving the `incident_dtm`
    group.reset_index(inplace=True, drop=False)

    return group

# Run resample_months on each state's rows; with group_keys=True the state
# label becomes the outer level of the result's index
resampled_df = (
    subset_df
    .groupby('state', group_keys=True)
    .apply(resample_months, include_groups=False)
)

In [12]:
# Preview the first ten rows of the resampled frame
resampled_df.head(n=10)

Unnamed: 0_level_0,Unnamed: 1_level_0,incident_dtm,Biological,Chemical,Coastal Storm,Dam/Levee Break,Drought,Earthquake,Fire,Fishing Losses,Flood,...,Severe Storm,Snowstorm,Terrorist,Tornado,Toxic Substances,Tropical Storm,Tsunami,Typhoon,Volcanic Eruption,Winter Storm
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AK,0,1953-10-31,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
AK,1,1953-11-01,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
AK,2,1953-11-02,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
AK,3,1953-11-03,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
AK,4,1953-11-04,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
AK,5,1953-11-05,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
AK,6,1953-11-06,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
AK,7,1953-11-07,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
AK,8,1953-11-08,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
AK,9,1953-11-09,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
