In [81]:
# Libraries imported for the notebook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides warnings raised by pandas in
# later cells -- consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Colab-only: mount Google Drive at /content/drive; the CSV loaded in the next
# cell is read from a path under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [82]:
# Read the raw CSV of all storms from the mounted Drive folder
raw_storm_csv = "/content/drive/My Drive/GDToT/ECO482_Project/Data/raw_storm_data.csv"
data = pd.read_csv(raw_storm_csv)

In [83]:
# Show every column whenever a frame is displayed
pd.set_option('display.max_columns', None)

# Columns that aren't useful for the analysis and are removed up front
unused_columns = [
    'BEGIN_YEARMONTH', 'BEGIN_DAY', 'BEGIN_TIME',
    'END_YEARMONTH', 'END_DAY', 'END_TIME',
    'DATA_SOURCE', 'DURATION', 'DURATION_MINS', 'WFO', 'SOURCE',
    'MAGNITUDE', 'MAGNITUDE_TYPE', 'FLOOD_CAUSE', 'CATEGORY',
    'YEAR', 'MONTH_NAME',
    'TOR_OTHER_WFO', 'TOR_OTHER_CZ_STATE', 'TOR_OTHER_CZ_FIPS', 'TOR_OTHER_CZ_NAME',
    'BEGIN_RANGE', 'BEGIN_AZIMUTH', 'BEGIN_LOCATION',
    'END_RANGE', 'END_AZIMUTH', 'END_LOCATION',
    'BEGIN_LAT', 'BEGIN_LON', 'END_LAT', 'END_LON',
]
data = data.drop(columns=unused_columns)

In [84]:
# Keep only the rows whose TOR_F_SCALE is on the EF scale.
# Rows with a null TOR_F_SCALE are removed by the same mask: astype(str) turns
# a missing value into a string such as 'nan', which is not in EF_SCALE.
EF_SCALE = ['EF0', 'EF1', 'EF2', 'EF3', 'EF4', 'EF5', 'EFU']
is_ef_rated = data['TOR_F_SCALE'].astype(str).isin(EF_SCALE)

# .copy() makes the filtered result an independent frame, so the column
# assignments in later cells write to `data` itself instead of to a slice of
# the unfiltered frame (which pandas reports as SettingWithCopyWarning).
data = data.loc[is_ef_rated].copy()

In [85]:
# Setup for a sentiment index built from narrative text.
# NOTE(review): this cell only scores EVENT_NARRATIVE; the episode narrative
# column is not used anywhere in the cell -- confirm whether it should be.

# Install the packages from the shell first; the imports below rely on them
# being available in the runtime.
!pip install pandas textblob nltk afinn
import nltk
from afinn import Afinn
from textblob import TextBlob

# Ensure required nltk data is downloaded
# NOTE(review): nothing in this cell reads these corpora directly -- confirm
# that the scorers actually need them.
nltk.download('sentiwordnet')
nltk.download('wordnet')

# Single AFINN scorer instance, reused by analyze_sentiment for every row
afinn = Afinn()

def analyze_sentiment(text):
    """Score one piece of text with TextBlob and AFINN.

    Parameters
    ----------
    text : str
        Text to score. Anything that is not a string (for example None, or
        the NaN pandas uses for a missing cell) is not scored.

    Returns
    -------
    pandas.Series
        Two entries labelled 'TextBlob_Score' and 'AFINN_Score'. Both entries
        are NaN when `text` is not a string.
    """
    score_labels = ['TextBlob_Score', 'AFINN_Score']

    # A missing narrative arrives here as NaN/None. TextBlob rejects
    # non-string input, so report "no score" for that row instead of letting
    # one empty cell abort the whole .apply().
    if not isinstance(text, str):
        return pd.Series([np.nan, np.nan], index=score_labels)

    # TextBlob sentiment (polarity ranges from -1 to 1)
    blob_score = TextBlob(text).sentiment.polarity

    # AFINN sentiment (ranges from negative to positive integer scores),
    # computed with the module-level `afinn` scorer
    afinn_score = afinn.score(text)

    return pd.Series([blob_score, afinn_score], index=score_labels)


# Lower-case each event narrative, score it, and attach both scores as new columns
narratives_lower = data['EVENT_NARRATIVE'].str.lower()
sentiment_scores = narratives_lower.apply(analyze_sentiment)
data[['TextBlob_Score', 'AFINN_Score']] = sentiment_scores



[nltk_data] Downloading package sentiwordnet to /root/nltk_data...
[nltk_data]   Package sentiwordnet is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [86]:
# One-hot encode the categorical columns
categorical_columns = ['STATE', 'CZ_TYPE', 'CZ_NAME', 'CZ_TIMEZONE', 'TOR_F_SCALE']
data = pd.get_dummies(data, columns=categorical_columns)

# Recast every boolean column as 0/1 integers in a single pass
bool_columns = [name for name in data.columns if data[name].dtype == 'bool']
data = data.astype({name: int for name in bool_columns})