In [327]:
import pandas as pd
import numpy as np


# Reading the CSV document 

In [328]:
df = pd.read_csv("StormEvents_details-ftp_v1.0_d2024_c20251118.csv.")


# Pulling the head of the (First 5 rows) CSV

In [329]:
df.head()


Unnamed: 0,BEGIN_YEARMONTH,BEGIN_DAY,BEGIN_TIME,END_YEARMONTH,END_DAY,END_TIME,EPISODE_ID,EVENT_ID,STATE,STATE_FIPS,...,END_RANGE,END_AZIMUTH,END_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EPISODE_NARRATIVE,EVENT_NARRATIVE,DATA_SOURCE
0,202404,30,2033,202404,30,2033,189851,1174463,OKLAHOMA,40,...,0.0,SSW,FREDERICK ARPT,34.3444,-98.983,34.3444,-98.983,A rather nebulous upper air pattern existed ac...,Frederick Municipal Airport (KFDR) observation.,CSV
1,202407,1,0,202407,5,900,193486,1195301,LOUISIANA,22,...,,,,,,,,An upper ridge of high pressure built in acros...,,CSV
2,202411,16,230,202411,18,1421,197838,1223377,OREGON,41,...,,,,,,,,A series of cold fronts the weekend of Nov. 16...,The Hog Pass SNOTEL reported an estimated 12 i...,CSV
3,202405,22,1230,202405,22,1615,191723,1184135,TEXAS,48,...,,,,,,,,A strong upper-level subtropical ridge/heat do...,Harlingen Valley International Airport (KHRL) ...,CSV
4,202405,21,1200,202405,21,1530,191723,1184133,TEXAS,48,...,,,,,,,,A strong upper-level subtropical ridge/heat do...,"By proxy, between locations in northern Kenedy...",CSV


In [330]:
df.shape

(69502, 51)

In [331]:
# Keep only tornado events
tornado_2024_df = df[df['EVENT_TYPE'].str.lower() == 'tornado'].copy()

# Drop columns that are mostly empty or irrelevant
cols_to_drop = [
    'INJURIES_DIRECT', 'INJURIES_INDIRECT', 'DEATHS_DIRECT', 'DEATHS_INDIRECT',
    'TOR_OTHER_WFO', 'TOR_OTHER_CZ_STATE', 'TOR_OTHER_CZ_FIPS', 'TOR_OTHER_CZ_NAME',
    'CATEGORY', 'FLOOD_CAUSE', 'MAGNITUDE_TYPE',
    'BEGIN_RANGE', 'BEGIN_AZIMUTH', 'BEGIN_LOCATION', 'END_RANGE', 'END_AZIMUTH', 
    'END_LOCATION', 'BEGIN_LAT', 'BEGIN_LON', 'END_LAT', 'END_LON', 'DAMAGE_PROPERTY', 
    'DAMAGE_CROPS', 'EPISODE_ID', 'STATE_FIPS', 'CZ_NAME', 'WFO', 'MAGNITUDE', 'DATA_SOURCE',
    'EPISODE_NARRATIVE','EVENT_NARRATIVE','EVENT_ID','BEGIN_DATE_TIME','CZ_TIMEZONE',
    'END_DATE_TIME','CZ_TYPE','CZ_FIPS', "END_DAY", "END_TIME", "END_YEARMONTH"
]

tornado_2024_df.drop(columns=cols_to_drop, inplace=True)

tornado_2024_df.reset_index(drop=True, inplace=True)

tornado_2024_df

Unnamed: 0,BEGIN_YEARMONTH,BEGIN_DAY,BEGIN_TIME,STATE,YEAR,MONTH_NAME,EVENT_TYPE,SOURCE,TOR_F_SCALE,TOR_LENGTH,TOR_WIDTH
0,202411,5,1101,IOWA,2024,November,Tornado,Trained Spotter,EF0,0.32,50.0
1,202402,27,1932,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF1,2.58,110.0
2,202402,27,1934,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF1,2.45,160.0
3,202402,27,1815,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF0,0.05,10.0
4,202402,27,1917,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF1,1.16,125.0
...,...,...,...,...,...,...,...,...,...,...,...
2124,202406,22,1702,WISCONSIN,2024,June,Tornado,NWS Storm Survey,EF0,5.57,75.0
2125,202406,22,1922,WISCONSIN,2024,June,Tornado,NWS Storm Survey,EF1,5.26,100.0
2126,202406,22,1942,WISCONSIN,2024,June,Tornado,NWS Storm Survey,EF0,1.41,40.0
2127,202406,28,2002,MISSOURI,2024,June,Tornado,Emergency Manager,EFU,0.15,20.0


# Renaming columns and dropping old names

In [332]:
tornado_2024_df['BEGIN_YEARMONTH'] = tornado_2024_df['BEGIN_YEARMONTH'].astype(str) #CONVERT TO STRING!!!!

tornado_2024_df['Year'] = tornado_2024_df['BEGIN_YEARMONTH'].str[:4].astype(int)   # USING SPLICING // first 4 digits
tornado_2024_df['Month'] = tornado_2024_df['BEGIN_YEARMONTH'].str[4:6].astype(int) # USING SPLICING // last 2 digits
tornado_2024_df.drop(columns=['BEGIN_YEARMONTH'], inplace=True) #DROP OLD COLUMN NAME

tornado_2024_df.rename(columns={
    'BEGIN_TIME':'TIME',
    'MONTH_NAME' : 'MONTH',
    'BEGIN_DAY' : "DAY"
},inplace=True)

tornado_2024_df.head()

Unnamed: 0,DAY,TIME,STATE,YEAR,MONTH,EVENT_TYPE,SOURCE,TOR_F_SCALE,TOR_LENGTH,TOR_WIDTH,Year,Month
0,5,1101,IOWA,2024,November,Tornado,Trained Spotter,EF0,0.32,50.0,2024,11
1,27,1932,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF1,2.58,110.0,2024,2
2,27,1934,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF1,2.45,160.0,2024,2
3,27,1815,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF0,0.05,10.0,2024,2
4,27,1917,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF1,1.16,125.0,2024,2


# Moving The Month and Year columns to the front

In [333]:
# Move 'Month' to position 0 (first column)
month_col = tornado_2024_df.pop('Month')   # remove it temporarily
tornado_2024_df.insert(0, 'Month', month_col)  # insert at position 0
# Same thing for Month
month_col = tornado_2024_df.pop('Year')   # remove it temporarily
tornado_2024_df.insert(0, 'Year', month_col)  # insert at position 0

tornado_2024_df.head()

Unnamed: 0,Year,Month,DAY,TIME,STATE,YEAR,MONTH,EVENT_TYPE,SOURCE,TOR_F_SCALE,TOR_LENGTH,TOR_WIDTH
0,2024,11,5,1101,IOWA,2024,November,Tornado,Trained Spotter,EF0,0.32,50.0
1,2024,2,27,1932,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF1,2.58,110.0
2,2024,2,27,1934,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF1,2.45,160.0
3,2024,2,27,1815,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF0,0.05,10.0
4,2024,2,27,1917,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF1,1.16,125.0


# Changing the time to a format more readable, making sure it's a 4 digit values and adding a colon

In [334]:
tornado_2024_df['TIME'] = tornado_2024_df['TIME'].astype(str).str.zfill(4) # Make sure 4 digit value
tornado_2024_df['TIME'] = tornado_2024_df['TIME'].str[:2] + ':' + tornado_2024_df['TIME'].str[2:] #Adding a colon
tornado_2024_df['TIME'] = pd.to_datetime(tornado_2024_df['TIME'], format='%H:%M')


tornado_2024_df.head()

Unnamed: 0,Year,Month,DAY,TIME,STATE,YEAR,MONTH,EVENT_TYPE,SOURCE,TOR_F_SCALE,TOR_LENGTH,TOR_WIDTH
0,2024,11,5,1900-01-01 11:01:00,IOWA,2024,November,Tornado,Trained Spotter,EF0,0.32,50.0
1,2024,2,27,1900-01-01 19:32:00,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF1,2.58,110.0
2,2024,2,27,1900-01-01 19:34:00,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF1,2.45,160.0
3,2024,2,27,1900-01-01 18:15:00,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF0,0.05,10.0
4,2024,2,27,1900-01-01 19:17:00,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF1,1.16,125.0


# Checking for null values summary (mean)

In [335]:
tornado_2024_df.isnull().mean()

Year           0.0
Month          0.0
DAY            0.0
TIME           0.0
STATE          0.0
YEAR           0.0
MONTH          0.0
EVENT_TYPE     0.0
SOURCE         0.0
TOR_F_SCALE    0.0
TOR_LENGTH     0.0
TOR_WIDTH      0.0
dtype: float64

# Checking for duplicate rows

In [336]:
tornado_2024_df.duplicated().sum()
#Returns two

np.int64(2)

In [337]:
tornado_2024_df[tornado_2024_df.duplicated()]
#Looking at those two rows. They are different rows of data so will NOT drop those.

Unnamed: 0,Year,Month,DAY,TIME,STATE,YEAR,MONTH,EVENT_TYPE,SOURCE,TOR_F_SCALE,TOR_LENGTH,TOR_WIDTH
1816,2024,6,5,1900-01-01 18:59:00,MARYLAND,2024,June,Tornado,NWS Storm Survey,EF2,4.42,100.0
1857,2024,7,14,1900-01-01 22:48:00,ILLINOIS,2024,July,Tornado,NWS Employee,EFU,0.87,60.0


In [338]:
tornado_2024_df.head() #Looking at dataset after dropping and renaming rows

Unnamed: 0,Year,Month,DAY,TIME,STATE,YEAR,MONTH,EVENT_TYPE,SOURCE,TOR_F_SCALE,TOR_LENGTH,TOR_WIDTH
0,2024,11,5,1900-01-01 11:01:00,IOWA,2024,November,Tornado,Trained Spotter,EF0,0.32,50.0
1,2024,2,27,1900-01-01 19:32:00,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF1,2.58,110.0
2,2024,2,27,1900-01-01 19:34:00,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF1,2.45,160.0
3,2024,2,27,1900-01-01 18:15:00,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF0,0.05,10.0
4,2024,2,27,1900-01-01 19:17:00,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF1,1.16,125.0


# Checking data types

In [339]:
tornado_2024_df.dtypes

Year                    int64
Month                   int64
DAY                     int64
TIME           datetime64[ns]
STATE                  object
YEAR                    int64
MONTH                  object
EVENT_TYPE             object
SOURCE                 object
TOR_F_SCALE            object
TOR_LENGTH            float64
TOR_WIDTH             float64
dtype: object

# Changing objects to strings

In [340]:
tornado_2024_df['STATE'] = tornado_2024_df['STATE'].astype('string')
tornado_2024_df['MONTH'] = tornado_2024_df['MONTH'].astype('string')
tornado_2024_df['EVENT_TYPE'] = tornado_2024_df['EVENT_TYPE'].astype('string')
tornado_2024_df['SOURCE'] = tornado_2024_df['SOURCE'].astype('string')
tornado_2024_df['TOR_F_SCALE'] = tornado_2024_df['TOR_F_SCALE'].astype('string')
tornado_2024_df['MONTH'] = tornado_2024_df['MONTH'].astype('string')

tornado_2024_df.head()

Unnamed: 0,Year,Month,DAY,TIME,STATE,YEAR,MONTH,EVENT_TYPE,SOURCE,TOR_F_SCALE,TOR_LENGTH,TOR_WIDTH
0,2024,11,5,1900-01-01 11:01:00,IOWA,2024,November,Tornado,Trained Spotter,EF0,0.32,50.0
1,2024,2,27,1900-01-01 19:32:00,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF1,2.58,110.0
2,2024,2,27,1900-01-01 19:34:00,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF1,2.45,160.0
3,2024,2,27,1900-01-01 18:15:00,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF0,0.05,10.0
4,2024,2,27,1900-01-01 19:17:00,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF1,1.16,125.0


In [341]:
tornado_2024_df

Unnamed: 0,Year,Month,DAY,TIME,STATE,YEAR,MONTH,EVENT_TYPE,SOURCE,TOR_F_SCALE,TOR_LENGTH,TOR_WIDTH
0,2024,11,5,1900-01-01 11:01:00,IOWA,2024,November,Tornado,Trained Spotter,EF0,0.32,50.0
1,2024,2,27,1900-01-01 19:32:00,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF1,2.58,110.0
2,2024,2,27,1900-01-01 19:34:00,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF1,2.45,160.0
3,2024,2,27,1900-01-01 18:15:00,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF0,0.05,10.0
4,2024,2,27,1900-01-01 19:17:00,ILLINOIS,2024,February,Tornado,NWS Storm Survey,EF1,1.16,125.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2124,2024,6,22,1900-01-01 17:02:00,WISCONSIN,2024,June,Tornado,NWS Storm Survey,EF0,5.57,75.0
2125,2024,6,22,1900-01-01 19:22:00,WISCONSIN,2024,June,Tornado,NWS Storm Survey,EF1,5.26,100.0
2126,2024,6,22,1900-01-01 19:42:00,WISCONSIN,2024,June,Tornado,NWS Storm Survey,EF0,1.41,40.0
2127,2024,6,28,1900-01-01 20:02:00,MISSOURI,2024,June,Tornado,Emergency Manager,EFU,0.15,20.0


# Creating NEW CSV DOCUMENT with only 2024 data. Will combine....

In [None]:
tornado_2024_df.to_csv('C:/Users/Greg/Documents/tornado_2024_cleaned.csv', index=False)
