In [10]:
# Basic libraries: numerical arrays, tabular data, and plotting
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt # only the pyplot interface is needed
sb.set() # apply the default Seaborn style to subsequent graphics

In [11]:
# Read one "StormEvents details" CSV per year. The paths are relative, so the
# files must sit in the notebook's working directory.
data2024 = pd.read_csv(
    'StormEvents_details-ftp_v1.0_d2024_c20250317.csv'
)
data2023 = pd.read_csv(
    'StormEvents_details-ftp_v1.0_d2023_c20250317.csv'
)
data2022 = pd.read_csv(
    'StormEvents_details-ftp_v1.0_d2022_c20241121.csv'
)
data2021 = pd.read_csv(
    'StormEvents_details-ftp_v1.0_d2021_c20240716.csv'
)
data2020 = pd.read_csv(
    'StormEvents_details-ftp_v1.0_d2020_c20240620.csv'
)

# Stack the yearly frames in chronological order under a fresh 0..n-1 index.
stormdata2 = pd.concat(
    [data2020, data2021, data2022, data2023, data2024],
    ignore_index=True,
)

# Remove the columns that are not carried forward: identifiers, free text,
# place names, and several categorical descriptors.
stormdata3 = stormdata2.drop(
    [
        # identifiers and raw timestamps
        'EPISODE_ID', 'EVENT_ID', 'BEGIN_DATE_TIME', 'END_DATE_TIME',
        # names and locations
        'MONTH_NAME', 'STATE', 'CZ_NAME', 'WFO', 'BEGIN_LOCATION',
        'END_LOCATION', 'BEGIN_AZIMUTH', 'END_AZIMUTH', 'TOR_OTHER_WFO',
        'TOR_OTHER_CZ_STATE', 'TOR_OTHER_CZ_NAME', 'CZ_TIMEZONE',
        # narratives and categorical descriptors
        'EPISODE_NARRATIVE', 'EVENT_NARRATIVE', 'SOURCE', 'MAGNITUDE_TYPE',
        'FLOOD_CAUSE', 'CATEGORY', 'TOR_F_SCALE', 'DATA_SOURCE',
    ],
    axis='columns',
)

# Preview the first ten rows.
stormdata3.head(10)

Unnamed: 0,BEGIN_YEARMONTH,BEGIN_DAY,BEGIN_TIME,END_YEARMONTH,END_DAY,END_TIME,STATE_FIPS,YEAR,EVENT_TYPE,CZ_TYPE,...,MAGNITUDE,TOR_LENGTH,TOR_WIDTH,TOR_OTHER_CZ_FIPS,BEGIN_RANGE,END_RANGE,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON
0,202006,24,1620,202006,24,1620,13,2020,Thunderstorm Wind,C,...,50.0,,,,1.0,1.0,31.7,-83.89,31.7,-83.89
1,202006,20,1930,202006,20,1930,20,2020,Hail,C,...,1.0,,,,8.0,8.0,39.7571,-99.6684,39.7571,-99.6684
2,202006,3,1550,202006,3,1550,20,2020,Hail,C,...,0.75,,,,14.0,14.0,39.9137,-101.9753,39.9137,-101.9753
3,202006,19,1900,202006,19,1900,20,2020,Thunderstorm Wind,C,...,52.0,,,,2.0,2.0,39.34,-101.37,39.34,-101.37
4,202006,20,1900,202006,20,1900,20,2020,Hail,C,...,1.25,,,,1.0,1.0,39.84,-99.89,39.84,-99.89
5,202006,20,1837,202006,20,1837,20,2020,Hail,C,...,0.75,,,,6.0,6.0,39.9397,-99.8877,39.9397,-99.8877
6,202006,23,1511,202006,23,1511,8,2020,Hail,C,...,0.75,,,,6.0,6.0,38.7204,-102.6783,38.7204,-102.6783
7,202006,26,1830,202006,26,1830,8,2020,Hail,C,...,0.75,,,,3.0,3.0,39.9134,-102.2277,39.9134,-102.2277
8,202006,26,1845,202006,26,1850,8,2020,Hail,C,...,1.75,,,,3.0,3.0,39.86,-102.18,39.86,-102.18
9,202006,9,1344,202006,9,1344,20,2020,High Wind,Z,...,59.0,,,,,,,,,


In [12]:
import pandas as pd
from datetime import datetime

def _event_datetime(yearmonth, day, hhmm):
    """Build a naive datetime from a YYYYMM value, a day of month and an HHMM time."""
    # int() lets float-typed values (e.g. 202006.0, 1620.0) through; pandas
    # stores an integer column as float64 as soon as it contains a NaN.
    year, month = divmod(int(yearmonth), 100)   # 202006 -> (2020, 6)
    hour, minute = divmod(int(hhmm), 100)       # 1620 -> (16, 20); 5 -> (0, 5)
    return datetime(year, month, int(day), hour, minute)


def compute_duration(row):
    """Return the elapsed time between an event's begin and end timestamps.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys 'BEGIN_YEARMONTH', 'BEGIN_DAY', 'BEGIN_TIME',
        'END_YEARMONTH', 'END_DAY' and 'END_TIME'. The YEARMONTH fields are
        YYYYMM numbers and the TIME fields are HHMM numbers (leading zeros
        dropped, so 5 means 00:05).

    Returns
    -------
    datetime.timedelta
        End minus begin. Both timestamps are built without timezone
        information. The result is negative if the end precedes the begin.

    Raises
    ------
    ValueError
        If a field is NaN or the fields do not form a valid calendar
        date/time.
    """
    begin_dt = _event_datetime(
        row['BEGIN_YEARMONTH'], row['BEGIN_DAY'], row['BEGIN_TIME']
    )
    end_dt = _event_datetime(
        row['END_YEARMONTH'], row['END_DAY'], row['END_TIME']
    )
    return end_dt - begin_dt

# Apply compute_duration to every row of stormdata3. This is a row-wise
# Python call, so it may take a while on a large frame.
stormdata3['duration'] = stormdata3.apply(compute_duration, axis='columns')

# 'duration' holds Timedelta values; express them as a plain number of hours.
stormdata3['duration_hours'] = stormdata3['duration'].dt.total_seconds() / 3600

# Build stormdata without the Timedelta column and the END_* fields.
# 'duration_hours' is kept.
stormdata = stormdata3.drop(
    columns=['duration', 'END_YEARMONTH', 'END_DAY', 'END_TIME']
)

stormdata.head(10)


Unnamed: 0,BEGIN_YEARMONTH,BEGIN_DAY,BEGIN_TIME,STATE_FIPS,YEAR,EVENT_TYPE,CZ_TYPE,CZ_FIPS,INJURIES_DIRECT,INJURIES_INDIRECT,...,TOR_LENGTH,TOR_WIDTH,TOR_OTHER_CZ_FIPS,BEGIN_RANGE,END_RANGE,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,duration_hours
0,202006,24,1620,13,2020,Thunderstorm Wind,C,321,0,0,...,,,,1.0,1.0,31.7,-83.89,31.7,-83.89,0.0
1,202006,20,1930,20,2020,Hail,C,137,0,0,...,,,,8.0,8.0,39.7571,-99.6684,39.7571,-99.6684,0.0
2,202006,3,1550,20,2020,Hail,C,23,0,0,...,,,,14.0,14.0,39.9137,-101.9753,39.9137,-101.9753,0.0
3,202006,19,1900,20,2020,Thunderstorm Wind,C,193,0,0,...,,,,2.0,2.0,39.34,-101.37,39.34,-101.37,0.0
4,202006,20,1900,20,2020,Hail,C,137,0,0,...,,,,1.0,1.0,39.84,-99.89,39.84,-99.89,0.0
5,202006,20,1837,20,2020,Hail,C,137,0,0,...,,,,6.0,6.0,39.9397,-99.8877,39.9397,-99.8877,0.0
6,202006,23,1511,8,2020,Hail,C,17,0,0,...,,,,6.0,6.0,38.7204,-102.6783,38.7204,-102.6783,0.0
7,202006,26,1830,8,2020,Hail,C,125,0,0,...,,,,3.0,3.0,39.9134,-102.2277,39.9134,-102.2277,0.0
8,202006,26,1845,8,2020,Hail,C,125,0,0,...,,,,3.0,3.0,39.86,-102.18,39.86,-102.18,0.083333
9,202006,9,1344,20,2020,High Wind,Z,2,0,0,...,,,,,,,,,,0.0


In [13]:
# Rename BEGIN_YEARMONTH to MONTH on the existing frame, then keep only the
# last two characters of its string form (the MM part of YYYYMM).
# MONTH is therefore stored as text, not as an integer.
stormdata.rename(
    columns={'BEGIN_YEARMONTH': 'MONTH'},
    inplace=True,
)
stormdata['MONTH'] = stormdata['MONTH'].astype(str).str.slice(-2)
stormdata.head(10)

Unnamed: 0,MONTH,BEGIN_DAY,BEGIN_TIME,STATE_FIPS,YEAR,EVENT_TYPE,CZ_TYPE,CZ_FIPS,INJURIES_DIRECT,INJURIES_INDIRECT,...,TOR_LENGTH,TOR_WIDTH,TOR_OTHER_CZ_FIPS,BEGIN_RANGE,END_RANGE,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,duration_hours
0,6,24,1620,13,2020,Thunderstorm Wind,C,321,0,0,...,,,,1.0,1.0,31.7,-83.89,31.7,-83.89,0.0
1,6,20,1930,20,2020,Hail,C,137,0,0,...,,,,8.0,8.0,39.7571,-99.6684,39.7571,-99.6684,0.0
2,6,3,1550,20,2020,Hail,C,23,0,0,...,,,,14.0,14.0,39.9137,-101.9753,39.9137,-101.9753,0.0
3,6,19,1900,20,2020,Thunderstorm Wind,C,193,0,0,...,,,,2.0,2.0,39.34,-101.37,39.34,-101.37,0.0
4,6,20,1900,20,2020,Hail,C,137,0,0,...,,,,1.0,1.0,39.84,-99.89,39.84,-99.89,0.0
5,6,20,1837,20,2020,Hail,C,137,0,0,...,,,,6.0,6.0,39.9397,-99.8877,39.9397,-99.8877,0.0
6,6,23,1511,8,2020,Hail,C,17,0,0,...,,,,6.0,6.0,38.7204,-102.6783,38.7204,-102.6783,0.0
7,6,26,1830,8,2020,Hail,C,125,0,0,...,,,,3.0,3.0,39.9134,-102.2277,39.9134,-102.2277,0.0
8,6,26,1845,8,2020,Hail,C,125,0,0,...,,,,3.0,3.0,39.86,-102.18,39.86,-102.18,0.083333
9,6,9,1344,20,2020,High Wind,Z,2,0,0,...,,,,,,,,,,0.0


In [29]:
# Columns whose missing values are replaced by 0.
# NOTE(review): 0 is also a legitimate latitude/longitude/range value, so
# filled rows cannot be told apart from real zeros afterwards. Confirm this
# is intended.
columns = [
    'TOR_LENGTH', 'TOR_WIDTH', 'TOR_OTHER_CZ_FIPS',   # tornado fields
    'BEGIN_RANGE', 'END_RANGE',                       # range fields
    'BEGIN_LAT', 'BEGIN_LON', 'END_LAT', 'END_LON',   # coordinates
]
stormdata[columns] = stormdata[columns].fillna(value=0)
stormdata.head(10)

Unnamed: 0,MONTH,BEGIN_DAY,BEGIN_TIME,STATE_FIPS,YEAR,EVENT_TYPE,CZ_TYPE,CZ_FIPS,INJURIES_DIRECT,INJURIES_INDIRECT,...,TOR_LENGTH,TOR_WIDTH,TOR_OTHER_CZ_FIPS,BEGIN_RANGE,END_RANGE,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,duration_hours
0,6,24,1620,13,2020,Thunderstorm Wind,C,321,0,0,...,0.0,0.0,0.0,1.0,1.0,31.7,-83.89,31.7,-83.89,0.0
1,6,20,1930,20,2020,Hail,C,137,0,0,...,0.0,0.0,0.0,8.0,8.0,39.7571,-99.6684,39.7571,-99.6684,0.0
2,6,3,1550,20,2020,Hail,C,23,0,0,...,0.0,0.0,0.0,14.0,14.0,39.9137,-101.9753,39.9137,-101.9753,0.0
3,6,19,1900,20,2020,Thunderstorm Wind,C,193,0,0,...,0.0,0.0,0.0,2.0,2.0,39.34,-101.37,39.34,-101.37,0.0
4,6,20,1900,20,2020,Hail,C,137,0,0,...,0.0,0.0,0.0,1.0,1.0,39.84,-99.89,39.84,-99.89,0.0
5,6,20,1837,20,2020,Hail,C,137,0,0,...,0.0,0.0,0.0,6.0,6.0,39.9397,-99.8877,39.9397,-99.8877,0.0
6,6,23,1511,8,2020,Hail,C,17,0,0,...,0.0,0.0,0.0,6.0,6.0,38.7204,-102.6783,38.7204,-102.6783,0.0
7,6,26,1830,8,2020,Hail,C,125,0,0,...,0.0,0.0,0.0,3.0,3.0,39.9134,-102.2277,39.9134,-102.2277,0.0
8,6,26,1845,8,2020,Hail,C,125,0,0,...,0.0,0.0,0.0,3.0,3.0,39.86,-102.18,39.86,-102.18,0.083333
9,6,9,1344,20,2020,High Wind,Z,2,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
