In [2]:
# Project: Storm Events Analysis (NOAA)
# Notebook: 02_eda_overview.ipynb
# Goal: Explore trends, seasonality, and correlations
# Author: Brice Nelson
# Date: 2025-09-05

from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Render figures at a higher resolution than the matplotlib default.
plt.rc("figure", dpi=130)

# Fixed seed for NumPy's legacy global RNG so that any stochastic step
# further down the notebook is repeatable.
RANDOM_SEED = 42
np.random.seed(seed=RANDOM_SEED)

# Paths
# Resolve the project root: step up one level when the kernel was started
# inside the "notebooks" folder, otherwise use the working directory as-is.
PROJECT_ROOT = Path.cwd()
if PROJECT_ROOT.name == "notebooks":
    PROJECT_ROOT = PROJECT_ROOT.parent

# Data and report locations, all relative to the project root.
DATA_RAW = PROJECT_ROOT.joinpath("data", "raw")
DATA_INTERIM = PROJECT_ROOT.joinpath("data", "interim")
DATA_PROCESSED = PROJECT_ROOT.joinpath("data", "processed")
REPORT_FIGS = PROJECT_ROOT.joinpath("reports", "figures")

# Create the figures directory (and any missing parents); no error if it
# already exists.
REPORT_FIGS.mkdir(parents=True, exist_ok=True)

In [3]:
# Load the 1950 Storm Events "details" file from the raw-data directory.
# The path is built from DATA_RAW (defined in the config cell) rather than a
# hard-coded "../data/raw/..." string, so the load works whether the kernel
# was started in notebooks/ or in the project root.
STORM_DETAILS_CSV = DATA_RAW / "StormEvents_details-ftp_v1.0_d1950_c20170120.csv"
storm_data = pd.read_csv(STORM_DETAILS_CSV)

# Preview the first rows to confirm the file parsed as expected.
storm_data.head()

Unnamed: 0,BEGIN_YEARMONTH,BEGIN_DAY,BEGIN_TIME,END_YEARMONTH,END_DAY,END_TIME,EPISODE_ID,EVENT_ID,STATE,STATE_FIPS,YEAR,MONTH_NAME,EVENT_TYPE,CZ_TYPE,CZ_FIPS,CZ_NAME,WFO,BEGIN_DATE_TIME,CZ_TIMEZONE,END_DATE_TIME,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,SOURCE,MAGNITUDE,MAGNITUDE_TYPE,FLOOD_CAUSE,CATEGORY,TOR_F_SCALE,TOR_LENGTH,TOR_WIDTH,TOR_OTHER_WFO,TOR_OTHER_CZ_STATE,TOR_OTHER_CZ_FIPS,TOR_OTHER_CZ_NAME,BEGIN_RANGE,BEGIN_AZIMUTH,BEGIN_LOCATION,END_RANGE,END_AZIMUTH,END_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EPISODE_NARRATIVE,EVENT_NARRATIVE,DATA_SOURCE
0,195004,28,1445,195004,28,1445,,10096222,OKLAHOMA,40,1950,April,Tornado,C,149,WASHITA,,28-APR-50 14:45:00,CST,28-APR-50 14:45:00,0,0,0,0,250K,0,,0,,,,F3,3.4,400,,,,,0,,,0,,,35.12,-99.2,35.17,-99.2,,,PUB
1,195004,29,1530,195004,29,1530,,10120412,TEXAS,48,1950,April,Tornado,C,93,COMANCHE,,29-APR-50 15:30:00,CST,29-APR-50 15:30:00,0,0,0,0,25K,0,,0,,,,F1,11.5,200,,,,,0,,,0,,,31.9,-98.6,31.73,-98.6,,,PUB
2,195007,5,1800,195007,5,1800,,10104927,PENNSYLVANIA,42,1950,July,Tornado,C,77,LEHIGH,,05-JUL-50 18:00:00,CST,05-JUL-50 18:00:00,2,0,0,0,25K,0,,0,,,,F2,12.9,33,,,,,0,,,0,,,40.58,-75.7,40.65,-75.47,,,PUB
3,195007,5,1830,195007,5,1830,,10104928,PENNSYLVANIA,42,1950,July,Tornado,C,43,DAUPHIN,,05-JUL-50 18:30:00,CST,05-JUL-50 18:30:00,0,0,0,0,2.5K,0,,0,,,,F2,0.0,13,,,,,0,,,0,,,40.6,-76.75,,,,,PUB
4,195007,24,1440,195007,24,1440,,10104929,PENNSYLVANIA,42,1950,July,Tornado,C,39,CRAWFORD,,24-JUL-50 14:40:00,CST,24-JUL-50 14:40:00,0,0,0,0,2.5K,0,,0,,,,F0,0.0,33,,,,,0,,,0,,,41.63,-79.68,,,,,PUB


In [4]:
# Show the last rows of the frame to check the end of the file loaded cleanly.
storm_data.tail()

Unnamed: 0,BEGIN_YEARMONTH,BEGIN_DAY,BEGIN_TIME,END_YEARMONTH,END_DAY,END_TIME,EPISODE_ID,EVENT_ID,STATE,STATE_FIPS,YEAR,MONTH_NAME,EVENT_TYPE,CZ_TYPE,CZ_FIPS,CZ_NAME,WFO,BEGIN_DATE_TIME,CZ_TIMEZONE,END_DATE_TIME,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,SOURCE,MAGNITUDE,MAGNITUDE_TYPE,FLOOD_CAUSE,CATEGORY,TOR_F_SCALE,TOR_LENGTH,TOR_WIDTH,TOR_OTHER_WFO,TOR_OTHER_CZ_STATE,TOR_OTHER_CZ_FIPS,TOR_OTHER_CZ_NAME,BEGIN_RANGE,BEGIN_AZIMUTH,BEGIN_LOCATION,END_RANGE,END_AZIMUTH,END_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EPISODE_NARRATIVE,EVENT_NARRATIVE,DATA_SOURCE
218,195012,2,1500,195012,2,1500,,10009716,ILLINOIS,17,1950,December,Tornado,C,119,MADISON,,02-DEC-50 15:00:00,CST,02-DEC-50 15:00:00,0,0,0,0,25K,0,,0,,,,F2,7.3,50,,,,,0,,,0,,,38.97,-90.05,39.0,-89.92,,,PUB
219,195012,2,1515,195012,2,1515,,10009717,ILLINOIS,17,1950,December,Tornado,C,117,MACOUPIN,,02-DEC-50 15:15:00,CST,02-DEC-50 15:15:00,3,0,1,0,25K,0,,0,,,,F2,11.5,50,,,,,0,,,0,,,39.0,-89.92,39.07,-89.72,,,PUB
220,195012,2,1600,195012,2,1600,,10009718,ILLINOIS,17,1950,December,Tornado,C,119,MADISON,,02-DEC-50 16:00:00,CST,02-DEC-50 16:00:00,0,0,0,0,2.5M,0,,0,,,,F3,4.7,200,,,,,0,,,0,,,38.75,-89.67,38.8,-89.6,,,PUB
221,195012,2,1600,195012,2,1600,,10009719,ILLINOIS,17,1950,December,Tornado,C,5,BOND,,02-DEC-50 16:00:00,CST,02-DEC-50 16:00:00,25,0,2,0,2.5M,0,,0,,,,F3,13.3,200,,,,,0,,,0,,,38.8,-89.6,38.9,-89.38,,,PUB
222,195012,2,1730,195012,2,1730,,10009720,ILLINOIS,17,1950,December,Tornado,C,157,RANDOLPH,,02-DEC-50 17:30:00,CST,02-DEC-50 17:30:00,0,0,0,0,25K,0,,0,,,,F1,9.6,50,,,,,0,,,0,,,38.17,-89.78,38.22,-89.62,,,PUB


In [5]:
# (rows, columns) of the loaded frame.
storm_data.shape


(223, 51)

In [6]:
# Per-column dtype and non-null count; use it to spot columns that are
# empty or that were read as generic objects.
storm_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 223 entries, 0 to 222
Data columns (total 51 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   BEGIN_YEARMONTH     223 non-null    int64  
 1   BEGIN_DAY           223 non-null    int64  
 2   BEGIN_TIME          223 non-null    int64  
 3   END_YEARMONTH       223 non-null    int64  
 4   END_DAY             223 non-null    int64  
 5   END_TIME            223 non-null    int64  
 6   EPISODE_ID          0 non-null      float64
 7   EVENT_ID            223 non-null    int64  
 8   STATE               223 non-null    object 
 9   STATE_FIPS          223 non-null    int64  
 10  YEAR                223 non-null    int64  
 11  MONTH_NAME          223 non-null    object 
 12  EVENT_TYPE          223 non-null    object 
 13  CZ_TYPE             223 non-null    object 
 14  CZ_FIPS             223 non-null    int64  
 15  CZ_NAME             223 non-null    object 
 16  WFO     