In [1]:
import pandas as pd
import numpy as np

In [4]:
df = pd.read_csv('ibtracs.NI.list.v04r01.csv', low_memory=False)
df

Unnamed: 0,SID,SEASON,NUMBER,BASIN,SUBBASIN,NAME,ISO_TIME,NATURE,LAT,LON,...,BOM_GUST_PER,REUNION_GUST,REUNION_GUST_PER,USA_SEAHGT,USA_SEARAD_NE,USA_SEARAD_SE,USA_SEARAD_SW,USA_SEARAD_NW,STORM_SPEED,STORM_DIR
0,,Year,,,,,,,degrees_north,degrees_east,...,second,kts,second,ft,nmile,nmile,nmile,nmile,kts,degrees
1,1842298N11080,1842,1,NI,BB,UNNAMED,1842-10-25 03:00:00,NR,10.9,80.3,...,,,,,,,,,9,265
2,1842298N11080,1842,1,NI,BB,UNNAMED,1842-10-25 06:00:00,NR,10.9,79.8,...,,,,,,,,,9,265
3,1842298N11080,1842,1,NI,BB,UNNAMED,1842-10-25 09:00:00,NR,10.8,79.4,...,,,,,,,,,9,265
4,1842298N11080,1842,1,NI,BB,UNNAMED,1842-10-25 12:00:00,NR,10.8,78.9,...,,,,,,,,,9,265
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62477,2025331N07081,2025,103,NI,BB,DITWAH,2025-12-02 00:00:00,NR,12.5,80.6,...,,,,,,,,,2,130
62478,2025331N07081,2025,103,NI,BB,DITWAH,2025-12-02 03:00:00,NR,12.4,80.8,...,,,,,,,,,4,125
62479,2025331N07081,2025,103,NI,BB,DITWAH,2025-12-02 06:00:00,NR,12.3,80.9,...,,,,,,,,,2,160
62480,2025331N07081,2025,103,NI,BB,DITWAH,2025-12-02 09:00:00,NR,12.2,80.8,...,,,,,,,,,3,250


In [5]:
CORE_COLUMNS = [
    'SID', 'SEASON', 'NUMBER', 'BASIN', 'SUBBASIN', 'NAME',
    'ISO_TIME', 'NATURE', 'LAT', 'LON', 'WMO_WIND', 'WMO_PRES',
    'WMO_AGENCY', 'TRACK_TYPE', 'DIST2LAND', 'LANDFALL', 'IFLAG'
]

In [6]:
df['BASIN'] = df['BASIN'].astype(str).str.strip().str.upper()
df['SUBBASIN'] = df['SUBBASIN'].astype(str).str.strip().str.upper()
df_filtered = df[(df['BASIN'] == 'NI') & (df['SUBBASIN'] == 'BB')].copy()
df_processed = df_filtered[CORE_COLUMNS].copy()

In [7]:
df_processed['LAT'] = pd.to_numeric(df_processed['LAT'], errors='coerce')
df_processed['LON'] = pd.to_numeric(df_processed['LON'], errors='coerce')
df_processed['ISO_TIME'] = pd.to_datetime(df_processed['ISO_TIME'], errors='coerce')
df_processed.dropna(subset=['LAT', 'LON', 'ISO_TIME'], inplace=True)

In [8]:
df_processed['LAT_rad'] = np.radians(df_processed['LAT'])
df_processed['LON_rad'] = np.radians(df_processed['LON'])
df_processed['prev_LAT_rad'] = df_processed.groupby('SID')['LAT_rad'].shift(1)
df_processed['prev_LON_rad'] = df_processed.groupby('SID')['LON_rad'].shift(1)
df_processed.dropna(subset=['prev_LAT_rad', 'prev_LON_rad'], inplace=True)

R = 6371.0
def haversine_distance(lat1, lon1, lat2, lon2):
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return R * c
df_processed['Haversine Distance'] = haversine_distance(df_processed['prev_LAT_rad'], df_processed['prev_LON_rad'], df_processed['LAT_rad'], df_processed['LON_rad'])
df_processed.drop(columns=['LAT_rad', 'LON_rad', 'prev_LAT_rad', 'prev_LON_rad'], inplace=True)

In [9]:
df_processed['NAME'] = df_processed['NAME'].fillna('UNNAMED')

In [10]:
LAT_MIN, LAT_MAX, LON_MIN, LON_MAX = 20.0, 25.0, 87.0, 93.0
df_bangladesh_area = df_processed[
    (df_processed['LAT'] >= LAT_MIN) & (df_processed['LAT'] <= LAT_MAX) &
    (df_processed['LON'] >= LON_MIN) & (df_processed['LON'] <= LON_MAX)
].copy()

In [11]:
output_filename = 'bangladesh_cyclones_historical.csv'
df_bangladesh_area.to_csv(output_filename, index=False)

print(f"Historical dataset (1842-2025) for Bangladesh Area saved as: {output_filename}")
print(f"Total rows in the historical dataset: {len(df_bangladesh_area)}")

Historical dataset (1842-2025) for Bangladesh Area saved as: bangladesh_cyclones_historical.csv
Total rows in the historical dataset: 6714


In [12]:
data = pd.read_csv("/content/bangladesh_cyclones_historical.csv")
data

Unnamed: 0,SID,SEASON,NUMBER,BASIN,SUBBASIN,NAME,ISO_TIME,NATURE,LAT,LON,WMO_WIND,WMO_PRES,WMO_AGENCY,TRACK_TYPE,DIST2LAND,LANDFALL,IFLAG,Haversine Distance
0,1877135N10083,1877,14,NI,BB,UNNAMED,1877-05-20 21:00:00,TS,22.8,87.1,,,,main,0,0,__________PP___,59.269827
1,1877135N10083,1877,14,NI,BB,UNNAMED,1877-05-21 00:00:00,TS,23.3,87.2,,,,main,0,0,__________OP___,56.531103
2,1877135N10083,1877,14,NI,BB,UNNAMED,1877-05-21 03:00:00,TS,23.6,87.3,,,,main,0,0,__________PO___,34.883381
3,1877135N10083,1877,14,NI,BB,UNNAMED,1877-05-21 06:00:00,TS,23.8,87.4,,,,main,0,0,__________OP___,24.458933
4,1877135N10083,1877,14,NI,BB,UNNAMED,1877-05-21 09:00:00,TS,24.0,87.4,,,,main,0,0,__________PP___,22.238985
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6709,2024258N22091,2024,62,NI,BB,UNNAMED,2024-09-15 21:00:00,TS,22.9,87.1,,,,main,0,0,_____P_________,10.243114
6710,2024258N22091,2024,62,NI,BB,UNNAMED,2024-09-16 00:00:00,TS,23.0,87.0,30,990,newdelhi,main,0,0,_____O_________,15.115788
6711,2024295N15092,2024,85,NI,BB,DANA,2024-10-24 15:00:00,TS,20.1,87.3,60,986,newdelhi,main,66,23,P____O_________,39.367274
6712,2024295N15092,2024,85,NI,BB,DANA,2024-10-24 18:00:00,TS,20.5,87.1,55,988,newdelhi,main,23,0,O____O_________,49.125664


In [25]:
data['NAME'].unique()

array(['UNNAMED', 'WINNIE', 'BILLIE', 'HOPE', 'KIM', 'HERBERT', 'FORREST',
       'AKASH', 'SIDR', 'RASHMI', 'BIJLI', 'AILA', 'MAHASEN:VIYARU',
       'KOMEN', 'ROANU', 'MORA', 'TITLI', 'FANI', 'BULBUL:MATMO',
       'AMPHAN', 'YAAS', 'JAWAD', 'SITRANG', 'MOCHA', 'HAMOON', 'MIDHILI',
       'REMAL', 'DANA'], dtype=object)

In [26]:
len(data['NAME'].unique())

28