# Filter for IBTrACS dataset

In [1]:
from datetime import datetime
import pandas as pd
import numpy as np
import os

In [2]:
# IBTRACS FILTERING CONFIGURATION
year = datetime.now().year
month = datetime.now().month
day = datetime.now().day
start_year = 1980
end_year = 2021
t_delta = 6 # number of hours to sample
time_sampling = list(np.arange(0, 24, t_delta))
natures = ['TS', 'NR', 'ET', 'MX', 'SS', 'DS']
track_types = ['main']

In [3]:
ibtracs_src = '../../data/ibtracs/full/ibtracs.ALL.list.v04r00.csv'
ibtracs_filtered_src = '../../data/ibtracs/filtered'
ibtracs_dst = os.path.join(ibtracs_filtered_src, f'ibtracs_{"-".join(track_types)}-tracks_{t_delta}h_{start_year}-{end_year}_{"-".join(natures)}.csv')
os.makedirs(ibtracs_filtered_src, exist_ok=True)

ibtracs_dst

'../../data/ibtracs/filtered/ibtracs_main-tracks_6h_1980-2021_TS-NR-ET-MX-SS-DS.csv'

In [4]:
ibtracs = pd.read_csv(ibtracs_src, header=0, keep_default_na=False)
ibtracs.head()

  ibtracs = pd.read_csv(ibtracs_src, header=0, keep_default_na=False)


Unnamed: 0,SID,SEASON,NUMBER,BASIN,SUBBASIN,NAME,ISO_TIME,NATURE,LAT,LON,...,BOM_GUST_PER,REUNION_GUST,REUNION_GUST_PER,USA_SEAHGT,USA_SEARAD_NE,USA_SEARAD_SE,USA_SEARAD_SW,USA_SEARAD_NW,STORM_SPEED,STORM_DIR
0,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 03:00:00,NR,10.9,80.3,...,,,,,,,,,9,266
1,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 06:00:00,NR,10.8709,79.8265,...,,,,,,,,,9,267
2,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 09:00:00,NR,10.8431,79.3524,...,,,,,,,,,9,267
3,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 12:00:00,NR,10.8188,78.8772,...,,,,,,,,,9,267
4,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 15:00:00,NR,10.8,78.4,...,,,,,,,,,9,268


In [5]:
len(ibtracs)

711159

# Select only records from 1980 to 2021

In [6]:
pd.to_datetime(ibtracs['ISO_TIME']).dt.year.unique()

array([1842, 1845, 1848, 1851, 1852, 1853, 1854, 1855, 1856, 1857, 1858,
       1859, 1860, 1861, 1862, 1863, 1864, 1865, 1866, 1867, 1868, 1869,
       1870, 1871, 1872, 1873, 1874, 1875, 1876, 1877, 1878, 1879, 1880,
       1881, 1882, 1883, 1884, 1885, 1886, 1887, 1888, 1889, 1890, 1891,
       1892, 1893, 1894, 1895, 1896, 1897, 1898, 1899, 1900, 1901, 1902,
       1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913,
       1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924,
       1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935,
       1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946,
       1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957,
       1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968,
       1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979,
       1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990,
       1991, 1992, 1993, 1994, 1995, 1996, 1997, 19

In [7]:
ibtracs = ibtracs[pd.to_datetime(ibtracs['ISO_TIME']).dt.year.isin(np.arange(start_year,end_year+1))]

In [8]:
len(ibtracs)

281736

# Select only main tracks

In [9]:
ibtracs['TRACK_TYPE'].unique()

array(['main', 'spur-other', 'spur-merge', 'spur-split'], dtype=object)

In [10]:
ibtracs = ibtracs[ibtracs['TRACK_TYPE'].isin(track_types)]

In [11]:
len(ibtracs)

278519

# Select Natures TS, SS, ET, NR, MX and DS

In [12]:
ibtracs['NATURE'].unique()

array(['TS', 'NR', 'ET', 'MX', 'SS', 'DS'], dtype=object)

In [13]:
ibtracs = ibtracs[ibtracs['NATURE'].isin(natures)]

In [14]:
len(ibtracs)

278519

# Select only 6-hourly data (00:00, 06:00, 12:00, 18:00)

In [15]:
pd.to_datetime(ibtracs['ISO_TIME']).dt.hour.unique()

array([ 0,  3,  6,  9, 12, 15, 18, 21,  2, 14,  8,  4, 13, 20, 23,  5, 11,
       17,  1, 10, 16, 19,  7, 22])

In [16]:
ibtracs = ibtracs[pd.to_datetime(ibtracs['ISO_TIME']).dt.hour.isin(time_sampling)]

In [17]:
len(ibtracs)

139557

# Remove half hours from the dataset (like 00:30)

In [18]:
ibtracs = ibtracs[(pd.to_datetime(ibtracs['ISO_TIME']).dt.minute==0) & (pd.to_datetime(ibtracs['ISO_TIME']).dt.second==0)]

In [19]:
len(ibtracs)

139498

# Shift TC Longitudes in the range 0, 360

In [21]:
ibtracs['LON'] = (ibtracs['LON'] + 360) % 360

# Select only TCs within the selected Spatial Extent

In [22]:
lat_lim = [0, 70]
lon_lim = [100, 320]
ibtracs = ibtracs[(ibtracs['LON'] >= lon_lim[0]) & (ibtracs['LON'] <= lon_lim[1]) & (ibtracs['LAT'] >= lat_lim[0]) & (ibtracs['LAT'] <= lat_lim[1])]

In [23]:
len(ibtracs)

86193

# Save the dataset

In [24]:
ibtracs = ibtracs.reset_index(drop=True)
ibtracs.to_csv(ibtracs_dst)