# Creating a Dataset from Filtered NHC Hurricane Archive Data

In [2]:
import pandas as pd
import numpy as np
import csv

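Below are two options for datasets constructed from the filtered NHC Hurricane Archive data. The first keeps the 34, 50 and 64 wind radii in three separate columns; the second groups all radii into a single column.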

In [11]:
# Build a dictionary with one entry per hurricane, keyed by the storm ID in
# column 0 of the CSV. Each entry is a list laid out as:
#   [0] name
#   [1] hours elapsed since that storm's first record
#   [2] [lat, lon] pairs
#   [3] vmax
#   [4] pres
#   [5] 34ne, 34se, 34sw, 34nw
#   [6] 50ne, 50se, 50sw, 50nw
#   [7] 64ne, 64se, 64sw, 64nw
#   [8] rmax
# Slots 1-8 hold one element per CSV row. Apart from the elapsed hours, the
# values are still the raw CSV strings at this point.
hurDict = {}
startDates = {}  # first timestamp seen for each storm ID

with open('filtered_data.csv', 'r', newline='') as f_in:
    reader = csv.reader(f_in)
    next(reader, None)  # skip the header row (and tolerate an empty file)
    for line in reader:
        if not line:
            continue  # csv.reader yields an empty list for blank lines

        stormId = line[0]
        # Column 3 + column 4 form a YYYYMMDDHHMM stamp; the time part may
        # have lost its leading zeros, so pad it back to four digits.
        date = pd.to_datetime(line[3] + line[4].zfill(4), format='%Y%m%d%H%M')

        if stormId not in hurDict:
            startDates[stormId] = date
            hurDict[stormId] = [line[1], [], [], [], [], [], [], [], []]

        track = hurDict[stormId]
        # Measure elapsed time from this storm's own first record so the result
        # does not depend on all rows of a storm being contiguous in the file.
        track[1].append((date - startDates[stormId]) / pd.Timedelta('1 hour'))
        track[2].append([line[7], line[8]])  # lat, lon
        track[3].append(line[9])  # vmax
        track[4].append(line[10])  # pres
        track[5].append(line[11:15])  # 34ne, 34se, 34sw, 34nw
        track[6].append(line[15:19])  # 50ne, 50se, 50sw, 50nw
        track[7].append(line[19:23])  # 64ne, 64se, 64sw, 64nw
        track[8].append(line[23])  # rmax

In [12]:
# Convert every storm's per-record lists into float64 NumPy arrays, in place.
for hur in hurDict:
    record = hurDict[hur]

    # Slots 1-7 (time, trajectory, vmax, pres and the three radii groups)
    # are converted directly.
    for slot in range(1, 8):
        record[slot] = np.array(record[slot]).astype('float64')

    # Slot 8 (rmax) may contain empty strings; those become NaN.
    record[8] = np.array(
        [np.nan if not raw else float(raw) for raw in record[8]]
    )


In [13]:
# Column labels, in the same order as the slots of each hurDict entry.
cols = ['name', 'time', 'trajectory', 'vmax', 'pres', '34', '50', '64', 'rmax']
headers = dict(enumerate(cols))  # positional index -> column label

# pd.DataFrame(hurDict) yields one column per storm ID, so transpose to get
# one row per storm before replacing the positional labels with names.
df = pd.DataFrame(hurDict).T
df = df.rename(columns=headers)

print(f'Number of hurricane tracks available: {df.index.nunique()}')
df

Number of hurricane tracks available: 79


Unnamed: 0,name,time,trajectory,vmax,pres,34,50,64,rmax
AL032014,BERTHA,"[0.0, 6.0, 12.0, 18.0, 24.0, 30.0, 36.0, 42.0,...","[[22.7, -72.5], [24.1, -73.1], [25.4, -73.5], ...","[45.0, 55.0, 60.0, 70.0, 70.0, 65.0, 55.0, 50....","[1012.0, 1007.0, 1004.0, 998.0, 999.0, 1001.0,...","[[140.0, 100.0, 0.0, 50.0], [140.0, 100.0, 0.0...","[[0.0, 0.0, 0.0, 0.0], [40.0, 40.0, 0.0, 0.0],...","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
AL042014,CRISTOBAL,"[0.0, 6.0, 12.0, 18.0, 24.0, 30.0, 36.0, 42.0,...","[[21.5, -72.2], [22.0, -72.5], [22.6, -72.9], ...","[30.0, 30.0, 35.0, 40.0, 45.0, 45.0, 45.0, 50....","[1005.0, 1004.0, 1002.0, 1001.0, 1001.0, 998.0...","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
AL012015,ANA,"[0.0, 6.0, 12.0, 18.0, 24.0, 30.0, 36.0, 42.0,...","[[26.8, -79.2], [28.2, -78.5], [29.7, -77.8], ...","[25.0, 25.0, 25.0, 30.0, 35.0, 40.0, 40.0, 40....","[1016.0, 1015.0, 1014.0, 1012.0, 1010.0, 1008....","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
AL022015,BILL,"[0.0, 6.0, 12.0, 16.75, 18.0, 24.0, 30.0, 36.0...","[[27.0, -94.3], [27.6, -95.2], [28.0, -96.0], ...","[45.0, 45.0, 50.0, 50.0, 50.0, 40.0, 30.0, 30....","[1005.0, 1001.0, 997.0, 997.0, 997.0, 998.0, 9...","[[140.0, 130.0, 0.0, 0.0], [130.0, 130.0, 0.0,...","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
AL032015,CLAUDETTE,"[0.0, 6.0, 12.0, 18.0, 24.0, 30.0, 36.0, 42.0]","[[35.4, -75.4], [35.3, -74.6], [35.2, -73.6], ...","[25.0, 25.0, 25.0, 25.0, 25.0, 30.0, 40.0, 45.0]","[1010.0, 1010.0, 1010.0, 1010.0, 1010.0, 1008....","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[nan, nan, nan, nan, nan, nan, nan, nan]"
...,...,...,...,...,...,...,...,...,...
AL082023,FRANKLIN,"[0.0, 6.0, 12.0, 18.0, 24.0, 30.0, 36.0, 42.0,...","[[26.7, -70.6], [27.5, -70.9], [28.2, -71.1], ...","[100.0, 125.0, 125.0, 130.0, 120.0, 115.0, 110...","[951.0, 937.0, 931.0, 926.0, 935.0, 941.0, 947...","[[130.0, 110.0, 100.0, 80.0], [130.0, 110.0, 1...","[[50.0, 40.0, 30.0, 40.0], [60.0, 40.0, 40.0, ...","[[25.0, 20.0, 10.0, 20.0], [30.0, 20.0, 20.0, ...","[10.0, 10.0, 10.0, 5.0, 5.0, 20.0, 20.0, 25.0,..."
AL092023,HAROLD,"[0.0, 6.0, 12.0, 18.0, 24.0, 27.0, 30.0, 36.0,...","[[24.7, -89.2], [25.0, -90.8], [25.4, -92.4], ...","[25.0, 30.0, 30.0, 40.0, 50.0, 50.0, 40.0, 30....","[1008.0, 1008.0, 1008.0, 1003.0, 996.0, 995.0,...","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[90.0, 90.0, 70.0, 50.0, 30.0, 30.0, 30.0, 40...."
AL102023,IDALIA,"[0.0, 6.0, 12.0, 18.0, 21.0, 23.75, 24.0, 30.0...","[[23.8, -84.8], [25.3, -84.8], [26.9, -84.7], ...","[70.0, 80.0, 90.0, 105.0, 115.0, 100.0, 100.0,...","[978.0, 973.0, 965.0, 945.0, 942.0, 950.0, 954...","[[120.0, 140.0, 50.0, 60.0], [120.0, 140.0, 50...","[[50.0, 40.0, 20.0, 30.0], [50.0, 40.0, 20.0, ...","[[20.0, 15.0, 0.0, 10.0], [20.0, 15.0, 10.0, 1...","[15.0, 15.0, 10.0, 10.0, 10.0, 10.0, 10.0, 20...."
AL132023,LEE,"[0.0, 6.0, 12.0, 18.0, 24.0, 26.0, 30.0, 36.0,...","[[37.1, -66.7], [38.7, -65.9], [40.9, -66.0], ...","[75.0, 75.0, 70.0, 65.0, 55.0, 55.0, 55.0, 50....","[962.0, 963.0, 965.0, 965.0, 968.0, 970.0, 979...","[[270.0, 270.0, 240.0, 300.0], [300.0, 300.0, ...","[[160.0, 170.0, 120.0, 150.0], [180.0, 180.0, ...","[[90.0, 80.0, 70.0, 100.0], [90.0, 120.0, 110....","[75.0, 90.0, 100.0, 100.0, 100.0, 100.0, 100.0..."


In [14]:
# Write the current df to 'hurDict.pkl' (relative to the working directory)
# in pickle format; it can be loaded again with pd.read_pickle.
df.to_pickle('hurDict.pkl')

A second alternative: the same dataset, but with all wind radii (34, 50 and 64, four quadrants each) grouped together in a single `radii` column.

In [15]:
# Build a dictionary with one entry per hurricane, keyed by the storm ID in
# column 0 of the CSV. This is the alternate layout in which all radii are
# grouped together. Each entry is a list laid out as:
#   [0] name
#   [1] hours elapsed since that storm's first record
#   [2] [lat, lon] pairs
#   [3] vmax
#   [4] pres
#   [5] all twelve ne/se/sw/nw radii (columns 11-22)
#   [6] rmax
# Slots 1-6 hold one element per CSV row. Apart from the elapsed hours, the
# values are still the raw CSV strings at this point.
hurDict = {}
startDates = {}  # first timestamp seen for each storm ID

with open('filtered_data.csv', 'r', newline='') as f_in:
    reader = csv.reader(f_in)
    next(reader, None)  # skip the header row (and tolerate an empty file)
    for line in reader:
        if not line:
            continue  # csv.reader yields an empty list for blank lines

        stormId = line[0]
        # Column 3 + column 4 form a YYYYMMDDHHMM stamp; the time part may
        # have lost its leading zeros, so pad it back to four digits.
        date = pd.to_datetime(line[3] + line[4].zfill(4), format='%Y%m%d%H%M')

        if stormId not in hurDict:
            startDates[stormId] = date
            hurDict[stormId] = [line[1], [], [], [], [], [], []]

        track = hurDict[stormId]
        # Measure elapsed time from this storm's own first record so the result
        # does not depend on all rows of a storm being contiguous in the file.
        track[1].append((date - startDates[stormId]) / pd.Timedelta('1 hour'))
        track[2].append([line[7], line[8]])  # lat, lon
        track[3].append(line[9])  # vmax
        track[4].append(line[10])  # pres
        track[5].append(line[11:23])  # all ne, se, sw, nw radii
        track[6].append(line[23])  # rmax

In [16]:
# Convert every storm's per-record lists into float64 NumPy arrays, in place.
for hur in hurDict:
    record = hurDict[hur]

    # Slots 1-5 (time, trajectory, vmax, pres and the grouped radii) are
    # converted directly.
    for slot in range(1, 6):
        record[slot] = np.array(record[slot]).astype('float64')

    # Slot 6 (rmax) may contain empty strings; those become NaN.
    record[6] = np.array(
        [np.nan if not raw else float(raw) for raw in record[6]]
    )

In [17]:
# Column labels, in the same order as the slots of each hurDict entry.
cols = ['name', 'time', 'trajectory', 'vmax', 'pres', 'radii', 'rmax']
headers = dict(enumerate(cols))  # positional index -> column label

# pd.DataFrame(hurDict) yields one column per storm ID, so transpose to get
# one row per storm before replacing the positional labels with names.
df = pd.DataFrame(hurDict).T
df = df.rename(columns=headers)

print(f'Number of hurricane tracks available: {df.index.nunique()}')
df

Number of hurricane tracks available: 79


Unnamed: 0,name,time,trajectory,vmax,pres,radii,rmax
AL032014,BERTHA,"[0.0, 6.0, 12.0, 18.0, 24.0, 30.0, 36.0, 42.0,...","[[22.7, -72.5], [24.1, -73.1], [25.4, -73.5], ...","[45.0, 55.0, 60.0, 70.0, 70.0, 65.0, 55.0, 50....","[1012.0, 1007.0, 1004.0, 998.0, 999.0, 1001.0,...","[[140.0, 100.0, 0.0, 50.0, 0.0, 0.0, 0.0, 0.0,...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
AL042014,CRISTOBAL,"[0.0, 6.0, 12.0, 18.0, 24.0, 30.0, 36.0, 42.0,...","[[21.5, -72.2], [22.0, -72.5], [22.6, -72.9], ...","[30.0, 30.0, 35.0, 40.0, 45.0, 45.0, 45.0, 50....","[1005.0, 1004.0, 1002.0, 1001.0, 1001.0, 998.0...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
AL012015,ANA,"[0.0, 6.0, 12.0, 18.0, 24.0, 30.0, 36.0, 42.0,...","[[26.8, -79.2], [28.2, -78.5], [29.7, -77.8], ...","[25.0, 25.0, 25.0, 30.0, 35.0, 40.0, 40.0, 40....","[1016.0, 1015.0, 1014.0, 1012.0, 1010.0, 1008....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
AL022015,BILL,"[0.0, 6.0, 12.0, 16.75, 18.0, 24.0, 30.0, 36.0...","[[27.0, -94.3], [27.6, -95.2], [28.0, -96.0], ...","[45.0, 45.0, 50.0, 50.0, 50.0, 40.0, 30.0, 30....","[1005.0, 1001.0, 997.0, 997.0, 997.0, 998.0, 9...","[[140.0, 130.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
AL032015,CLAUDETTE,"[0.0, 6.0, 12.0, 18.0, 24.0, 30.0, 36.0, 42.0]","[[35.4, -75.4], [35.3, -74.6], [35.2, -73.6], ...","[25.0, 25.0, 25.0, 25.0, 25.0, 30.0, 40.0, 45.0]","[1010.0, 1010.0, 1010.0, 1010.0, 1010.0, 1008....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[nan, nan, nan, nan, nan, nan, nan, nan]"
...,...,...,...,...,...,...,...
AL082023,FRANKLIN,"[0.0, 6.0, 12.0, 18.0, 24.0, 30.0, 36.0, 42.0,...","[[26.7, -70.6], [27.5, -70.9], [28.2, -71.1], ...","[100.0, 125.0, 125.0, 130.0, 120.0, 115.0, 110...","[951.0, 937.0, 931.0, 926.0, 935.0, 941.0, 947...","[[130.0, 110.0, 100.0, 80.0, 50.0, 40.0, 30.0,...","[10.0, 10.0, 10.0, 5.0, 5.0, 20.0, 20.0, 25.0,..."
AL092023,HAROLD,"[0.0, 6.0, 12.0, 18.0, 24.0, 27.0, 30.0, 36.0,...","[[24.7, -89.2], [25.0, -90.8], [25.4, -92.4], ...","[25.0, 30.0, 30.0, 40.0, 50.0, 50.0, 40.0, 30....","[1008.0, 1008.0, 1008.0, 1003.0, 996.0, 995.0,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[90.0, 90.0, 70.0, 50.0, 30.0, 30.0, 30.0, 40...."
AL102023,IDALIA,"[0.0, 6.0, 12.0, 18.0, 21.0, 23.75, 24.0, 30.0...","[[23.8, -84.8], [25.3, -84.8], [26.9, -84.7], ...","[70.0, 80.0, 90.0, 105.0, 115.0, 100.0, 100.0,...","[978.0, 973.0, 965.0, 945.0, 942.0, 950.0, 954...","[[120.0, 140.0, 50.0, 60.0, 50.0, 40.0, 20.0, ...","[15.0, 15.0, 10.0, 10.0, 10.0, 10.0, 10.0, 20...."
AL132023,LEE,"[0.0, 6.0, 12.0, 18.0, 24.0, 26.0, 30.0, 36.0,...","[[37.1, -66.7], [38.7, -65.9], [40.9, -66.0], ...","[75.0, 75.0, 70.0, 65.0, 55.0, 55.0, 55.0, 50....","[962.0, 963.0, 965.0, 965.0, 968.0, 970.0, 979...","[[270.0, 270.0, 240.0, 300.0, 160.0, 170.0, 12...","[75.0, 90.0, 100.0, 100.0, 100.0, 100.0, 100.0..."
