# Creating a Dictionary from All NCH Hurricane Archive Data

In [37]:
from herbie import Herbie
import numpy as np
import pandas as pd
import csv

In [46]:
filename = 'hurdat2-1851-2023-051124.txt'
missingVal = -999
basin='AL'
columnHeaders = ['id', 'name', 'entries', 'date', 'time', 'record_identifier', 'status', 'lat', 'lon', 'vmax', 'pres', '34ne', '34se', '34sw', '34nw', '50ne', '50se', '50sw', '50nw', '64ne', '64se', '64sw', '64nw', 'rmax']

In [47]:
# Reformatting input file to csv and creating a new file named 'data.csv'
with open(filename, 'r') as f_in, open('all_data.csv', 'w') as f_out:
    tmp = ""
    for line in f_in:
        line = line.replace(' ', '')  
        if line.startswith(basin):
            tmp = line.strip()  
        else:
            line = tmp + line  
            f_out.write(line)  

In [48]:
data = pd.read_csv('all_data.csv', header=None, names=columnHeaders)
data.replace(-999, np.nan, inplace=True) 
data

Unnamed: 0,id,name,entries,date,time,record_identifier,status,lat,lon,vmax,...,34nw,50ne,50se,50sw,50nw,64ne,64se,64sw,64nw,rmax
0,AL011851,UNNAMED,14,18510625,0,,HU,28.0N,94.8W,80,...,,,,,,,,,,
1,AL011851,UNNAMED,14,18510625,600,,HU,28.0N,95.4W,80,...,,,,,,,,,,
2,AL011851,UNNAMED,14,18510625,1200,,HU,28.0N,96.0W,80,...,,,,,,,,,,
3,AL011851,UNNAMED,14,18510625,1800,,HU,28.1N,96.5W,80,...,,,,,,,,,,
4,AL011851,UNNAMED,14,18510625,2100,L,HU,28.2N,96.8W,80,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54744,AL212023,TWENTY-ONE,6,20231023,1800,,TD,11.5N,83.2W,25,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0
54745,AL212023,TWENTY-ONE,6,20231024,0,,TD,12.2N,83.4W,25,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0
54746,AL212023,TWENTY-ONE,6,20231024,130,L,TD,12.4N,83.5W,25,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0
54747,AL212023,TWENTY-ONE,6,20231024,600,,TD,13.0N,83.8W,25,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0


In [49]:
# Convert latitude
data['lat'] = data['lat'].str[:-1].astype(float)


# Convert longitude
data['lon'] = data['lon'].apply(lambda x: '-' + x if not x.startswith('-') else x)
data['lon'] = data['lon'].str[:-1].astype(float)

data

Unnamed: 0,id,name,entries,date,time,record_identifier,status,lat,lon,vmax,...,34nw,50ne,50se,50sw,50nw,64ne,64se,64sw,64nw,rmax
0,AL011851,UNNAMED,14,18510625,0,,HU,28.0,-94.8,80,...,,,,,,,,,,
1,AL011851,UNNAMED,14,18510625,600,,HU,28.0,-95.4,80,...,,,,,,,,,,
2,AL011851,UNNAMED,14,18510625,1200,,HU,28.0,-96.0,80,...,,,,,,,,,,
3,AL011851,UNNAMED,14,18510625,1800,,HU,28.1,-96.5,80,...,,,,,,,,,,
4,AL011851,UNNAMED,14,18510625,2100,L,HU,28.2,-96.8,80,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54744,AL212023,TWENTY-ONE,6,20231023,1800,,TD,11.5,-83.2,25,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0
54745,AL212023,TWENTY-ONE,6,20231024,0,,TD,12.2,-83.4,25,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0
54746,AL212023,TWENTY-ONE,6,20231024,130,L,TD,12.4,-83.5,25,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0
54747,AL212023,TWENTY-ONE,6,20231024,600,,TD,13.0,-83.8,25,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0


In [50]:
# Creating a dictionary - one key for each hurricane - alternate hurDictset
hurDict = {}
newBasin = False

with open('all_data.csv', 'r') as f_in:
    reader = csv.reader(f_in)
    next(reader)
    for line in reader:
        if line[0] not in hurDict.keys():
            while len(line[4]) < 4: line[4] = '0' + line[4]
            currDate = pd.to_datetime(line[3] + line[4],format='%Y%m%d%H%M')
            hurDict[line[0]] = [line[1], [(currDate-currDate)/pd.Timedelta('1 hour')], [[line[7][:-1], line[8][:-1]]], [line[9]], [line[10]], [line[11:15]], [line[15:19]], [line[19:23]], [line[23]]]
            
        else:
            while len(line[4]) < 4: line[4] = '0' + line[4]
            date = pd.to_datetime(line[3] + line[4],format='%Y%m%d%H%M')

            if newBasin:
                currDate = date
                newBasin = False

            hurDict[line[0]][1].append((date - currDate)/pd.Timedelta('1 hour')) # Date to first numpy array
            hurDict[line[0]][2].append([line[7][:-1], line[8][:-1]]) # Lat and Lon to second numpy array
            hurDict[line[0]][3].append(line[9]) # vmax to third numpy array
            hurDict[line[0]][4].append(line[10]) # pres to fourth numpy array
            hurDict[line[0]][5].append(line[11:15]) # 34ne, 34se, 34sw, 34nw to fifth numpy array
            hurDict[line[0]][6].append(line[15:19]) # 50ne, 50se, 50sw, 50nw to sixth numpy array
            hurDict[line[0]][7].append(line[19:23]) # 64ne, 64se, 64sw, 64nw to seventh numpy array
            hurDict[line[0]][8].append(line[23]) # rmax to eighth numpy array

In [51]:
for hur in hurDict.keys():
    hurDict[hur][1] = np.array(hurDict[hur][1]).astype('float64')
    hurDict[hur][2] = np.array(hurDict[hur][2]).astype('float64')
    hurDict[hur][3] = np.array(hurDict[hur][3]).astype('float64')
    hurDict[hur][4] = np.array(hurDict[hur][4]).astype('float64')
    hurDict[hur][5] = np.array(hurDict[hur][5]).astype('float64')
    hurDict[hur][6] = np.array(hurDict[hur][6]).astype('float64')
    hurDict[hur][7] = np.array(hurDict[hur][7]).astype('float64')
    hurDict[hur][8] = np.array(
        [float(x) if x else np.nan for x in hurDict[hur][8]]
    )


In [52]:
df = pd.DataFrame(hurDict)
cols = ['name', 'time', 'trajectory', 'vmax', 'pres', '34', '50', '64', 'rmax']
headers = {}
for i in range(len(cols)):
    headers[i] = cols[i]
df = df.T.rename(columns=headers)

print(f'Number of hurricane tracks available: {df.index.nunique()}')
df

Number of hurricane tracks available: 1973


Unnamed: 0,name,time,trajectory,vmax,pres,34,50,64,rmax
AL011851,UNNAMED,"[0.0, 6.0, 12.0, 15.0, 18.0, 24.0, 30.0, 36.0,...","[[28.0, 95.4], [28.0, 96.0], [28.1, 96.5], [28...","[80.0, 80.0, 80.0, 80.0, 70.0, 60.0, 60.0, 50....","[-999.0, -999.0, -999.0, -999.0, -999.0, -999....","[[-999.0, -999.0, -999.0, -999.0], [-999.0, -9...","[[-999.0, -999.0, -999.0, -999.0], [-999.0, -9...","[[-999.0, -999.0, -999.0, -999.0], [-999.0, -9...","[-999.0, -999.0, -999.0, -999.0, -999.0, -999...."
AL021851,UNNAMED,[0.0],"[[22.2, 97.6]]",[80.0],[-999.0],"[[-999.0, -999.0, -999.0, -999.0]]","[[-999.0, -999.0, -999.0, -999.0]]","[[-999.0, -999.0, -999.0, -999.0]]",[-999.0]
AL031851,UNNAMED,[0.0],"[[12.0, 60.0]]",[50.0],[-999.0],"[[-999.0, -999.0, -999.0, -999.0]]","[[-999.0, -999.0, -999.0, -999.0]]","[[-999.0, -999.0, -999.0, -999.0]]",[-999.0]
AL041851,UNNAMED,"[0.0, 6.0, 12.0, 18.0, 24.0, 30.0, 36.0, 42.0,...","[[13.4, 48.0], [13.7, 49.5], [14.0, 51.0], [14...","[40.0, 40.0, 50.0, 50.0, 60.0, 60.0, 70.0, 70....","[-999.0, -999.0, -999.0, -999.0, -999.0, -999....","[[-999.0, -999.0, -999.0, -999.0], [-999.0, -9...","[[-999.0, -999.0, -999.0, -999.0], [-999.0, -9...","[[-999.0, -999.0, -999.0, -999.0], [-999.0, -9...","[-999.0, -999.0, -999.0, -999.0, -999.0, -999...."
AL051851,UNNAMED,"[0.0, 6.0, 12.0, 18.0, 24.0, 30.0, 36.0, 42.0,...","[[32.5, 73.5], [32.5, 73.5], [32.5, 73.5], [32...","[50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50....","[-999.0, -999.0, -999.0, -999.0, -999.0, -999....","[[-999.0, -999.0, -999.0, -999.0], [-999.0, -9...","[[-999.0, -999.0, -999.0, -999.0], [-999.0, -9...","[[-999.0, -999.0, -999.0, -999.0], [-999.0, -9...","[-999.0, -999.0, -999.0, -999.0, -999.0, -999...."
...,...,...,...,...,...,...,...,...,...
AL172023,PHILIPPE,"[0.0, 6.0, 12.0, 18.0, 24.0, 30.0, 36.0, 42.0,...","[[15.5, 36.6], [15.6, 38.0], [15.7, 39.1], [15...","[30.0, 30.0, 35.0, 40.0, 45.0, 45.0, 45.0, 45....","[1007.0, 1007.0, 1005.0, 1003.0, 1001.0, 1000....","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[70.0, 70.0, 60.0, 50.0, 50.0, 40.0, 40.0, 40...."
AL182023,RINA,"[0.0, 6.0, 12.0, 18.0, 24.0, 30.0, 36.0, 42.0,...","[[15.6, 44.5], [16.9, 45.1], [17.7, 45.8], [18...","[35.0, 35.0, 35.0, 40.0, 45.0, 45.0, 45.0, 45....","[1005.0, 1004.0, 1004.0, 1002.0, 999.0, 999.0,...","[[60.0, 50.0, 0.0, 0.0], [80.0, 60.0, 0.0, 0.0...","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[60.0, 50.0, 50.0, 50.0, 40.0, 40.0, 40.0, 40...."
AL192023,SEAN,"[0.0, 6.0, 12.0, 18.0, 24.0, 30.0, 36.0, 42.0,...","[[9.6, 30.2], [9.8, 31.4], [10.1, 32.6], [10.7...","[30.0, 35.0, 35.0, 35.0, 30.0, 30.0, 30.0, 35....","[1007.0, 1006.0, 1006.0, 1006.0, 1007.0, 1007....","[[0.0, 0.0, 0.0, 0.0], [80.0, 0.0, 0.0, 0.0], ...","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[80.0, 80.0, 80.0, 80.0, 60.0, 40.0, 40.0, 40...."
AL202023,TAMMY,"[0.0, 6.0, 12.0, 18.0, 24.0, 30.0, 36.0, 42.0,...","[[12.9, 51.0], [13.0, 52.5], [13.2, 54.0], [13...","[35.0, 35.0, 45.0, 50.0, 50.0, 50.0, 50.0, 60....","[1007.0, 1006.0, 1004.0, 1004.0, 1002.0, 1001....","[[120.0, 0.0, 0.0, 0.0], [120.0, 0.0, 0.0, 60....","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [...","[80.0, 60.0, 60.0, 50.0, 50.0, 50.0, 50.0, 40...."
