In [None]:
import pandas as pd

# Read both predictor tables from CSV files in the working directory, in the
# order (mslAndSST, pressure).
mslAndSST, pressure = (
    pd.read_csv(csv_name) for csv_name in ('msl_and_sst.csv', 'pressure.csv')
)

In [None]:
# Inner-join the two predictor tables: a row survives only when the same
# (latitude, longitude, valid_time) key is present in both inputs.
merged_df = mslAndSST.merge(
    pressure,
    how='inner',
    on=['latitude', 'longitude', 'valid_time'],
)
print(merged_df.head())

   latitude  longitude           valid_time       msl    sst  \
0       0.0     -120.0  2020-05-01 00:00:00  100878.0  299.0   
1       0.0     -120.0  2020-05-01 06:00:00  101223.0  299.0   
2       0.0     -120.0  2020-05-01 12:00:00  100960.0  299.0   
3       0.0     -120.0  2020-05-01 18:00:00  101279.0  299.0   
4       0.0     -120.0  2020-05-02 00:00:00  100918.0  299.0   

   vertical_wind_shear  relative_humidity     vorticity  
0            27.167423          43.270535 -9.262520e-06  
1            26.283302          54.062960  6.945053e-06  
2            28.613567          59.316223  8.961910e-07  
3            31.102613          56.043290 -6.822765e-06  
4            29.146366          38.300670 -1.818381e-06  


In [None]:
# Persist the merged predictor table; index=False leaves the DataFrame index out of the file.
merged_df.to_csv('merged_2020_hurricane_predictors.csv', index=False)

In [None]:
# Keep only the rows of merged_df that satisfy ALL of the screening thresholds:
#   sst                  > 299.65
#   msl                  < 101_000
#   vertical_wind_shear  < 10
#   relative_humidity    > 60
#   vorticity            > 10^-5
#   latitude             strictly between 5 and 45
#   longitude            strictly between -100 and -10
# NOTE(review): the earlier note in this cell listed latitude as 5-30 and did not
# mention the longitude window, but the code has filtered on 5-45 / -100..-10;
# the code's bounds are kept here — confirm which latitude limit is intended.
# Units are whatever the CSV columns carry (presumably kelvin for sst and
# pascal for msl — TODO confirm).
favorable = (
    (merged_df['sst'] > 299.65)
    & (merged_df['msl'] < 101_000)
    & (merged_df['vertical_wind_shear'] < 10)
    & (merged_df['relative_humidity'] > 60)
    & (merged_df['vorticity'] > 10**-5)
    & (merged_df['latitude'] > 5)
    & (merged_df['latitude'] < 45)
    & (merged_df['longitude'] < -10)
    & (merged_df['longitude'] > -100)
)
# One boolean mask selects the same rows as the chain of successive filters
# did, and `df` is bound exactly once so re-running the cell is idempotent.
df = merged_df[favorable]
df

Unnamed: 0,latitude,longitude,valid_time,msl,sst,vertical_wind_shear,relative_humidity,vorticity
639456,6.0,-99.0,2020-05-07 00:00:00,100857.0,302.0,6.353390,70.682290,0.000014
639508,6.0,-99.0,2020-05-20 00:00:00,100948.0,302.0,4.723331,91.982090,0.000015
639510,6.0,-99.0,2020-05-20 12:00:00,100979.0,302.0,3.085481,91.525840,0.000110
639512,6.0,-99.0,2020-05-21 00:00:00,100929.0,302.0,6.755966,78.600490,0.000042
639750,6.0,-99.0,2020-07-19 12:00:00,100904.0,301.0,8.187444,65.447105,0.000018
...,...,...,...,...,...,...,...,...
4295842,41.0,-63.0,2020-08-17 12:00:00,100957.0,301.0,5.334395,92.450970,0.000020
4295843,41.0,-63.0,2020-08-17 18:00:00,100620.0,301.0,7.991423,63.981960,0.000032
4300177,41.0,-58.0,2020-08-31 06:00:00,100860.0,300.0,7.764542,82.190010,0.000041
4303542,41.0,-54.0,2020-08-16 12:00:00,100931.0,300.0,9.093582,75.314610,0.000055


In [None]:
# Save the threshold-filtered rows; index=False leaves the DataFrame index out of the file.
df.to_csv('filtered_data.csv', index=False)

In [None]:
# prompt: read json file processed-hurdat2.json

import pandas as pd
import json

def read_json_file(file_path):
    """Load and return the parsed contents of a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The value produced by ``json.load`` for the file, or ``None`` when no
        file exists at ``file_path`` (an error line is printed in that case).

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid JSON.
    """
    try:
        # Pin the encoding: JSON text is UTF-8, while open() would otherwise
        # fall back to the platform's locale encoding and could mis-decode
        # non-ASCII characters.
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
        return None

# Load the processed HURDAT2 records from the working directory.
file_path = 'processed-hurdat2.json'
json_data = read_json_file(file_path)
if json_data is None:
    # read_json_file signals a missing file by returning None; stop here with a
    # clear error instead of failing below with "'NoneType' object is not iterable".
    raise FileNotFoundError(f"Cannot build hurdatObs: {file_path} could not be read")

# Flatten the observations of every record whose storm_id equals '2020' into a
# single list of observation dicts.
# NOTE(review): presumably storm_id == '2020' selects a whole season rather
# than one storm — confirm against the JSON schema.
hurdatObs = [
    obs
    for storm in json_data
    if storm['storm_id'] == '2020'
    for obs in storm['observations']
]
hurdatObs

[{'date': '2020-05-16',
  'time': '18:00 UTC',
  'latitude': 28.0,
  'longitude': -78.7,
  'wind_speed': 30,
  'pressure': 1008},
 {'date': '2020-05-17',
  'time': '00:00 UTC',
  'latitude': 28.9,
  'longitude': -78.0,
  'wind_speed': 35,
  'pressure': 1006},
 {'date': '2020-05-17',
  'time': '06:00 UTC',
  'latitude': 29.6,
  'longitude': -77.6,
  'wind_speed': 35,
  'pressure': 1004},
 {'date': '2020-05-17',
  'time': '12:00 UTC',
  'latitude': 30.3,
  'longitude': -77.5,
  'wind_speed': 35,
  'pressure': 1003},
 {'date': '2020-05-17',
  'time': '18:00 UTC',
  'latitude': 31.0,
  'longitude': -77.3,
  'wind_speed': 40,
  'pressure': 1003},
 {'date': '2020-05-18',
  'time': '00:00 UTC',
  'latitude': 31.9,
  'longitude': -77.0,
  'wind_speed': 40,
  'pressure': 1003},
 {'date': '2020-05-18',
  'time': '06:00 UTC',
  'latitude': 33.1,
  'longitude': -76.7,
  'wind_speed': 40,
  'pressure': 1002},
 {'date': '2020-05-18',
  'time': '12:00 UTC',
  'latitude': 34.4,
  'longitude': -75.9,
 

In [None]:
# Cross-reference every HURDAT2 observation against the filtered grid rows.
# A row of `df` matches an observation when its `valid_time` equals the
# observation's timestamp and its latitude and longitude each lie strictly
# within 1 degree of the observation's position.
matchedMask = pd.Series(False, index=df.index)
for obs in hurdatObs:
    # 'HH:MM UTC' -> 'HH:MM:00', giving 'YYYY-MM-DD HH:MM:SS' to compare with
    # `valid_time` (assumes that column holds text in this form — TODO confirm).
    timestr = obs['date'] + ' ' + obs['time'][0:5] + ':00'

    near = (
        (df['valid_time'] == timestr)
        & (df['latitude'] > obs['latitude'] - 1)
        & (df['latitude'] < obs['latitude'] + 1)
        & (df['longitude'] > obs['longitude'] - 1)
        & (df['longitude'] < obs['longitude'] + 1)
    )
    if near.any():
        print(df[near])
        matchedMask |= near

# Bind `lonRows` once, to every row matched by any observation. Rebinding it
# inside the loop left only the final match (or no binding at all when nothing
# matched) for the cells that use `lonRows` afterwards. OR-ing the masks also
# keeps a row from appearing twice when it matches more than one observation.
lonRows = df[matchedMask]

         latitude  longitude           valid_time       msl    sst  \
1992895      19.0      -91.0  2020-06-01 18:00:00  100916.0  303.0   

         vertical_wind_shear  relative_humidity  vorticity  
1992895             9.609555           91.04405   0.000182  
         latitude  longitude           valid_time       msl    sst  \
1992040      19.0      -92.0  2020-06-02 00:00:00  100663.0  303.0   
2095616      20.0      -92.0  2020-06-02 00:00:00  100703.0  302.0   

         vertical_wind_shear  relative_humidity  vorticity  
1992040             5.153374           84.47435   0.000149  
2095616             9.655107           97.47966   0.000190  
         latitude  longitude           valid_time       msl    sst  \
2094761      20.0      -93.0  2020-06-02 06:00:00  100585.0  302.0   

         vertical_wind_shear  relative_humidity  vorticity  
2094761             3.236699           97.24262   0.000448  
         latitude  longitude           valid_time       msl    sst  \
2094762   

In [None]:
# Write `lonRows` to CSV; index=False leaves the DataFrame index out of the file.
# NOTE(review): this saves whatever `lonRows` was last bound to by the matching
# cell — verify it holds every HURDAT2 match and not only the most recent one.
lonRows.to_csv('simpleHurricanePredictorRowsThatMatchHurDat2.csv', index=False)