In [None]:
import scipy.ndimage
import xarray as xr 
import numpy as np 
import cmaps as cmap 
import matplotlib.pyplot as plt
import scipy 
import warnings
import matplotlib.patheffects as pe
from scipy.ndimage import gaussian_filter
from matplotlib import rcParams
from helper import helicity 

warnings.filterwarnings("ignore")
# rcParams['font.family'] = 'Courier New'

def add_storm_dim(dataset):
    """Return ``dataset`` with a ``case`` coordinate built from its case numbers.

    The values of ``dataset["case_number_global"]`` are attached as a
    coordinate named ``case`` on a dimension of the same name. This function
    is passed as the ``preprocess`` hook when the split analysis files are
    opened, so it runs once per file.

    Parameters
    ----------
    dataset : object supporting ``[...]`` lookup and ``assign_coords``
        Must expose a ``case_number_global`` variable.

    Returns
    -------
    The object returned by ``dataset.assign_coords``.
    """
    case_ids = dataset["case_number_global"].values
    return dataset.assign_coords(case=("case", case_ids))

def getAngles(dataset, caseList):
    """Gather the values of ``dataset`` at each ``num_cases`` index in ``caseList``.

    Despite the name, nothing here is specific to angles: the function is a
    generic "pick these cases" helper and is used for every per-case field.

    Parameters
    ----------
    dataset : object with ``isel(num_cases=...)`` returning something with ``.values``
        Field indexed along a ``num_cases`` dimension.
    caseList : sequence of int
        Positional indices along ``num_cases``; output order follows this order.

    Returns
    -------
    numpy.ndarray
        One entry per element of ``caseList``.
    """
    picked = [dataset.isel(num_cases=case_idx).values for case_idx in caseList]
    return np.array(picked)

In [None]:
def filterByParam(data, var, value, operator):
    """Keep the entries of ``data`` whose matching ``var`` entry passes a comparison.

    ``data[i]`` is kept when ``var[i] <operator> value`` is true.

    Parameters
    ----------
    data : sequence
        Items to filter; accessed by position.
    var : sequence
        Values tested against ``value``; must be indexable for every position
        in ``data``.
    value : object
        Right-hand side of the comparison.
    operator : str
        One of ``'>'``, ``'>='``, ``'<'``, ``'<='``, ``'=='``, ``'!='``.

    Returns
    -------
    list
        The selected items of ``data`` in their original order.

    Raises
    ------
    ValueError
        If ``operator`` is not one of the supported strings. An unknown
        operator used to yield an empty list, which was indistinguishable
        from "nothing matched".
    """
    # Imported locally under an alias because the ``operator`` parameter
    # shadows the standard-library module of the same name.
    import operator as _op

    comparisons = {
        '>': _op.gt,
        '>=': _op.ge,
        '<': _op.lt,
        '<=': _op.le,
        '==': _op.eq,
        '!=': _op.ne,
    }
    try:
        compare = comparisons[operator]
    except (KeyError, TypeError):
        raise ValueError(
            f"Unsupported operator {operator!r}; expected one of {sorted(comparisons)}"
        ) from None

    return [data[i] for i in range(len(data)) if compare(var[i], value)]

In [None]:
from defaultPlots import scatter
import pandas as pd 
import numpy as np 
from sklearn.ensemble import RandomForestRegressor
from sklearn import linear_model
import scipy 
import requests
from xgboost import XGBRegressor

def regression(input, output, RF = True, GB = False, n_train = 375, random_state = None):
    """Fit a regressor on the leading rows and score it on the remaining rows.

    The split is positional, with no shuffling: rows ``[:n_train]`` are used
    for training and rows ``[n_train:]`` for testing.

    Parameters
    ----------
    input : numpy.ndarray, shape (n_samples, n_features)
        Predictor matrix.
    output : numpy.ndarray, shape (n_samples,)
        Target values aligned with the rows of ``input``.
    RF : bool, default True
        Use a random forest. Takes precedence over ``GB``.
    GB : bool, default False
        Use XGBoost when ``RF`` is false. With both flags false an ordinary
        least-squares linear model is fitted.
    n_train : int, default 375
        Number of leading rows used for training.
    random_state : int or None, default None
        Seed forwarded to the random forest. ``None`` leaves the forest
        unseeded, so results vary between runs. The XGBoost model keeps its
        seed of 42 unless a value is given here.

    Returns
    -------
    tuple
        ``(regr, predictTest, importance, testIn, testOut)``: the fitted model,
        its predictions on the test rows, one importance value per feature,
        and the test predictors/targets.

    Raises
    ------
    ValueError
        If the split leaves no training rows or fewer than two test rows
        (the correlation score needs at least two points).
    """
    print('Input: ', input.shape, 'Output: ', output.shape)

    trainIn, testIn = input[:n_train], input[n_train:]
    trainOut, testOut = output[:n_train], output[n_train:]

    # Fail before the (potentially slow) fit rather than inside the scoring step.
    if trainIn.shape[0] == 0 or testIn.shape[0] < 2:
        raise ValueError(
            f"n_train={n_train} leaves {trainIn.shape[0]} training and "
            f"{testIn.shape[0]} testing rows; need at least 1 and 2 respectively"
        )

    print('Training: ', trainIn.shape[0], 'Testing: ', testIn.shape[0], 'Ratio: ', trainIn.shape[0] / input.shape[0] * 100)

    if RF:
        # Previously tried and left disabled: max_features = 0.75, max_depth = 25
        regr = RandomForestRegressor(n_estimators=200, n_jobs=-1, random_state=random_state)
    elif GB:
        regr = XGBRegressor(
            n_estimators=300,
            learning_rate=0.05,
            max_depth=10,
            subsample=0.8,
            colsample_bytree=0.8,
            objective='reg:squarederror',
            random_state=42 if random_state is None else random_state
        )
    else:
        regr = linear_model.LinearRegression()

    regr.fit(trainIn, trainOut)
    predictTest = regr.predict(testIn)

    try:
        importance = regr.feature_importances_
    except AttributeError:
        # Models without built-in importances (the linear fit): scale each
        # coefficient by its feature's standard deviation and normalise so
        # the values sum to one.
        coef = np.abs(regr.coef_ * np.nanstd(input, axis = 0))
        importance = coef / np.sum(coef)

    corr, _ = scipy.stats.pearsonr(predictTest, testOut)
    residual = predictTest - testOut
    rmse = np.sqrt(np.mean(residual**2))
    mae = np.mean(np.abs(residual))
    scatter(['Predicted DVMax (kt)', predictTest], ['DVMax (kt)', testOut])#, hline = False, vline = False)

    # The "R^2" printed here is the squared Pearson correlation between the
    # predictions and the held-out targets.
    print(f"{mae}kt MAE\n{rmse}kt RMSE\nR^2: {corr**2}")

    # import pickle
    # with open(r"C:\Users\deela\Downloads\SHIPS_RF_RI.cpickle", 'wb') as f:
    #     pickle.dump(regr, f)

    return regr, predictTest, importance, testIn, testOut

In [None]:
# Analysis output split over four NetCDF files (split1 ... split4); each file is
# passed through add_storm_dim before the files are combined.
# NOTE(review): the paths below are absolute and machine-specific.
data = xr.open_mfdataset([r"C:\Users\deela\Downloads\split" + str(x) +"_analysis.nc" for x in range(1, 5)], preprocess = add_storm_dim)
print(list(data.variables))

# Two tc_radar files covering 1997-2019 and 2020-2024, concatenated along num_cases.
tcradar = xr.open_mfdataset([r"C:\Users\deela\Downloads\tc_radar_v3m_1997_2019_xy_rel_swath_ships.nc", r"C:\Users\deela\Downloads\tc_radar_v3m_2020_2024_xy_rel_swath_ships.nc"], concat_dim='num_cases', combine='nested')

def _ships(name, lag = 0):
    """Select one SHIPS variable at a lag time, with 9999 replaced by NaN.

    The modelling cells treat 9999 as the missing-value sentinel, but they
    only test for it after derived predictors have been computed. Arithmetic
    on a raw 9999 (e.g. ``vmax - 9999``) produces a value that no longer
    equals 9999 and would slip through that later check, so the sentinel is
    masked here, before any arithmetic.
    """
    field = tcradar[name].sel(ships_lag_times = lag)
    return field.where(field != 9999)

sddc = _ships("sddc_ships")
shgc = _ships('shgc_ships')
sh12 = _ships('shgc_ships', 12)
# Ventilation proxy: shear * (100 - mid-level RH) / MPI, all taken at lag 0.
vent = (_ships('shdc_ships') * (100 - _ships('rhmd_ships'))) / _ships('mpi_ships')
vmax = _ships('vmax_ships')
# Change in vmax relative to the value at lag -12.
dtvm = vmax - _ships('vmax_ships', -12)
mpi = _ships('mpi_ships')
dist = _ships('dtl_ships')
dist12 = _ships('dtl_ships', 12)
ohc = _ships('ohc_ships')
sst = _ships('sst_ships')
sst12 = _ships('sst_ships', 12)
lat = _ships('lat_ships')
rh00 = _ships('rhmd_ships')
rh12 = _ships('rhmd_ships', 12)

In [None]:
# Case indices into the SHIPS fields; read once instead of once per field.
case_ids = data.case_number_global.values

def _per_case(field):
    """Extract ``field`` for every analysed case, with 9999 replaced by NaN.

    The modelling cells treat 9999 as the missing-value sentinel but only test
    for it after the predictors are assembled. Masking here keeps differences
    such as ``360 - value`` or ``rh00 - rh12`` from turning a sentinel into an
    ordinary-looking number that the later ``== 9999`` check cannot catch.
    """
    values = getAngles(field, case_ids)
    return np.where(values == 9999, np.nan, values)

# NOTE: most names below are rebound from the loaded fields to plain NumPy
# arrays, so this cell cannot be re-run without first re-running the cell
# that loads the SHIPS fields.
angles = 360 - _per_case(sddc)
shgc = _per_case(shgc)
sh12 = _per_case(sh12)
vent = _per_case(vent)
vmax = _per_case(vmax)
dtvm = _per_case(dtvm)
mpi = _per_case(mpi)
dist = _per_case(dist)
dist12 = _per_case(dist12)
ohc = _per_case(ohc)
sst = _per_case(sst)
sst12 = _per_case(sst12)
lats = _per_case(lat)
rh00 = _per_case(rh00)
rhtend = rh00 - _per_case(rh12)

In [None]:
# Pull the two tilt fields and the 24 h dvmax field out of the analysis dataset.
tiltX, tiltY, dvmax = (
    data[field_name] for field_name in ('predx_mean', 'predy_mean', 'dvmax_24h')
)

In [None]:
from defaultPlots import histogram

# Histogram of the x-component tilt field; the figure is not saved.
# NOTE(review): bounds is presumably [start, stop, step] -- confirm against defaultPlots.
tilt_x_values = np.array(tiltX)
histogram(
    ['Vortex Tilt (X)', tilt_x_values],
    bounds = [-100, 105, 5],
    save = False,
)

In [None]:
# Baseline experiment: the 17 predictors below, without the tilt fields.
# Each display name sits next to the array it describes so the two cannot
# drift out of step.
predictor_table = [
    ('Sea Surface Temperature', sst),
    ('SST / OHC', np.array(sst) / np.array(ohc)),
    ('VMax', vmax),
    ('Mid-Level RH', rh00),
    ('Generalized Shear (mag)', shgc),
    ('Deep-Layer Shear (dir)', angles),
    ('VMax - MPI', np.array(vmax) - np.array(mpi)),
    ('sin(lat)', np.sin(np.deg2rad(lats))),
    ('cos(lat)', np.cos(np.deg2rad(lats))),
    ('RH_TEND', rhtend),
    ('delwind_12', dtvm),
    ('12hr SHGC', sh12),
    ('Dist', dist),
    ('Dist12', dist12),
    ('OHC', ohc),
    ('Shear * SST', np.array(shgc) * (np.array(sst))),
    ('Vent Proxy', vent),
]
xNames = [label for label, _ in predictor_table]
x = [column for _, column in predictor_table]
y = np.array(dvmax)

# Sanity listing: index, name and length of every predictor.
for idx, (label, column) in enumerate(predictor_table):
    print(idx, label, len(column))

# Cast to float and turn the 9999 sentinel into NaN.
x = [np.asarray(column, dtype=float) for column in x]
x = [np.where(column == 9999, np.nan, column) for column in x]
y = np.where(y == 9999, np.nan, y)

# A sample survives only if the target and every predictor are finite
# (neither NaN nor +/-inf).
mask = np.isfinite(y)
for column in x:
    mask = mask & np.isfinite(column)

# Drop rejected samples, then stack predictors as columns of a 2D array.
x = np.column_stack([column[mask] for column in x])
y = y[mask]

regr, predictTest, importance, testIn, testOut = regression(x, y, RF = True, GB = False)

# Report one importance value per predictor.
feat = dict(zip(xNames, importance))
for label, score in feat.items():
    print(f"Feature: {label:>35}, Importance: {score}")
print('\n')

In [None]:
# Tilt experiment: the baseline predictors plus the two tilt fields.
# NOTE(review): the validity mask here also requires finite tilt values, so
# the surviving samples (and therefore the positional train/test split made
# inside regression) can differ from a run without the tilt columns.
predictor_table = [
    ('Sea Surface Temperature', sst),
    ('SST / OHC', np.array(sst) / np.array(ohc)),
    ('VMax', vmax),
    ('Mid-Level RH', rh00),
    ('Generalized Shear (mag)', shgc),
    ('Deep-Layer Shear (dir)', angles),
    ('VMax - MPI', np.array(vmax) - np.array(mpi)),
    ('sin(lat)', np.sin(np.deg2rad(lats))),
    ('cos(lat)', np.cos(np.deg2rad(lats))),
    ('RH_TEND', rhtend),
    ('delwind_12', dtvm),
    ('12hr SHGC', sh12),
    ('Dist', dist),
    ('Dist12', dist12),
    ('OHC', ohc),
    ('Shear * SST', np.array(shgc) * (np.array(sst))),
    ('Vent Proxy', vent),
    ('TILT X', tiltX),
    ('TILT Y', tiltY),
]
xNames = [label for label, _ in predictor_table]
x = [column for _, column in predictor_table]
y = np.array(dvmax)

# Sanity listing: index, name and length of every predictor.
for idx, (label, column) in enumerate(predictor_table):
    print(idx, label, len(column))

# Cast to float and turn the 9999 sentinel into NaN.
x = [np.asarray(column, dtype=float) for column in x]
x = [np.where(column == 9999, np.nan, column) for column in x]
y = np.where(y == 9999, np.nan, y)

# A sample survives only if the target and every predictor are finite
# (neither NaN nor +/-inf).
mask = np.isfinite(y)
for column in x:
    mask = mask & np.isfinite(column)

# Drop rejected samples, then stack predictors as columns of a 2D array.
x = np.column_stack([column[mask] for column in x])
y = y[mask]

regr, predictTest, importance, testIn, testOut = regression(x, y, RF = True, GB = False)

# Report one importance value per predictor.
feat = dict(zip(xNames, importance))
for label, score in feat.items():
    print(f"Feature: {label:>35}, Importance: {score}")
print('\n')