In [None]:
import huracanpy

import xarray as xr
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from glob import glob
from tqdm import tqdm

## Tracks without CPS

In [None]:
# Collect the files matching *TRACK_all*.nc, one level below TRACK_netcdf/.
# sorted() pins the order: glob's result order depends on the file system,
# and this list order determines the order of `sources` derived from it.
# Optional extra pattern kept from the original, currently disabled:
#   + glob("TRACK_netcdf/*/*TRACK_NH_all*.nc")
flist = sorted(glob("TRACK_netcdf/*/*TRACK_all*.nc"))

In [None]:
# List of available datasets
# Each dataset is named after the second '/'-separated component of its
# file path (index 1 after splitting).
sources = []
for path in flist:
    dataset_dir = path.split('/')[1]
    sources.append(dataset_dir)
sources

In [None]:
# Path dict
# Maps each dataset name to the file it was derived from
# (sources and flist are index-aligned).
filepaths = dict(zip(sources, flist))

In [None]:
# Data dict
# Open every dataset with xarray and rename its "psl" variable to "pres";
# tqdm shows progress while the files are opened.
data = {
    name: xr.open_dataset(filepaths[name]).rename({"psl": "pres"})
    for name in tqdm(sources)
}

In [None]:
# Check trackids
# Print the track_id of 3 randomly drawn records per dataset as a quick look.
# No seed is set in this cell, so the printed ids differ between runs.
for s in sources:
    # np.random.choice(n, k) returns integer *positions* in [0, n), so index
    # positionally with isel. sel is label-based and only gives the same
    # result when the record labels happen to equal 0..n-1.
    sample = np.random.choice(len(data[s].record), 3)
    sample_pts = data[s].isel(record=sample)
    print(s, sample_pts.track_id.values)

In [None]:
# Check durations
# One horizontal segment per dataset, from its first to its last year,
# with both years written just above the segment ends.
for row, name in enumerate(sources):
    years = data[name].time.dt.year
    first, last = years.min().values, years.max().values
    plt.plot([first, last], [row, row], marker='o')
    for yr in (first, last):
        plt.text(yr, row + 0.2, yr, horizontalalignment='center')
# Label each row with its dataset name.
plt.yticks(np.arange(len(sources)), sources)
plt.xticks([1800, 1900, 1950, 2000, 2025])
plt.ylim(-0.5, len(sources))
sns.despine()
plt.grid(axis='x')

In [None]:
# Check Number of tracks per year
# Histogram of the year of each track's genesis point (hrcn.get_gen_vals),
# using one bin per calendar year from 1850 to 2029.
fig, axs = plt.subplots(figsize=(20, 5))
year_centres = np.arange(1850, 2030)
year_edges = np.arange(1849.5, 2030)  # half-year offsets centre each bin on a year
for name in sources:
    genesis_years = data[name].hrcn.get_gen_vals().time.dt.year
    counts, _ = np.histogram(genesis_years, bins=year_edges)
    populated = counts > 0  # only draw years that actually contain tracks
    axs.plot(year_centres[populated], counts[populated], label=name)
plt.legend()
plt.ylim(0)

In [None]:
# Check geographical distribution
# Density plot of 100000 randomly drawn records per dataset.
# np.random.choice draws with replacement by default, so the sample size
# may exceed the number of records. No seed is set in this cell.
for s in sources:
    # The sample holds integer positions, so index positionally with isel
    # (sel is label-based and only matches when labels equal 0..n-1).
    sample = np.random.choice(len(data[s].record), 100000)
    sample_pts = data[s].isel(record=sample)
    sample_pts.hrcn.plot_density()
    plt.title(s)

In [None]:
# Check latitude distribution
# Overlay one latitude KDE per dataset, each from 10000 randomly drawn
# records (drawn with replacement; no seed is set in this cell).
for s in sources:
    # The sample holds integer positions, so index positionally with isel
    # (sel is label-based and only matches when labels equal 0..n-1).
    sample = np.random.choice(len(data[s].record), 10000)
    sample_pts = data[s].isel(record=sample)
    sns.kdeplot(sample_pts.lat, label=s)
plt.legend()

In [None]:
# Check psl values
# Bivariate KDE of latitude against pressure ("pres", renamed from "psl"
# when the data were loaded), from 1000 randomly drawn records per dataset.
for s in sources:
    # The sample holds integer positions, so index positionally with isel
    # (sel is label-based and only matches when labels equal 0..n-1).
    sample = np.random.choice(len(data[s].record), 1000)
    sample_pts = data[s].isel(record=sample)
    sns.kdeplot(x=sample_pts.lat, y=sample_pts.pres, levels=2, label=s)
plt.legend()

In [None]:
# Check wind values
# Bivariate KDE of latitude against 10 m wind ("wind10"), from 1000
# randomly drawn records per dataset.
for s in sources:
    # The sample holds integer positions, so index positionally with isel
    # (sel is label-based and only matches when labels equal 0..n-1).
    sample = np.random.choice(len(data[s].record), 1000)
    sample_pts = data[s].isel(record=sample)
    # Mask values >= 1e10 before the KDE
    # (presumably a missing-value fill — TODO confirm).
    wind = sample_pts.wind10.where(sample_pts.wind10 < 1e10)
    # label=s gives the legend an entry per dataset, as in the pressure check.
    sns.kdeplot(x=sample_pts.lat, y=wind, levels=2, label=s)
# Build the legend once, after every dataset has been drawn.
plt.legend()

In [None]:
# Write every dataset to TRACK/TRACK-<dataset>.csv with huracanpy.save.
for name in sources:
    out_path = f"TRACK/TRACK-{name}.csv"
    huracanpy.save(data[name], out_path)

## Tracks with CPS

In [None]:
# Collect the files matching *TRACK_tcident_nolat_CPS.nc, one level below
# TRACK_netcdf/. sorted() pins the order: glob's result order depends on
# the file system, and this list order determines the order of `sources`.
flist = sorted(glob("TRACK_netcdf/*/*TRACK_tcident_nolat_CPS.nc"))
flist

In [None]:
# List of available datasets
# Each dataset is named after the second '/'-separated component of its
# file path (index 1 after splitting).
sources = []
for path in flist:
    dataset_dir = path.split('/')[1]
    sources.append(dataset_dir)
sources

In [None]:
# Path dict
# Maps each dataset name to the file it was derived from
# (sources and flist are index-aligned).
filepaths = dict(zip(sources, flist))

In [None]:
# Data dict
# Open every dataset with xarray and rename its "psl" variable to "pres";
# tqdm shows progress while the files are opened.
data = {
    name: xr.open_dataset(filepaths[name]).rename({"psl": "pres"})
    for name in tqdm(sources)
}

In [None]:
# Check trackids
# Print the track_id of 3 randomly drawn records per dataset as a quick look.
# No seed is set in this cell, so the printed ids differ between runs.
for s in sources:
    # np.random.choice(n, k) returns integer *positions* in [0, n), so index
    # positionally with isel. sel is label-based and only gives the same
    # result when the record labels happen to equal 0..n-1.
    sample = np.random.choice(len(data[s].record), 3)
    sample_pts = data[s].isel(record=sample)
    print(s, sample_pts.track_id.values)

In [None]:
# Check Number of tracks per year
# Histogram of the year of each track's genesis point (hrcn.get_gen_vals),
# using one bin per calendar year from 1850 to 2029.
fig, axs = plt.subplots(figsize=(20, 5))
year_centres = np.arange(1850, 2030)
year_edges = np.arange(1849.5, 2030)  # half-year offsets centre each bin on a year
for name in sources:
    genesis_years = data[name].hrcn.get_gen_vals().time.dt.year
    counts, _ = np.histogram(genesis_years, bins=year_edges)
    populated = counts > 0  # only draw years that actually contain tracks
    axs.plot(year_centres[populated], counts[populated], label=name)
plt.legend()
plt.ylim(0)

In [None]:
# Check durations
# One horizontal segment per dataset, from its first to its last year,
# with both years written just above the segment ends.
for row, name in enumerate(sources):
    years = data[name].time.dt.year
    first, last = years.min().values, years.max().values
    plt.plot([first, last], [row, row], marker='o')
    for yr in (first, last):
        plt.text(yr, row + 0.2, yr, horizontalalignment='center')
# Label each row with its dataset name.
plt.yticks(np.arange(len(sources)), sources)
plt.xticks([1800, 1900, 1950, 2000, 2025])
plt.ylim(-0.5, len(sources))
sns.despine()
plt.grid(axis='x')

In [None]:
# Check geographical distribution
# Density plot of 100000 randomly drawn records per dataset.
# np.random.choice draws with replacement by default, so the sample size
# may exceed the number of records. No seed is set in this cell.
for s in sources:
    # The sample holds integer positions, so index positionally with isel
    # (sel is label-based and only matches when labels equal 0..n-1).
    sample = np.random.choice(len(data[s].record), 100000)
    sample_pts = data[s].isel(record=sample)
    sample_pts.hrcn.plot_density()
    plt.title(s)

In [None]:
# Write every dataset to TRACK_CPS/TRACK_CPS-<dataset>.csv with huracanpy.save.
for name in sources:
    out_path = f"TRACK_CPS/TRACK_CPS-{name}.csv"
    huracanpy.save(data[name], out_path)