In [1]:
import os 
import xarray as xr
import numpy as np
import glob
import pandas as pd
from sklearn.cluster import KMeans, AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler, normalize
import itertools
from numpy.lib.stride_tricks import sliding_window_view
import datetime
from matplotlib import pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.ticker as mticker
from cartopy.mpl.ticker import (LongitudeFormatter, LatitudeFormatter)


In [100]:
# Project layout. The default is the original local checkout; set the
# TCS_GENESIS_DIR environment variable to run the notebook from another location.
project_dir = os.environ.get('TCS_GENESIS_DIR', '/Users/huripari/Documents/PhD/TCs_Genesis')
data_dir = os.path.join(project_dir, 'data')
CI_dir = os.path.join(data_dir, 'CI')

# Names of the climate indices to use (these become the columns of df_CIs).
CIs = ['EA-WR', 'ENSO3.4', 'EP-NP', 'NAO', 'PDO', 'PNA', 'SOI', 'TNA', 'TSA', 'WP'] # NOI and NP have missing data for some months
# Monthly time axis: month-start timestamps from 1980-01-01 through 2021-12-01.
date_range = pd.date_range(start='1980-01-01', end='2021-12-01', freq='MS')
# All-NaN placeholder table (rows = months, columns = indices).
# dtype=float keeps the columns float64; without it pandas creates object-dtype
# columns, and numbers written into them afterwards remain `object`.
df_CIs = pd.DataFrame(index=date_range, columns=CIs, dtype=float)
df_CIs

Unnamed: 0,EA-WR,ENSO3.4,EP-NP,NAO,PDO,PNA,SOI,TNA,TSA,WP
1980-01-01,,,,,,,,,,
1980-02-01,,,,,,,,,,
1980-03-01,,,,,,,,,,
1980-04-01,,,,,,,,,,
1980-05-01,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
2021-08-01,,,,,,,,,,
2021-09-01,,,,,,,,,,
2021-10-01,,,,,,,,,,
2021-11-01,,,,,,,,,,


In [101]:
# Fill df_CIs one climate index at a time from '<CI_dir>/<index name>.txt'.
# Each file is read as a headerless, whitespace-separated table; the lookup
# treats column 0 as the year and column m (1..12) as the value for month m.
for CI in CIs:
    filename = os.path.join(CI_dir, CI + '.txt')
    # Raw string: '\s' inside a plain literal is an invalid escape sequence.
    data = pd.read_table(filename, sep=r'\s+', header=None)

    # One row per year, keeping the first occurrence of a repeated year
    # (the same row the previous per-cell `.values[0]` lookup selected).
    by_year = data.drop_duplicates(subset=0, keep='first').set_index(0)

    # Translate every (year, month) of the target index into a (row, column)
    # position of the yearly table; -1 marks a label that is not present.
    years = df_CIs.index.year
    months = df_CIs.index.month
    row_pos = by_year.index.get_indexer(years)
    col_pos = by_year.columns.get_indexer(months)
    if (row_pos < 0).any():
        missing_years = sorted(set(years[row_pos < 0]))
        raise IndexError(f'{filename}: no row found for year(s) {missing_years}')
    if (col_pos < 0).any():
        missing_months = sorted(set(months[col_pos < 0]))
        raise KeyError(f'{filename}: no column found for month(s) {missing_months}')

    # Single vectorised pick instead of one scalar .loc write per month; the
    # column is stored as float64 rather than object.
    df_CIs[CI] = by_year.to_numpy(dtype=float)[row_pos, col_pos]

In [102]:
# Display df_CIs (populated by the file-reading loop in the preceding cell).
df_CIs

Unnamed: 0,EA-WR,ENSO3.4,EP-NP,NAO,PDO,PNA,SOI,TNA,TSA,WP
1980-01-01,0.31,0.54,0.53,-1.38,-0.11,-1.01,0.7,0.4,-0.15,-1.71
1980-02-01,-0.3,0.22,0.86,-0.39,1.32,2.03,0.5,0.41,-0.34,-0.5
1980-03-01,-1.42,0.03,0.15,-0.73,1.09,-0.64,-0.7,0.17,-0.26,1.09
1980-04-01,0.3,-0.07,-0.75,1.26,1.49,1.66,-1.0,0.56,-0.11,0.72
1980-05-01,0.24,0.08,-0.7,-1.34,1.2,-0.16,-0.0,0.71,-0.04,1.85
...,...,...,...,...,...,...,...,...,...,...
2021-08-01,-2.37,-0.53,-1.82,-0.49,-1.12,0.93,1.0,0.26,0.92,-1.94
2021-09-01,0.53,-0.55,-1.89,-0.06,-1.53,0.33,1.3,0.54,0.58,-0.65
2021-10-01,-0.62,-0.94,-2.42,-1.98,-2.55,1.41,1.2,0.46,0.65,1.74
2021-11-01,0.01,-0.94,0.34,-0.33,-2.52,0.68,1.6,0.42,0.5,-0.15


In [115]:
# Number of clusters; it selects both the input folder ('<n>clusters') and the
# numeric suffix of every file name.
n_clusters = 12
cluster_data_dir = os.path.join(project_dir, 'FS_TCG', 'data', f'{n_clusters}clusters')
# Variable tags; one 'averages_<tag>GLB<n>.csv' file is read for each.
cluster_vars = ['msl', 'abs_vo850', 'ssta20', 'vws850-200']

# Read every file first (first CSV column becomes the date-parsed index) and
# join them side by side with a single concat, rather than growing the frame
# inside the loop behind a first-iteration special case.
cluster_frames = []
for var in cluster_vars:
    filename = f'averages_{var}GLB{n_clusters}.csv'
    path = os.path.join(cluster_data_dir, filename)
    cluster_frames.append(pd.read_csv(path, index_col=0, parse_dates=True))

# With an empty cluster_vars this raises instead of silently leaving a stale
# (or undefined) dataset_cluster in the kernel.
dataset_cluster = pd.concat(cluster_frames, axis=1)


In [129]:
# Place the cluster averages and the climate indices side by side (aligned on
# their indexes), then append each row's calendar month as a 'month' column.
total_data = (
    pd.concat([dataset_cluster, df_CIs], axis=1)
    .assign(month=lambda frame: frame.index.month)
)

In [130]:
# Display the combined table built in the preceding cell.
total_data

Unnamed: 0,mslGLB_cluster1,mslGLB_cluster2,mslGLB_cluster3,mslGLB_cluster4,mslGLB_cluster5,mslGLB_cluster6,mslGLB_cluster7,mslGLB_cluster8,mslGLB_cluster9,mslGLB_cluster10,...,ENSO3.4,EP-NP,NAO,PDO,PNA,SOI,TNA,TSA,WP,month
1980-01-01,126841.885521,103358.218217,103532.559975,104799.249031,114794.317782,144377.384479,106865.040092,146557.917668,102898.590764,121037.208208,...,0.54,0.53,-1.38,-0.11,-1.01,0.7,0.4,-0.15,-1.71,1
1980-02-01,127059.743220,103322.693537,103525.378788,104926.403020,114856.650528,144151.618039,106890.879009,146786.057692,102929.271497,120874.350527,...,0.22,0.86,-0.39,1.32,2.03,0.5,0.41,-0.34,-0.5,2
1980-03-01,126646.707450,103313.987926,102941.027462,104884.193314,114720.404930,143881.446238,107023.202713,147115.042819,102868.404658,123086.248117,...,0.03,0.15,-0.73,1.09,-0.64,-0.7,0.17,-0.26,1.09,3
1980-04-01,126620.534601,103366.048473,102408.791035,104974.396399,114798.604754,143466.158175,107152.460457,146960.854868,102839.648686,123234.393825,...,-0.07,-0.75,1.26,1.49,1.66,-1.0,0.56,-0.11,0.72,4
1980-05-01,126523.203709,103496.551847,102033.956755,104997.710756,114839.392606,143107.842121,107347.361050,147125.995343,102872.253185,123282.323042,...,0.08,-0.7,-1.34,1.2,-0.16,-0.0,0.71,-0.04,1.85,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-01,126731.041537,103672.094283,101767.226957,104903.185562,115276.254401,142879.080273,107588.900483,147104.931641,103044.081409,123162.274096,...,-0.53,-1.82,-0.49,-1.12,0.93,1.0,0.26,0.92,-1.94,8
2021-09-01,126931.797459,103600.419034,102188.194444,104961.040455,115304.529049,143046.178910,107538.061566,146981.850962,102976.456011,123208.885542,...,-0.55,-1.89,-0.06,-1.53,0.33,1.3,0.54,0.58,-0.65,9
2021-10-01,126926.659913,103500.101207,103150.015783,104901.138566,115254.176937,143635.963418,107294.948649,146891.094501,102951.672970,123139.674322,...,-0.94,-2.42,-1.98,-2.55,1.41,1.2,0.46,0.65,1.74,10
2021-11-01,127098.281640,103439.236506,103388.320707,105003.825501,115006.758363,144012.685130,107146.101988,146847.806490,102906.681927,122428.623870,...,-0.94,0.34,-0.33,-2.52,0.68,1.6,0.42,0.5,-0.15,11
