In [1]:
import os
import pandas as pd
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
from os.path import join
import yaml
%matplotlib inline

In [None]:
# Root directory of the saved model runs; later cells read the per-model score
# CSVs from its "cam_run5_models_<i>" sub-directories.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
models_dir = "/glade/p/cisl/aiml/ggantos/200607/"

In [None]:
# Every directory found below `models_dir`, sorted by path. os.walk reports the
# root directory itself first, so that first entry is dropped before sorting.
walked_dirs = [dirpath for dirpath, _subdirs, _files in os.walk(models_dir)]
model_paths = sorted(walked_dirs[1:])

# Model ids 0..53 with id 48 left out.
models = [model_id for model_id in range(54) if model_id != 48]

In [None]:
# Load both score tables of model 0 and display the classifier one as a sample
# of the CSV layout.
classifier_scores, regressor_scores = (
    pd.read_csv(join(models_dir, relative_path))
    for relative_path in (
        "cam_run5_models_0/dnn_classifier_scores.csv",
        "cam_run5_models_0/dnn_regressor_scores.csv",
    )
)
classifier_scores

In [None]:
# Hyper-parameter values, one list per setting.
# NOTE(review): none of these lists is referenced by the other visible cells;
# presumably they record the grid the models were trained over — confirm.
l2_weight = [1.0e-3, 1.0e-4, 1.0e-5]
lrs = [0.001, 0.0001, 0.00001]
hidden_layers = [2, 3, 4]
activation = ['relu', 'tanh']

In [None]:
# Values of the 'Output' column in dnn_regressor_scores.csv whose scores are
# collected and plotted (one plot panel per entry).
outputs_reg = ['qrtend_TAU_1', 'nctend_TAU_1', 'nrtend_TAU_-1', 'nrtend_TAU_1']

In [None]:
# Collect the hyper-parameters of every model from its YAML training config so
# that a model id can be mapped back to the settings it was trained with.
# Layout: regs_dict[model_id] -> {'activation', 'hidden_layers', 'lr', 'l2_weight'}.
regs_dict = {}
for i in models:
    # The context manager closes the config file after parsing; passing a bare
    # open(...) to yaml.load left one file handle open per model.
    with open(f'../config/200607/cesm_tau_run5_full_train_nn_{i}.yml') as config_file:
        y = yaml.load(config_file, Loader=yaml.FullLoader)
    # NOTE(review): the dict is named regs_dict but the values are read from
    # the 'classifier_networks' section — confirm that a regressor section is
    # not what was intended.
    network_config = y['classifier_networks']
    regs_dict[i] = {
        'activation': network_config['activation'],
        'hidden_layers': network_config['hidden_layers'],
        'lr': network_config['lr'],
        'l2_weight': network_config['l2_weight'],
    }


In [None]:
# Gather the regressor scores of every model, grouped by output variable.
# Layout: regressions[output][metric] -> list of floats ordered like `models`;
# regressions[output]["ids"] holds the model id at each list position.
regression_metrics = ["rmse", "mae", "r2", "hellinger"]

# Read each model's score table exactly once. The table does not depend on the
# output being processed, so reading it inside the per-output loop repeated the
# same file I/O for every entry of `outputs_reg`.
regressor_tables = {
    i: pd.read_csv(join(models_dir, f"cam_run5_models_{i}/dnn_regressor_scores.csv"))
    for i in models
}

regressions = {}
for out in outputs_reg:
    regressions[out] = {"ids": list(models)}
    for metric in regression_metrics:
        regressions[out][metric] = []
    for i in models:
        reg = regressor_tables[i]
        matching_rows = reg.loc[reg['Output'] == out]
        for metric in regression_metrics:
            # Series.item() returns the single matching value and raises if the
            # output is missing or duplicated; calling float() directly on a
            # one-element Series is deprecated in recent pandas releases.
            regressions[out][metric].append(float(matching_rows[metric].item()))
    

In [None]:
# Accumulator for the top-N index lists; both plotting cells below append to it.
# Re-run this cell before re-running them, otherwise the lists pile up.
tops = []

In [None]:
# Plot the regressor metrics of all models, one panel per output, and mark the
# N best models by RMSE and by R2. The marked indices are appended to `tops`.
#
# The x values, the annotation labels and the indices stored in `tops` are
# positions in the score lists, not model ids; the two differ from position 48
# onwards because `models` skips id 48.
N = 10  # number of best models highlighted per metric (reused by the classifier plot)
f, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4, sharey=True, figsize=(16,8))
for out, ax in zip(outputs_reg, (ax1, ax2, ax3, ax4)):
    scores = regressions[out]
    positions = range(len(scores["rmse"]))

    ax.plot(scores["rmse"], label="rmse")
    # RMSE is an error measure, so the best models have the SMALLEST values:
    # take the head of the ascending order. (Taking the tail selected the N
    # worst models and mixed them with the best-by-R2/accuracy picks.)
    top = sorted(positions, key=scores["rmse"].__getitem__)[:N]
    tops.append(top)
    for i in top:
        ax.annotate(i, (i, scores["rmse"][i]))

    ax.plot(scores["mae"], label="mae")

    ax.plot(scores["r2"], label="r2")
    # R2 is a skill score, so the best models are at the tail of the ascending order.
    top = sorted(positions, key=scores["r2"].__getitem__)[-N:]
    tops.append(top)
    for i in top:
        ax.annotate(i, (i, scores["r2"][i]))

    ax.plot(scores["hellinger"], label="hellinger")
    # Label the panel with its output, as the classifier plot does.
    ax.set_title(out)
plt.subplots_adjust(wspace = 0)
plt.legend(loc="best")
plt.show()

In [None]:
# Values of the 'Output' column in dnn_classifier_scores.csv whose scores are
# collected and plotted (one plot panel per entry).
outputs_class = ['qrtend_TAU', 'nctend_TAU', 'nrtend_TAU']

In [None]:
# Gather the classifier scores of every model, grouped by output variable.
# Layout: classifications[output][metric] -> list of floats ordered like
# `models`; classifications[output]["ids"] holds the model id at each position.
classification_metrics = ["accuracy", "heidke", "peirce"]

# Read each model's score table exactly once. The table does not depend on the
# output being processed, so reading it inside the per-output loop repeated the
# same file I/O for every entry of `outputs_class`.
classifier_tables = {
    i: pd.read_csv(join(models_dir, f"cam_run5_models_{i}/dnn_classifier_scores.csv"))
    for i in models
}

classifications = {}
for out in outputs_class:
    classifications[out] = {"ids": list(models)}
    for metric in classification_metrics:
        classifications[out][metric] = []
    for i in models:
        clss = classifier_tables[i]
        matching_rows = clss.loc[clss['Output'] == out]
        for metric in classification_metrics:
            # Series.item() returns the single matching value and raises if the
            # output is missing or duplicated; calling float() directly on a
            # one-element Series is deprecated in recent pandas releases.
            classifications[out][metric].append(float(matching_rows[metric].item()))
    

In [None]:
# Plot the classifier metrics of all models, one panel per output, and mark the
# N models with the highest accuracy. The marked list positions are appended
# to `tops`.
f, panel_axes = plt.subplots(1, 3, sharey=True, figsize=(15,8))
ax1, ax2, ax3 = panel_axes
for out, ax in zip(outputs_class, panel_axes):
    scores = classifications[out]
    accuracy_scores = scores["accuracy"]

    # One line per metric, drawn in the same order as before so that the
    # colour cycle assigns the same colours.
    for metric in ("accuracy", "heidke", "peirce"):
        ax.plot(scores[metric], label=metric)

    # Positions ordered by ascending accuracy; the tail holds the N highest.
    ranked_positions = sorted(range(len(accuracy_scores)), key=accuracy_scores.__getitem__)
    top = ranked_positions[-N:]
    tops.append(top)
    for i in top:
        ax.annotate(i, (i, accuracy_scores[i]))

    ax.set_title(out)
plt.subplots_adjust(wspace = 0)
plt.legend(loc="best")
plt.show()

In [None]:
# Array view of the collected top-N lists for inspection.
# The array is bound to a new name instead of overwriting `tops`: the plotting
# cells call tops.append(...), which an ndarray does not provide, so rebinding
# `tops` here made those cells fail when re-run after this one. The following
# cells accept the plain list of lists unchanged.
tops_array = np.array(tops)
print(tops_array.shape)
tops_array

In [None]:
# Indices that appear in every one of the collected top-N lists: start from the
# first list and narrow the set down with each list in turn.
tops_intersection = set(tops[0])
for ranking in tops:
    tops_intersection.intersection_update(ranking)
tops_intersection

In [None]:
# Count how often each index occurs across all collected top-N lists and print
# one line per distinct index.
unique_elements, counts_elements = np.unique(tops, return_counts=True)
for position in range(unique_elements.size):
    e = unique_elements[position]
    c = counts_elements[position]
    print (f"Element {e} has a frequency count of {c}")
    

In [None]:
# Print the hyper-parameters of the most frequently top-ranked models.
# The entries of `tops_top` are hand-picked from the frequency counts of
# `tops`, which stores positions in the score lists. `regs_dict` is keyed by
# model id, and ids and positions diverge from position 48 onwards because
# `models` skips id 48 — so each position is translated to its model id first.
# NOTE(review): the list is hard-coded; re-derive it whenever the scores or the
# top-N selection change.
tops_top = [11,14,17,20,23,26,49,52]
for i in tops_top:
    model_id = models[i]
    print (regs_dict[model_id])
