In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
sns.set(rc={"figure.figsize":(7, 7)}) # default figure size as (width, height) = (7, 7)

import threading
import os

from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics import classification_report

In [None]:
# Load the exported run table and build one summary row per training run.
df = pd.read_csv("./wandb_export.csv")
df = df.drop(index=8) # remove duplicate of restart

# Training runs are the rows whose "Name" starts with "Traning" (spelling as in the export).
df_train = df[df["Name"].str.startswith("Traning")]

SUMMARY_COLUMNS = ["ID","Loss", "Warmup","Psudoinputs","S", "Orgmis","Seed","MaxEpochs"]

# Collect one small frame per run and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies the whole frame on every iteration.
summary_frames = []

for name_parts in df_train["Name"].str.split(" "):
    # The model id is the second space-separated token of the run name, up to the first "-".
    model_id = name_parts[1].split("-")[0]

    test_ = df[df["Name"].str.startswith("Testing "+str(model_id))]
    train_ = df[df["Name"].str.startswith("Traning "+str(model_id))]

    summary_frames.append(pd.DataFrame({
        "ID":[str(model_id)],
        "Loss":test_["mean(loss)"].values,
        "Warmup":train_["Warmup epochs"].values,
        "Psudoinputs":train_["lambda (psudosamples)"].values,
        "S":train_["S (encoders)"].values,
        "Orgmis":train_["Original miselbo (old)"].values,
        "Seed":train_["Seed"].values,
        "MaxEpochs":train_["Max epochs"].values,
    }))

if summary_frames:
    # ignore_index gives every row a unique 0..n-1 label instead of repeating 0.
    summary = pd.concat(summary_frames, ignore_index=True)
else:
    # No training runs found: keep an empty table with the expected columns.
    summary = pd.DataFrame(columns=SUMMARY_COLUMNS)

summary

In [None]:
STORE_MODELS = "./path/STORE_MODELS"
def load_parameters(model_id, STORE_MODELS=STORE_MODELS, data_type="test"):
    """Load the stored latent exports of one model.

    Reads three CSV files from ``<STORE_MODELS>/<model_id>_latent/``:
    ``<data_type>_label.csv``, ``<data_type>_z_mean.csv`` and
    ``<data_type>_z_std.csv``.

    Parameters
    ----------
    model_id : str
        Identifier used as the prefix of the ``*_latent`` directory.
    STORE_MODELS : str
        Root directory containing the per-model directories.
    data_type : {"test", "train"}
        Which split to load.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(label, z_mean, z_std)`` in that order.

    Raises
    ------
    ValueError
        If ``data_type`` is neither ``"test"`` nor ``"train"``.
    """
    # Fail early with a clear message; previously an unknown split fell through
    # both branches and crashed at the return statement with UnboundLocalError.
    if data_type not in ("test", "train"):
        raise ValueError(
            "data_type must be 'test' or 'train', got %r" % (data_type,)
        )

    latent_dir = "%s/%s_latent" % (STORE_MODELS, model_id)
    z_mean = pd.read_csv("%s/%s_z_mean.csv" % (latent_dir, data_type))
    z_std = pd.read_csv("%s/%s_z_std.csv" % (latent_dir, data_type))
    label = pd.read_csv("%s/%s_label.csv" % (latent_dir, data_type))

    return label, z_mean, z_std


# Load the test-split latent exports of the "example" model (default data_type).
label, z_mean, z_std = load_parameters(model_id="example")

In [None]:
def load_metrics(model_id, STORE_MODELS=STORE_MODELS):
    """Assemble the per-sample test metrics of one model into a single frame.

    Reads ``test_qjsd.csv`` (its ``x1`` column is renamed to ``qjsd``),
    ``test_loss.csv`` (stored as column ``loss``) and ``test_KL_q<i>.csv``
    for ``i = 1..S`` (stored as columns ``kl_<i>``) from
    ``<STORE_MODELS>/<model_id>_test/``.

    Returns the combined ``pandas.DataFrame``.
    """
    listing = os.listdir("%s/%s_test/" % (STORE_MODELS, model_id))
    # Number of KL files = number of "test*" files minus two.
    # NOTE(review): presumably the two subtracted files are test_qjsd.csv and
    # test_loss.csv; any other "test*" file in the directory would skew this count.
    n_kl = sum(1 for fname in listing if fname.startswith("test")) - 2

    metrics = pd.read_csv("%s/%s_test/test_qjsd.csv" % (STORE_MODELS, model_id))
    metrics = metrics.rename(columns={"x1":"qjsd"})

    loss_frame = pd.read_csv("%s/%s_test/test_loss.csv" % (STORE_MODELS, model_id))
    metrics["loss"] = loss_frame.values.T

    for idx in range(1, n_kl + 1):
        kl_frame = pd.read_csv("%s/%s_test/test_KL_q%i.csv" % (STORE_MODELS, model_id, idx))
        metrics["kl_" + str(idx)] = kl_frame.values

    return metrics


def plot_metrics(df):
    """Show three side-by-side box plots for a metrics frame.

    The panels show the ``qjsd`` column, the ``loss`` column, and all
    remaining columns of ``df``, in that order.
    """
    fig, (ax_qjsd, ax_loss, ax_rest) = plt.subplots(ncols=3, nrows=1)

    ax_qjsd.boxplot(df["qjsd"], labels=["qjsd"])
    ax_loss.boxplot(df["loss"], labels=["loss"])

    # Everything that is neither qjsd nor loss goes into the third panel.
    remaining = df.drop(["qjsd", "loss"], axis=1)
    ax_rest.boxplot(remaining)

    fig.tight_layout()
    plt.show()



# Load the test metrics of the "example" model and show their box plots.
# Note: this rebinds the notebook-level name `df`.
df = load_metrics(model_id="example")
plot_metrics(df=df)

In [None]:
def sample(mean, std, k=1, rng=None):
    """Draw ``k`` noisy samples around ``mean`` and stack them feature-wise.

    Each draw is ``mean + eps * std`` with ``eps`` taken from a standard
    normal distribution. Earlier versions used uniform noise on [0, 1), which
    shifted every sample upward from the mean by up to one std.

    Parameters
    ----------
    mean, std : array-like of shape (n, f)
        Per-row location and scale.
    k : int
        Number of independent draws; must be at least 1.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, NumPy's global random state is
        used, so ``np.random.seed`` still controls the result.

    Returns
    -------
    numpy.ndarray of shape (n, f * k)
        The ``k`` draws concatenated along axis 1.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be >= 1, got %r" % (k,))

    mean = np.asarray(mean)
    std = np.asarray(std)
    n, f = mean.shape

    draw = np.random.standard_normal if rng is None else rng.standard_normal
    draws = [mean + draw((n, f)) * std for _ in range(k)]
    return np.concatenate(draws, axis=1)

#data = sample(z_mean.values, z_std.values, k=1)

In [None]:
def get_TSNE_embedding(data,n_components=2):
    """Return the t-SNE embedding of ``data`` with ``n_components`` dimensions."""
    reducer = TSNE(n_components=n_components)
    return reducer.fit_transform(data)

#data_embedding = get_TSNE_embedding(data)

In [None]:
def scatter_embedding(data, label, title=""):
    """Scatter-plot a two-column embedding, coloured by ``label``.

    ``data`` is placed in columns ``x1`` and ``x2``; ``label`` becomes the hue.
    The figure is given ``title`` and shown immediately.
    """
    points = pd.DataFrame(data, columns=["x1", "x2"]).assign(label=label)
    sns.scatterplot(data=points, x="x1", y="x2", hue="label", alpha=0.2)
    plt.title(title)
    plt.show()
    
#scatter_embedding(data_embedding,label.values, title="example")

In [None]:
def get_pca_embedding(data,n_components=2):
    """Return the PCA projection of ``data`` onto ``n_components`` components."""
    reducer = PCA(n_components=n_components)
    return reducer.fit_transform(data)

In [None]:
# Classifier line-up: each entry pairs a display name with an estimator.
# Commented-out entries are alternatives that are currently disabled.
_classifier_specs = [
    ("Nearest Neighbors", KNeighborsClassifier()),
    ("Linear SVM", SVC(kernel="linear", C=0.025)),
    #("RBF SVM", SVC(gamma=2, C=1)),
    #("Gaussian Process", GaussianProcessClassifier(1.0 * RBF(1.0))),
    #("Decision Tree", DecisionTreeClassifier(max_depth=5)),
    #("Random Forest", RandomForestClassifier()),
    ("Neural Net", MLPClassifier(max_iter=2000)),
    #("AdaBoost", AdaBoostClassifier()),
    #("Naive Bayes", GaussianNB()),
    ("QDA", QuadraticDiscriminantAnalysis()),
]

# Display name -> list of scores (one entry is appended per evaluated model).
names = {display_name: [] for display_name, _ in _classifier_specs}

# Estimators in the same order as the keys of `names`.
classifier = [estimator for _, estimator in _classifier_specs]


def classification_models(model_id, k=0, names=names, classifier=classifier):
    """Fit every classifier on one model's train latents and score it on the test latents.

    Parameters
    ----------
    model_id : str
        Model whose latent exports are read through ``load_parameters``.
    k : int
        If greater than 0, the features are ``k`` random draws produced by
        ``sample`` from the stored mean/std; otherwise the features are the
        mean and std columns concatenated side by side.
    names : dict
        Maps classifier display name to a list of scores. The score for this
        model is appended to each list, so the dict is mutated in place.
    classifier : list
        Estimators paired position-wise with the keys of ``names``.

    Returns
    -------
    dict
        Display name -> ``clf.score`` on the test split for this model.

    Raises
    ------
    ValueError
        If ``names`` and ``classifier`` have different lengths (``zip`` would
        otherwise silently drop the unmatched entries).
    """
    if len(names) != len(classifier):
        raise ValueError(
            "names has %d entries but classifier has %d"
            % (len(names), len(classifier))
        )

    train_label, train_z_mean, train_z_std = load_parameters(model_id, data_type="train")
    test_label, test_z_mean, test_z_std = load_parameters(model_id, data_type="test")

    if k > 0:
        x_train = sample(train_z_mean.values, train_z_std.values, k=k)
        x_test = sample(test_z_mean.values, test_z_std.values, k=k)
    else:
        x_train = np.concatenate([train_z_mean, train_z_std], axis=1)
        x_test = np.concatenate([test_z_mean, test_z_std], axis=1)

    y_train = train_label.values.ravel()
    y_test = test_label.values.ravel()

    scores = {}
    for name, clf in zip(names.keys(), classifier):
        # The same estimator object is refit for every model.
        clf.fit(x_train, y_train)
        score = clf.score(x_test, y_test)
        print("%s: %.10f"% (name,score))
        names[name].append(score)
        scores[name] = score

    return scores
        
    

# Evaluate every summarised model; each call appends one score per classifier to `names`.
# The loop variable is named model_id so that the builtin `id` is not shadowed.
for model_id in summary["ID"]:
    classification_models(model_id, k=0)

# One "Classification <name>" column per classifier, rows in the order of summary["ID"].
for name, scores in names.items():
    summary["Classification " + name] = scores

In [None]:
# Add per-model metric averages to the summary table.
qjsd = []
kls = []

# The loop variable is named model_id so that the builtin `id` is not shadowed.
for model_id in summary["ID"]:
    df_metric = load_metrics(model_id)

    qjsd.append(np.mean(df_metric["qjsd"]))
    # Mean over every remaining value, i.e. all columns other than qjsd and loss pooled together.
    kls.append(np.mean(df_metric.drop(columns=["qjsd", "loss"]).values))

summary["mean(qjsd)"] = qjsd
summary["mean(kl_all)"] = kls

In [None]:
# Persist the assembled summary table (index included) next to the notebook.
summary.to_csv(path_or_buf="analytics.csv")