In [1]:
import pandas as pd
import numpy as np

In [8]:
# Single-label baseline table: one row per target, one column per
# (model, transformation) pair, cells holding the positive-class F1.
# na_filter=False turns off NA detection, so the transformation label "None"
# shown in the rendered table stays an ordinary string.
baseline = pd.read_csv("../baseline.csv", na_filter=False).pivot(
    index='target',
    columns=['model', 'transformation'],
    values='positive_F1',
)

# Export for the LaTeX report, then render the same frame inline.
baseline.to_latex("../latex/baseline.tex")
display(baseline)

model,GLM,GLM,GLM,Random Forest
transformation,None,Root 4,PaCMAP,None
target,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Beach,0.609179,0.636111,0.749245,0.687719
FallFoliage,0.634873,0.688331,0.80805,0.850258
Field,0.732591,0.73251,0.835329,0.821317
Mountain,0.493888,0.512696,0.508571,0.467227
Sunset,0.772313,0.789668,0.854932,0.880478
Urban,0.538922,0.535552,0.568182,0.508197


In [34]:
# (model, transformation) labels attached to the rows of the multi-label CSV.
# NOTE(review): this assumes the CSV has exactly four rows in this order —
# confirm against the file, nothing here checks it beyond the length.
tuples = [("GLM", "None"), ("GLM", "Root 4"), ("GLM", "PaCMAP"), ("RF", "None")]

baseline = (
    pd
    .read_csv("../baseline-multilabel.csv", na_filter=False)
    .set_index(pd.MultiIndex.from_tuples(tuples, names=["model", "transformation"]))
    .transpose()
)

# After the transpose the row labels are the CSV column headers; judging by the
# rendered table they look like "<metric> <class>" (e.g. "precision Beach").
# Split on the FIRST whitespace only (n=1): that yields at most two parts, which
# is what the two target columns need. The previous n=2 allowed three parts, so
# a class name containing a space would have made the assignment fail.
baseline[["metric", "class"]] = pd.DataFrame(baseline.index.str.split(n=1).tolist(), index=baseline.index)
baseline = baseline.set_index(["metric", "class"])

# Export for the LaTeX report, then render the same frame inline.
baseline.to_latex("../latex/baseline-multilabel.tex")
display(baseline)

Unnamed: 0_level_0,model,GLM,GLM,GLM,RF
Unnamed: 0_level_1,transformation,None,Root 4,PaCMAP,None
metric,class,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
accuracy,-,0.696288,0.696288,0.696288,0.696288
precision,Beach,0.748528,0.822211,0.682409,0.767789
precision,Sunset,0.847222,0.902961,0.838174,0.846608
precision,FallFoliage,0.811476,0.869614,0.711708,0.806803
precision,Field,0.779257,0.825753,0.762072,0.804553
precision,Mountain,0.742647,0.796875,0.642247,0.744544
precision,Urban,0.721614,0.795139,0.66032,0.772903
recall,Beach,0.681327,0.681327,0.681327,0.681327
recall,Sunset,0.782458,0.782458,0.782458,0.782458
recall,FallFoliage,0.732179,0.732179,0.732179,0.732179


In [5]:
# Collect the SVM grid-search results from the three result files and keep the
# three best rows (by positive-class F1) for every kernel.
svm = (
    pd.concat([
        pd.read_csv("linear_svm.csv", keep_default_na=False),
        # Only this file maps empty cells to NaN; presumably so hyper-parameters
        # that a kernel does not use are treated as missing — TODO confirm.
        pd.read_csv("kernel_svm.csv", keep_default_na=False, na_values=[""]),
        pd.read_csv("custom_kernel_svm.csv", keep_default_na=False),
    ])
    .sort_values("positive_F1", ascending=False)
    .groupby("kernel")
    .head(3)
    .set_index("kernel")
    # Group the rows by kernel name. A stable algorithm is requested explicitly:
    # the default quicksort gives no guarantee that rows sharing a kernel keep
    # the F1-descending order established above.
    .sort_index(kind="mergesort")
    .drop(["true_positive", "false_positive", "true_negative", "false_negative"], axis="columns")
    # "negaitve_F1" is spelled this way on purpose: it presumably mirrors the
    # header in the CSV files, so do not correct it without checking them.
    .drop(["negaitve_F1", "mean_F1", "class_weight"], axis="columns")
)

# Hyper-parameter columns that get folded into one descriptive string per row.
cols = np.array(["C", "transformation", "penalty", "loss", "degree", "gamma", "coef0", "scale"])


def info(series):
    """Render the non-missing entries of ``series`` as ``"name: value, ..."``.

    Parameters
    ----------
    series : pandas.Series
        One row of hyper-parameters; the index labels are used as the names.

    Returns
    -------
    str
        Comma-separated ``name: value`` pairs in the order of ``series``,
        skipping missing (NaN/None) entries. Empty string if all are missing.
    """
    # Names come from the series' own index rather than the module-level
    # ``cols`` array, so the function no longer breaks when the series has a
    # different length or ordering than ``cols``.
    present = series[series.notna()]
    return ", ".join(f"{name}: {value}" for name, value in zip(present.index, present.to_list()))

# Collapse the hyper-parameter columns into one readable string per row, keep
# only that string and the F1 score, and index the table by
# (kernel, hyper-parameter string) for the report.
svm = (
    svm
    .assign(info=svm[cols].apply(info, axis="columns"))
    .drop(columns=cols)
    .loc[:, ["info", "positive_F1"]]
    .rename(columns={"info": "Hyper-parameters", "positive_F1": "F1-score"})
    .reset_index()
    .set_index(["kernel", "Hyper-parameters"])
)

# Export for the LaTeX report, then render the same frame inline.
svm.to_latex("../latex/SVM-single.tex")
display(svm)

Unnamed: 0_level_0,Unnamed: 1_level_0,F1-score
kernel,Hyper-parameters,Unnamed: 2_level_1
linear,"C: 0.1, transformation: root 4, penalty: l1, loss: squared_hinge",0.62994
linear,"C: 0.01, transformation: root 4, penalty: l2, loss: hinge",0.627193
linear,"C: 0.01, transformation: root 4, penalty: l2, loss: squared_hinge",0.618824
poly,"C: 0.1, degree: 5.0, gamma: 0.0034013605442176, coef0: 1.0",0.692206
poly,"C: 1.0, degree: 3.0, gamma: 0.0034013605442176, coef0: 1.0",0.6875
poly,"C: 0.01, degree: 2.0, gamma: 0.0583211843519804, coef0: 1.0",0.684636
precomputed,"C: 0.1, gamma: 0.0034013605442176, scale: 0.1",0.512338
precomputed,"C: 10.0, gamma: 1.156925355176084e-05, scale: 1.0",0.480243
precomputed,"C: 1.0, gamma: 0.0001983713753468, scale: 1.0",0.478668
rbf,"C: 10.0, gamma: 0.0034013605442176",0.690852
