In [1]:
import pandas as pd
import numpy as np

In [2]:
# Single-label baseline scores: one row per target, one column per
# (model, transformation) pair, filled with the positive-class F1.
# na_filter=False disables NA detection, so a transformation literally
# named "None" is read as text rather than as a missing value.
baseline_long = pd.read_csv("../baseline.csv", na_filter=False)
baseline = baseline_long.pivot(
    index='target',
    columns=['model', 'transformation'],
    values='positive_F1',
)

# Export for the report, then show the same table inline.
baseline.to_latex("../latex/baseline.tex")
display(baseline)

model,GLM,GLM,GLM,Random Forest
transformation,None,Root 4,PaCMAP,None
target,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Beach,0.609179,0.636111,0.749245,0.687719
FallFoliage,0.634873,0.688331,0.80805,0.850258
Field,0.732591,0.73251,0.835329,0.821317
Mountain,0.493888,0.512696,0.508571,0.467227
Sunset,0.772313,0.789668,0.854932,0.880478
Urban,0.538922,0.535552,0.568182,0.508197


In [3]:
# Column labels for the transposed table: one (model, transformation) pair
# per CSV row, in file order. NOTE(review): set_index below requires the CSV
# to have exactly as many rows as there are tuples here — confirm.
tuples = [("GLM", "None"), ("GLM", "Root 4"), ("GLM", "PaCMAP (dim 50)"), ("RF", "None")]

# After the transpose, the original CSV column names become the row labels
# and the (model, transformation) pairs become the columns.
baseline = (
    pd
    .read_csv("../baseline-multilabel.csv", na_filter=False)
    .set_index(pd.MultiIndex.from_tuples(tuples, names=["model", "transformation"]))
    .transpose()
)

# Row labels look like "<metric> <class>" (judging by the rendered table,
# e.g. "precision Beach" or "accuracy -"). Split only at the FIRST run of
# whitespace (n=1): `n` is the maximum number of splits, so n=2 could yield
# three parts and break the two-column assignment for a class name that
# itself contains whitespace.
baseline[["metric", "class"]] = pd.DataFrame(baseline.index.str.split(n=1).tolist(), index=baseline.index)
baseline = baseline.set_index(["metric", "class"])

# Export for the report, then show the same table inline.
baseline.to_latex("../latex/baseline-multilabel.tex")
display(baseline)

Unnamed: 0_level_0,model,GLM,GLM,GLM,RF
Unnamed: 0_level_1,transformation,None,Root 4,PaCMAP (dim 50),None
metric,class,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
accuracy,-,0.68081,0.696288,0.735606,0.605996
precision,Beach,0.801846,0.822211,0.832931,0.888508
precision,Sunset,0.888126,0.902961,0.950207,0.986541
precision,FallFoliage,0.841089,0.869614,0.860202,0.950033
precision,Field,0.815072,0.825753,0.847741,0.873998
precision,Mountain,0.796185,0.796875,0.81044,0.897321
precision,Urban,0.799094,0.795139,0.779719,0.897351
recall,Beach,0.651531,0.681327,0.717711,0.54689
recall,Sunset,0.790157,0.782458,0.851902,0.809783
recall,FallFoliage,0.679872,0.732179,0.814231,0.755256


In [4]:
# Column labels for the transposed table: one (kernel, position) pair per
# CSV row, in file order. NOTE(review): set_index below requires the CSV to
# have exactly as many rows as there are tuples here — confirm.
tuples = [("RBF", "first"), ("RBF", "second"), ("RBF", "third"), ("Poly", "first"), ("Poly", "second"), ("Poly", "third")]

# Drop the hyper-parameter columns so only the metric columns remain, then
# transpose so metrics become rows and (kernel, position) pairs the columns.
svm = (
    pd
    .read_csv("multi_svm.csv", na_filter=False)
    .drop(["class_weight", "C", "kernel", "degree", "gamma", "coef0"], axis="columns")
    .set_index(pd.MultiIndex.from_tuples(tuples, names=["kernel", "position"]))
    .transpose()
)

# Row labels look like "<metric> <class>" (judging by the rendered table,
# e.g. "precision Beach" or "accuracy -"). Split only at the FIRST run of
# whitespace (n=1): `n` is the maximum number of splits, so n=2 could yield
# three parts and break the two-column assignment for a class name that
# itself contains whitespace.
svm[["metric", "class"]] = pd.DataFrame(svm.index.str.split(n=1).tolist(), index=svm.index)
svm = svm.set_index(["metric", "class"])

# Export for the report, then show the same table inline.
svm.to_latex("../latex/svm-multilabel.tex")
display(svm)

Unnamed: 0_level_0,kernel,RBF,RBF,RBF,Poly,Poly,Poly
Unnamed: 0_level_1,position,first,second,third,first,second,third
metric,class,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
accuracy,-,0.867424,0.803939,0.799416,0.843701,0.830368,0.832186
precision,Beach,0.869962,0.876538,0.876286,0.87512,0.873709,0.875242
precision,Sunset,0.973077,0.984623,0.985615,0.981838,0.966333,0.977416
precision,FallFoliage,0.928748,0.939236,0.939651,0.925551,0.918919,0.918826
precision,Field,0.90131,0.900671,0.900026,0.899205,0.905106,0.900362
precision,Mountain,0.864843,0.870318,0.870557,0.861979,0.870192,0.858507
precision,Urban,0.8702,0.877178,0.881184,0.863821,0.854287,0.857143
recall,Beach,0.847816,0.762821,0.757953,0.810423,0.806149,0.79416
recall,Sunset,0.914855,0.90308,0.903986,0.861413,0.885266,0.8593
recall,FallFoliage,0.904231,0.881154,0.881154,0.886538,0.867308,0.889231


In [5]:
# Best three configurations per kernel, ranked by positive-class F1.
svm = (
    pd.concat([
        # keep_default_na=False without na_values: no string is parsed as NaN.
        pd.read_csv("linear_svm.csv", keep_default_na=False),
        # Here only empty cells are treated as missing.
        pd.read_csv("kernel_svm.csv", keep_default_na=False, na_values=[""]),
    ])
    .sort_values("positive_F1", ascending=False)
    .groupby("kernel")
    .head(3)
    .set_index("kernel")
    # The kernel index holds duplicate labels. A stable sort preserves the
    # descending-F1 order inside each kernel group; the default quicksort
    # does not guarantee the relative order of equal labels.
    .sort_index(kind="mergesort")
    .drop(["true_positive", "false_positive", "true_negative", "false_negative"], axis="columns")
    # NOTE(review): "negaitve_F1" (sic) presumably mirrors the spelling of
    # the CSV header — do not "fix" it here without checking the files.
    .drop(["negaitve_F1", "mean_F1", "class_weight"], axis="columns")
)

# Hyper-parameter columns that get folded into a single descriptive string.
cols = np.array(["C", "transformation", "penalty", "loss", "degree", "gamma", "coef0"])
def info(series):
    """Summarise the non-missing entries of a row as ``"name: value, ..."``.

    Parameters
    ----------
    series : pandas.Series
        One row of hyper-parameters; the index holds the parameter names.

    Returns
    -------
    str
        Comma-separated ``name: value`` pairs for every entry that is not
        NA, in the row's own order; an empty string if every entry is NA.
    """
    # Take the names from the row's own index instead of positionally
    # indexing the global `cols`, so names and values cannot get out of sync
    # when the row has a different length or order than `cols`.
    present = series.dropna()
    return ", ".join(f"{name}: {value}" for name, value in present.items())

# Fold the hyper-parameter columns of every row into one readable string.
svm["info"] = svm[cols].apply(info, axis="columns")

# Keep only that summary and the score, give both report-friendly names,
# and index the table by (kernel, hyper-parameter summary).
svm = (
    svm
    .drop(columns=cols)
    .loc[:, ["info", "positive_F1"]]
    .rename(columns={"info": "Hyper-parameters", "positive_F1": "F1-score"})
    .reset_index()
    .set_index(["kernel", "Hyper-parameters"])
)

# Export for the report, then show the same table inline.
svm.to_latex("../latex/SVM-single.tex")
display(svm)

Unnamed: 0_level_0,Unnamed: 1_level_0,F1-score
kernel,Hyper-parameters,Unnamed: 2_level_1
linear,"C: 0.1, transformation: root 4, penalty: l1, loss: squared_hinge",0.62994
linear,"C: 0.01, transformation: root 4, penalty: l2, loss: hinge",0.627193
linear,"C: 0.01, transformation: root 4, penalty: l2, loss: squared_hinge",0.618824
poly,"C: 0.1, degree: 5.0, gamma: 0.0034013605442176, coef0: 1.0",0.692206
poly,"C: 1.0, degree: 3.0, gamma: 0.0034013605442176, coef0: 1.0",0.6875
poly,"C: 0.01, degree: 2.0, gamma: 0.0583211843519804, coef0: 1.0",0.684636
precomputed,"C: 0.1, gamma: 0.0034013605442176, scale: 0.1",0.512338
precomputed,"C: 10.0, gamma: 1.156925355176084e-05, scale: 1.0",0.480243
precomputed,"C: 1.0, gamma: 0.0001983713753468, scale: 1.0",0.478668
rbf,"C: 10.0, gamma: 0.0034013605442176",0.690852


In [7]:
import json

# Load the stored metrics of the final SVM; keys are "<metric> <class>"
# strings such as "accuracy -" (see the lookups below).
with open("final_svm.json", mode="r") as file:
    result = json.load(file)

precision_keys = [key for key in result if key.startswith("precision")]
recall_keys = [key for key in result if key.startswith("recall")]

# One row per metric, one column per class. Class names are taken from the
# precision keys; the recall values are matched to them by position.
final_svm = pd.DataFrame(
    {
        "precision": [result[key] for key in precision_keys],
        "recall": [result[key] for key in recall_keys],
    },
    index=[key.split()[1] for key in precision_keys],
).transpose()

# Export the per-class table, print the two global scores, show the table.
final_svm.to_latex("../latex/SVM-final.tex")
print(result["accuracy -"])
print(result["Hamming-Loss -"])
display(final_svm)

0.8630952380952381
0.07670995670995671


Unnamed: 0,Beach,Sunset,FallFoliage,Field,Mountain,Urban
precision,0.866394,0.963462,0.927052,0.899326,0.861358,0.869128
recall,0.84924,0.913043,0.887692,0.850744,0.818111,0.799209
