In [67]:
from pathlib import Path
import pandas as pd
import os


In [None]:
cwd = Path(os.getcwd())
directories = [x for x in (cwd / "deep_runs_binary").iterdir() if (not x.name.endswith("41nts")) and (not x.name.endswith("Restarts"))]
res = []
for directory in directories:
    df = pd.read_csv(directory / "test_results.csv")
    df["dataset"] = directory.name.split("_")[1]
    df["model"] = directory.name.split("_")[0]
    res.append(df)
    cm = pd.read_csv(directory / "confusion_matrix.csv")
    #add sensitivity and specificity (confusion matrix has 0 and 1 and indexes and columns are 0 and 1)
    sensitivity = cm.iloc[1, 1] / (cm.iloc[1, 0] + cm.iloc[1, 1])
    specificity = cm.iloc[0, 0] / (cm.iloc[0, 0] + cm.iloc[0, 1])
    # add sensitivity and specificity to the dataframe
    df["sensitivity"] = sensitivity
    df["specificity"] = specificity
    
    
res = pd.concat(res, ignore_index=True)
res.drop(columns=["epoch"], inplace=True)
res.columns = [col.replace("test_", "") for col in res.columns]

In [84]:
lgbm_dir_current = cwd / "results_lgb/results_current_dataset.csv"
lgbm_dir_mlm = cwd / "results_lgb/results_mlm5c.csv"

df_lgbm_current = pd.read_csv(lgbm_dir_current)
df_lgbm_current["dataset"] = "current"
df_lgbm_current["model"] = "lgbm"
df_lgbm_mlm = pd.read_csv(lgbm_dir_mlm)
df_lgbm_mlm["dataset"] = "mlm5c"
df_lgbm_mlm["model"] = "lgbm"

df_lgbm_current.rename(columns={"auc": "auroc"}, inplace=True)
df_lgbm_mlm.rename(columns={"auc": "auroc"}, inplace=True)

res = pd.concat([res, df_lgbm_current, df_lgbm_mlm], ignore_index=True)
res.columns = [col[0].upper() + col[1:] if isinstance(col, str) else col for col in res.columns]


deep5mc_results = {
    "Dataset": "deep5mc",
    "Model": "deep5mc",
    "Auroc": 0.938,
    "Accuracy": 0.852,
    "Sensitivity":  0.846,
    "Specificity":  0.857,
    "Auprc":  0.901,
}


res = pd.concat([res, pd.DataFrame([deep5mc_results])], ignore_index=True)

In [85]:
res

Unnamed: 0,Accuracy,Auroc,Auprc,Recall,Precision,F1,Dataset,Model,Sensitivity,Specificity
0,0.848022,0.916174,0.926787,0.819175,0.869331,0.843508,deepm5C,RNN,0.819175,0.87687
1,0.876879,0.942355,0.949875,0.823743,0.921694,0.86997,mlm5c,Transformer,0.823743,0.930016
2,0.948175,0.98153,0.974557,0.982007,0.919772,0.949871,current,RNN,0.982007,0.914344
3,0.847163,0.917493,0.929924,0.818659,0.86815,0.842678,deepm5C,1DCNN,0.818659,0.875666
4,0.848367,0.922139,0.928003,0.83774,0.855932,0.846738,mlm5c,1DCNN,0.83774,0.858994
5,0.930943,0.976073,0.969892,0.973391,0.89722,0.933755,current,1DCNN,0.973391,0.888495
6,0.861952,0.926879,0.936574,0.827429,0.888797,0.857016,deepm5C,Transformer,0.827429,0.896475
7,0.946148,0.980809,0.974259,0.983528,0.915114,0.948088,current,Transformer,0.983528,0.908768
8,0.869881,0.937388,0.940416,0.849663,0.885467,0.867196,mlm5c,RNN,0.849663,0.890098
9,0.625443,0.681986,0.648989,0.710846,0.607143,0.654915,current,lgbm,,


In [88]:
print(res.to_latex(float_format="%.3f"))

\begin{tabular}{lrrrrrrllrr}
\toprule
 & Accuracy & Auroc & Auprc & Recall & Precision & F1 & Dataset & Model & Sensitivity & Specificity \\
\midrule
0 & 0.848 & 0.916 & 0.927 & 0.819 & 0.869 & 0.844 & deepm5C & RNN & 0.819 & 0.877 \\
1 & 0.877 & 0.942 & 0.950 & 0.824 & 0.922 & 0.870 & mlm5c & Transformer & 0.824 & 0.930 \\
2 & 0.948 & 0.982 & 0.975 & 0.982 & 0.920 & 0.950 & current & RNN & 0.982 & 0.914 \\
3 & 0.847 & 0.917 & 0.930 & 0.819 & 0.868 & 0.843 & deepm5C & 1DCNN & 0.819 & 0.876 \\
4 & 0.848 & 0.922 & 0.928 & 0.838 & 0.856 & 0.847 & mlm5c & 1DCNN & 0.838 & 0.859 \\
5 & 0.931 & 0.976 & 0.970 & 0.973 & 0.897 & 0.934 & current & 1DCNN & 0.973 & 0.888 \\
6 & 0.862 & 0.927 & 0.937 & 0.827 & 0.889 & 0.857 & deepm5C & Transformer & 0.827 & 0.896 \\
7 & 0.946 & 0.981 & 0.974 & 0.984 & 0.915 & 0.948 & current & Transformer & 0.984 & 0.909 \\
8 & 0.870 & 0.937 & 0.940 & 0.850 & 0.885 & 0.867 & mlm5c & RNN & 0.850 & 0.890 \\
9 & 0.625 & 0.682 & 0.649 & 0.711 & 0.607 & 0.655 & current &

In [89]:
precision=  0.941 
recall = 0.937
f1 = 2 * (precision * recall) / (precision + recall)
print(f"Precision: {precision:.3f}, Recall: {recall:.3f}, F1: {f1:.3f}")

Precision: 0.941, Recall: 0.937, F1: 0.939
