In [1]:
import pandas as pd
import os

In [2]:
# Root directory that is listed to discover the trained models; each model lives in
# its own sub-directory below it.
# NOTE: "/PATH_TO_/" is a placeholder - point this at the real location before running.
MODELS_PATH = "/PATH_TO_/trained_models/"

In [3]:
# Build an overview table with one row per trained VGG model directory.
trained_models = [entry for entry in os.listdir(MODELS_PATH) if "vgg" in entry]
model_df = pd.DataFrame(trained_models)

# Drop the "simple_" / "pre_" markers from the directory name, then split it into at
# most three underscore-separated fields (everything after the second "_" stays together).
stripped_names = model_df[0].str.replace("simple_", "").str.replace("pre_", "")
model_df_1 = stripped_names.str.split("_", n=2, expand=True)

# Original directory name first, followed by the three parsed fields.
model_df = pd.concat([model_df, model_df_1], axis=1)
model_df.columns = ["dir", "train_type", "model_id", "dataset"]

# Anything up to and including the first "maastricht" is collapsed to "maastricht".
model_df["dataset"] = model_df["dataset"].str.replace("^.*?maastricht", "maastricht", regex=True)

model_df

Unnamed: 0,dir,train_type,model_id,dataset
0,simple_replicated_vgg_covid_us,replicated,vgg,covid_us
1,simple_replicated_vgg_pocovid_covid_us_combined,replicated,vgg,pocovid_covid_us_combined
2,simple_replicated_vgg_pocovid,replicated,vgg,pocovid
3,simple_replicated_vgg_maastricht,replicated,vgg,maastricht
4,continued_vgg_pre_pocovid_maastricht,continued,vgg,maastricht
5,continued_vgg_pre_covid_us_maastricht,continued,vgg,maastricht
6,continued_vgg_pre_combined_maastricht,continued,vgg,maastricht


In [4]:
# Report, for every discovered model, whether its frame-level prediction file exists.
# Loop-local names are used on purpose: the notebook-level `row` and `args` describe the
# currently selected model in other cells and must not be rebound by this check.
for model_dir in model_df["dir"]:
    # Prediction file: <MODELS_PATH>/<dir>/<dir>_results_all_frames_predicted.csv
    results_prefix = os.path.join(MODELS_PATH, model_dir, model_dir + "_results")
    if os.path.exists(results_prefix + "_all_frames_predicted.csv"):
        print(f"Predictions exist for {model_dir}")
    else:
        print(f"Predictions do not exist for {model_dir}")

Predictions exist for simple_replicated_vgg_covid_us
Predictions exist for simple_replicated_vgg_pocovid_covid_us_combined
Predictions exist for simple_replicated_vgg_pocovid
Predictions exist for simple_replicated_vgg_maastricht
Predictions exist for continued_vgg_pre_pocovid_maastricht
Predictions exist for continued_vgg_pre_covid_us_maastricht
Predictions exist for continued_vgg_pre_combined_maastricht


In [5]:
MODEL_TO_LOAD = "simple_replicated_vgg_maastricht"

# Look the requested model up in the overview table; stop if it was not discovered.
matching_rows = model_df.loc[model_df["dir"] == MODEL_TO_LOAD]
if matching_rows.empty:
    raise ValueError("Model not found")
row = matching_rows.iloc[0]

# Common prefix of all result files of this model: <MODELS_PATH>/<dir>/<dir>_results
results_prefix = os.path.join(MODELS_PATH, row["dir"], row["dir"] + "_results")

# Settings describing the selected model and where its results are stored.
args = dict(
    dataset=row["dataset"],
    weights=os.path.join(MODELS_PATH, row["dir"]),
    m_id="vgg_base",
    classes=2,
    folds=5,
    save_path=results_prefix,
)

# Only a confirmation is printed; a missing prediction file is not treated as an error here.
if os.path.exists(f"{results_prefix}_all_frames_predicted.csv"):
    print(f"Predictions exist for {row.dir}")

args["save_path"]

Predictions exist for simple_replicated_vgg_maastricht


'/itet-stor/mrichte/covlus_bmicnas02/matthias/trained_models/simple_replicated_vgg_maastricht/simple_replicated_vgg_maastricht_results'

In [7]:
# Load the aggregation-strategy evaluation of the selected model
# (file: <save_path>_aggregation_strategy_evaluation.csv).
eval_df = pd.read_csv(args["save_path"] + "_aggregation_strategy_evaluation.csv")

# Average all numeric columns per aggregation strategy (the rows of one strategy are
# presumably the individual folds - verify against the CSV).
# numeric_only=True is spelled out because the default differs between pandas versions:
# 1.x silently drops non-numeric columns, while >= 2.0 raises a TypeError on them.
eval_df = eval_df.groupby(["Aggregation Strategy"]).mean(numeric_only=True).reset_index()

In [8]:
# Read the frame-level predictions of the selected model; the first CSV column becomes the index.
all_frames_predicted = pd.read_csv(f"{args['save_path']}_all_frames_predicted.csv", index_col=0)

In [9]:
# Get number of frames per video_name
# all_frames_predicted.groupby("video_name").count()["Frame"].hist()

In [10]:
# Scores
from sklearn.metrics import recall_score, f1_score, accuracy_score

def get_scores(gt, pred, name=0):
    """Compute classification metrics and return them as a one-row DataFrame.

    Args:
        gt: Ground-truth labels, passed unchanged to the scikit-learn metric functions.
        pred: Predicted labels, passed unchanged to the scikit-learn metric functions.
        name: Index label of the single returned row (default ``0``).

    Returns:
        A ``pd.DataFrame`` with one row labelled ``name`` and the columns
        ``accuracy``, ``sensitivity``, ``specifictiy`` and ``f1`` (in that order).
    """
    # Sensitivity is the recall with scikit-learn's default positive label;
    # specificity is the recall computed with label 0 as the positive label.
    sensitivity = recall_score(gt, pred)
    specificity = recall_score(gt, pred, pos_label=0)

    # NOTE: the "specifictiy" key is misspelled; it is kept unchanged because it becomes
    # a column name of the returned frame.
    metrics = {
        "accuracy": accuracy_score(gt, pred),
        "sensitivity": sensitivity,
        "specifictiy": specificity,
        "f1": f1_score(gt, pred),
    }
    return pd.DataFrame(metrics, index=[name])

In [11]:
# Cast labels and predicted class indices to integers before scoring.
for label_column in ("gt", "predIdxs"):
    all_frames_predicted[label_column] = all_frames_predicted[label_column].astype(int)

# Frame level: every frame is scored as an individual sample.
scores = get_scores(all_frames_predicted["gt"], all_frames_predicted["predIdxs"], name="all_frames_predicted")

# Video level: the label of the first frame of each video is used as the video label and
# the per-frame predictions are averaged.
video_level = (
    all_frames_predicted
    .groupby("video_name")
    .agg({"gt": "first", "predIdxs": "mean"})
    .reset_index()
)
# The mean of the hard per-frame predictions is the share of frames predicted as class 1
# (assuming predIdxs only holds 0/1), not a model probability. A video is positive only
# if that share is strictly above 0.5; an exact tie yields 0.
video_level["predIdxs"] = video_level["predIdxs"].gt(0.5).astype(int)

video_scores = get_scores(video_level["gt"], video_level["predIdxs"], name="video_level")

# Patient level: same aggregation and threshold, grouped by patient instead of video.
patient_level = (
    all_frames_predicted
    .groupby("Patient ID")
    .agg({"gt": "first", "predIdxs": "mean"})
    .reset_index()
)
patient_level["predIdxs"] = patient_level["predIdxs"].gt(0.5).astype(int)

patient_scores = get_scores(patient_level["gt"], patient_level["predIdxs"], name="patient_level")

# Stack the three granularities into one table and tag every row with the model name.
scores = pd.concat([scores, video_scores, patient_scores])
scores["model"] = MODEL_TO_LOAD
scores

Unnamed: 0,accuracy,sensitivity,specifictiy,f1,model
all_frames_predicted,0.597655,0.613324,0.580854,0.612024,simple_replicated_vgg_maastricht
video_level,0.606469,0.635468,0.571429,0.638614,simple_replicated_vgg_maastricht
patient_level,0.596774,0.636364,0.551724,0.626866,simple_replicated_vgg_maastricht


In [12]:
# Render the score table as LaTeX: floats with three decimals, row labels omitted.
latex_table = scores.to_latex(index=False, float_format="%.3f")
print(latex_table)

\begin{tabular}{rrrrl}
\toprule
 accuracy &  sensitivity &  specifictiy &    f1 &                            model \\
\midrule
    0.598 &        0.613 &        0.581 & 0.612 & simple\_replicated\_vgg\_maastricht \\
    0.606 &        0.635 &        0.571 & 0.639 & simple\_replicated\_vgg\_maastricht \\
    0.597 &        0.636 &        0.552 & 0.627 & simple\_replicated\_vgg\_maastricht \\
\bottomrule
\end{tabular}



In [13]:
MODEL_TO_LOAD = "continued_vgg_pre_pocovid_maastricht"

# Look the requested model up in the overview table; stop if it was not discovered.
matching_rows = model_df.loc[model_df["dir"] == MODEL_TO_LOAD]
if matching_rows.empty:
    raise ValueError("Model not found")
row = matching_rows.iloc[0]

# Common prefix of all result files of this model: <MODELS_PATH>/<dir>/<dir>_results
results_prefix = os.path.join(MODELS_PATH, row["dir"], row["dir"] + "_results")

# Settings describing the selected model and where its results are stored.
args = dict(
    dataset=row["dataset"],
    weights=os.path.join(MODELS_PATH, row["dir"]),
    m_id="vgg_base",
    classes=2,
    folds=5,
    save_path=results_prefix,
)

# Only a confirmation is printed; a missing prediction file is not treated as an error here.
if os.path.exists(f"{results_prefix}_all_frames_predicted.csv"):
    print(f"Predictions exist for {row.dir}")

args["save_path"]

Predictions exist for continued_vgg_pre_pocovid_maastricht


'/itet-stor/mrichte/covlus_bmicnas02/matthias/trained_models/continued_vgg_pre_pocovid_maastricht/continued_vgg_pre_pocovid_maastricht_results'

In [14]:
# Read the frame-level predictions of the selected model; the first CSV column becomes the index.
all_frames_predicted = pd.read_csv(f"{args['save_path']}_all_frames_predicted.csv", index_col=0)

# Cast labels and predicted class indices to integers before scoring.
for label_column in ("gt", "predIdxs"):
    all_frames_predicted[label_column] = all_frames_predicted[label_column].astype(int)

# Frame level: every frame is scored as an individual sample.
scores = get_scores(all_frames_predicted["gt"], all_frames_predicted["predIdxs"], name="all_frames_predicted")

# Video level: the label of the first frame of each video is used as the video label and
# the per-frame predictions are averaged.
video_level = (
    all_frames_predicted
    .groupby("video_name")
    .agg({"gt": "first", "predIdxs": "mean"})
    .reset_index()
)
# The mean of the hard per-frame predictions is the share of frames predicted as class 1
# (assuming predIdxs only holds 0/1), not a model probability. A video is positive only
# if that share is strictly above 0.5; an exact tie yields 0.
video_level["predIdxs"] = video_level["predIdxs"].gt(0.5).astype(int)

video_scores = get_scores(video_level["gt"], video_level["predIdxs"], name="video_level")

# Patient level: same aggregation and threshold, grouped by patient instead of video.
patient_level = (
    all_frames_predicted
    .groupby("Patient ID")
    .agg({"gt": "first", "predIdxs": "mean"})
    .reset_index()
)
patient_level["predIdxs"] = patient_level["predIdxs"].gt(0.5).astype(int)

patient_scores = get_scores(patient_level["gt"], patient_level["predIdxs"], name="patient_level")

# Stack the three granularities into one table and tag every row with the model name.
scores = pd.concat([scores, video_scores, patient_scores])
scores["model"] = MODEL_TO_LOAD
scores

Unnamed: 0,accuracy,sensitivity,specifictiy,f1,model
all_frames_predicted,0.591745,0.560399,0.625353,0.586861,continued_vgg_pre_pocovid_maastricht
video_level,0.584906,0.551724,0.625,0.592593,continued_vgg_pre_pocovid_maastricht
patient_level,0.645161,0.606061,0.689655,0.645161,continued_vgg_pre_pocovid_maastricht


In [15]:
# Render the score table as LaTeX: floats with three decimals, row labels omitted.
latex_table = scores.to_latex(index=False, float_format="%.3f")
print(latex_table)

\begin{tabular}{rrrrl}
\toprule
 accuracy &  sensitivity &  specifictiy &    f1 &                                model \\
\midrule
    0.592 &        0.560 &        0.625 & 0.587 & continued\_vgg\_pre\_pocovid\_maastricht \\
    0.585 &        0.552 &        0.625 & 0.593 & continued\_vgg\_pre\_pocovid\_maastricht \\
    0.645 &        0.606 &        0.690 & 0.645 & continued\_vgg\_pre\_pocovid\_maastricht \\
\bottomrule
\end{tabular}

