# VGG Model evaluation
This notebook reads the model evaluation results from the VGG model and prints the results used in the Thesis as latex tables.

In [1]:
import pandas as pd
import os
import sys; sys.path.insert(0, "../utils/")
from data_utils import get_data_location

In [2]:
MODELS_PATH = os.path.join(get_data_location(), "matthias/trained_models/")
MODELS_PATH

'/itet-stor/mrichte/covlus_bmicnas02/matthias/trained_models/'

In [3]:
# Create dataframe with all trained models
trained_models = os.listdir(MODELS_PATH)
trained_models = [m for m in trained_models if "vgg" in m]
model_df = pd.DataFrame(trained_models)
model_df_1 = model_df[0].str.replace("simple_", "").str.replace("pre_", "").str.split("_", n=2, expand=True)
model_df = pd.concat([model_df, model_df_1], axis=1)
model_df.columns = ["dir", "train_type", "model_id", "dataset"]
model_df.dataset = model_df.dataset.str.replace("^.*?maastricht", "maastricht", regex=True)

model_df

Unnamed: 0,dir,train_type,model_id,dataset
0,simple_replicated_vgg_covid_us,replicated,vgg,covid_us
1,simple_replicated_vgg_pocovid_covid_us_combined,replicated,vgg,pocovid_covid_us_combined
2,simple_replicated_vgg_pocovid,replicated,vgg,pocovid
3,simple_replicated_vgg_maastricht,replicated,vgg,maastricht
4,continued_vgg_pre_pocovid_maastricht,continued,vgg,maastricht
5,continued_vgg_pre_covid_us_maastricht,continued,vgg,maastricht
6,continued_vgg_pre_combined_maastricht,continued,vgg,maastricht


In [4]:
# Check if predictions exist for all models
for idx, row in model_df.iterrows():
    args = {
    "save_path": os.path.join(MODELS_PATH, row.dir, row.dir + "_results")
    }
    # Check if file exists
    if os.path.exists(args["save_path"] + "_all_frames_predicted.csv"):
        print(f"Predictions exist for {row.dir}")
    else:
        print(f"Predictions do not exist for {row.dir}")

Predictions exist for simple_replicated_vgg_covid_us
Predictions exist for simple_replicated_vgg_pocovid_covid_us_combined
Predictions exist for simple_replicated_vgg_pocovid
Predictions exist for simple_replicated_vgg_maastricht
Predictions exist for continued_vgg_pre_pocovid_maastricht
Predictions exist for continued_vgg_pre_covid_us_maastricht
Predictions exist for continued_vgg_pre_combined_maastricht


In [5]:
MODEL_TO_LOAD = "simple_replicated_vgg_maastricht"

# Check if row in model_df
if MODEL_TO_LOAD not in model_df.dir.values:
    raise ValueError("Model not found")

# Get row
row = model_df[model_df.dir == MODEL_TO_LOAD].iloc[0]

args = {
"dataset": row.dataset,
"weights": os.path.join(MODELS_PATH, row.dir),
"m_id": "vgg_base",
"classes": 2,
"folds": 5,
"save_path": os.path.join(MODELS_PATH, row.dir, row.dir + "_results")
}

if os.path.exists(args["save_path"] + "_all_frames_predicted.csv"):
    print(f"Predictions exist for {row.dir}")

args["save_path"]

Predictions exist for simple_replicated_vgg_maastricht


'/itet-stor/mrichte/covlus_bmicnas02/matthias/trained_models/simple_replicated_vgg_maastricht/simple_replicated_vgg_maastricht_results'

In [6]:
# Load evaluation results dir + results_aggregation_strategy_evaluation.csv
eval_df = pd.read_csv(args["save_path"] + "_aggregation_strategy_evaluation.csv")

# Aggregate over folds with mean
eval_df = eval_df.groupby(["Aggregation Strategy"]).mean().reset_index()

In [7]:
# Load all_frames_predicted.csv
all_frames_predicted = pd.read_csv(args["save_path"] + "_all_frames_predicted.csv", index_col=0)

In [8]:
# Get number of frames per video_name
# all_frames_predicted.groupby("video_name").count()["Frame"].hist()

In [9]:
# Scores
from sklearn.metrics import recall_score, f1_score, accuracy_score

def get_scores(gt, pred, name=0):
    scores = {
    "accuracy": accuracy_score(gt, pred),
    "sensitivity": recall_score(gt, pred),
    "specifictiy": recall_score(gt, pred, pos_label=0),
    "f1": f1_score(gt, pred),
    }
    # return scores as df
    return pd.DataFrame(scores, index=[name])

# Get scores for all folds
def get_fold_scores(df, name=None):
    # Get scores for each fold
    scores = []
    for fold in df.Fold.unique():
        # Get gt and pred
        gt = df[df.Fold == fold]["gt"]
        pred = df[df.Fold == fold]["predIdxs"]
        # Get scores
        scores.append(get_scores(gt, pred, name=fold))
    # Concat scores
    scores = pd.concat(scores)
    scores_mean = scores.mean()
    scores_std = scores.std()
    return scores_mean, scores_std

In [10]:
all_frames_predicted["gt"] = all_frames_predicted["gt"].astype(int)
all_frames_predicted["predIdxs"] = all_frames_predicted["predIdxs"].astype(int)

# Get scores for all folds
scores_mean, scores_std = get_fold_scores(all_frames_predicted, name="Frames")

# Aggregate on the video level
video_level = all_frames_predicted.groupby("video_name").agg({"gt": "first", "predIdxs": "mean", "Fold":"first"}).reset_index()
# Set prediction to 1 if probability is > 0.5
video_level["predIdxs"] = video_level["predIdxs"].apply(lambda x: 1 if x > 0.5 else 0)

# Get scores for all folds
scores_mean_video, scores_std_video = get_fold_scores(video_level, name="Video")

# Patient level
patient_level = all_frames_predicted.groupby("Patient ID").agg({"gt": "first", "predIdxs": "mean", "Fold":"first"}).reset_index()
patient_level["predIdxs"] = patient_level["predIdxs"].apply(lambda x: 1 if x > 0.5 else 0)

# Get scores for all folds
scores_mean_patient, scores_std_patient = get_fold_scores(patient_level, name="Patient")

# Combine scores_mean and scores_sta and assing names
scores_mean = pd.concat([scores_mean, scores_mean_video, scores_mean_patient], axis=1).T
scores_std = pd.concat([scores_std, scores_std_video, scores_std_patient], axis=1).T
# Add index
scores_mean.index = ["Frame", "Video", "Patient"]
scores_std.index = ["Frame", "Video", "Patient"]
scores_mean

Unnamed: 0,accuracy,sensitivity,specifictiy,f1
Frame,0.596437,0.614094,0.567837,0.603836
Video,0.604292,0.638172,0.563612,0.630106
Patient,0.597552,0.635714,0.518095,0.585758


In [11]:
# Combine scores_mean and scores_std into latex table
# Convert to string
scores_mean_latex = scores_mean.applymap(lambda x: f"{x:.2f}") + " $_{\pm " + scores_std.applymap(lambda x: f"{x:.2f}") + "}$"
# # Accuracy in percent
# scores_mean_latex["accuracy"] = scores_mean["accuracy"].apply(lambda x: "{:.1f}".format(round(x*100,1))+"\%") + " $_{\pm " + scores_std["accuracy"].apply(lambda x: "{:.1f}".format(round(x*100,1))+"\%") + "}$"
# Drop f1
scores_mean_latex = scores_mean_latex.drop("f1", axis=1)

In [12]:
#Print latex table
print(scores_mean_latex.to_latex(escape=False))

\begin{tabular}{llll}
\toprule
{} &            accuracy &         sensitivity &         specifictiy \\
\midrule
Frame   &  0.60 $_{\pm 0.04}$ &  0.61 $_{\pm 0.14}$ &  0.57 $_{\pm 0.20}$ \\
Video   &  0.60 $_{\pm 0.04}$ &  0.64 $_{\pm 0.16}$ &  0.56 $_{\pm 0.23}$ \\
Patient &  0.60 $_{\pm 0.09}$ &  0.64 $_{\pm 0.33}$ &  0.52 $_{\pm 0.32}$ \\
\bottomrule
\end{tabular}



  print(scores_mean_latex.to_latex(escape=False))


In [13]:
MODEL_TO_LOAD = "continued_vgg_pre_pocovid_maastricht"

# Check if row in model_df
if MODEL_TO_LOAD not in model_df.dir.values:
    raise ValueError("Model not found")

# Get row
row = model_df[model_df.dir == MODEL_TO_LOAD].iloc[0]

args = {
"dataset": row.dataset,
"weights": os.path.join(MODELS_PATH, row.dir),
"m_id": "vgg_base",
"classes": 2,
"folds": 5,
"save_path": os.path.join(MODELS_PATH, row.dir, row.dir + "_results")
}

if os.path.exists(args["save_path"] + "_all_frames_predicted.csv"):
    print(f"Predictions exist for {row.dir}")

args["save_path"]

Predictions exist for continued_vgg_pre_pocovid_maastricht


'/itet-stor/mrichte/covlus_bmicnas02/matthias/trained_models/continued_vgg_pre_pocovid_maastricht/continued_vgg_pre_pocovid_maastricht_results'

In [14]:
# Load all_frames_predicted.csv
all_frames_predicted = pd.read_csv(args["save_path"] + "_all_frames_predicted.csv", index_col=0)

all_frames_predicted["gt"] = all_frames_predicted["gt"].astype(int)
all_frames_predicted["predIdxs"] = all_frames_predicted["predIdxs"].astype(int)

# Get scores for all folds
scores_mean, scores_std = get_fold_scores(all_frames_predicted, name="Frames")

# Aggregate on the video level
video_level = all_frames_predicted.groupby("video_name").agg({"gt": "first", "predIdxs": "mean", "Fold":"first"}).reset_index()
# Set prediction to 1 if probability is > 0.5
video_level["predIdxs"] = video_level["predIdxs"].apply(lambda x: 1 if x > 0.5 else 0)

# Get scores for all folds
scores_mean_video, scores_std_video = get_fold_scores(video_level, name="Video")

# Patient level
patient_level = all_frames_predicted.groupby("Patient ID").agg({"gt": "first", "predIdxs": "mean", "Fold":"first"}).reset_index()
patient_level["predIdxs"] = patient_level["predIdxs"].apply(lambda x: 1 if x > 0.5 else 0)

# Get scores for all folds
scores_mean_patient, scores_std_patient = get_fold_scores(patient_level, name="Patient")

# Combine scores_mean and scores_sta and assing names
scores_mean = pd.concat([scores_mean, scores_mean_video, scores_mean_patient], axis=1).T
scores_std = pd.concat([scores_std, scores_std_video, scores_std_patient], axis=1).T
# Add index
scores_mean.index = ["Frame", "Video", "Patient"]
scores_std.index = ["Frame", "Video", "Patient"]
scores_mean


Unnamed: 0,accuracy,sensitivity,specifictiy,f1
Frame,0.589186,0.557256,0.619032,0.568195
Video,0.583987,0.545773,0.620441,0.569208
Patient,0.643706,0.60119,0.666667,0.570256


In [15]:
# Combine scores_mean and scores_std into latex table
# Convert to string
scores_mean_latex = scores_mean.applymap(lambda x: f"{x:.2f}") + " $_{\pm " + scores_std.applymap(lambda x: f"{x:.2f}") + "}$"
# # Accuracy in percent
# scores_mean_latex["accuracy"] = scores_mean["accuracy"].apply(lambda x: "{:.1f}".format(round(x*100,1))+"\%") + " $_{\pm " + scores_std["accuracy"].apply(lambda x: "{:.1f}".format(round(x*100,1))+"\%") + "}$"
# Drop f1
scores_mean_latex = scores_mean_latex.drop("f1", axis=1)

In [16]:
#Print latex table
print(scores_mean_latex.to_latex(escape=False))

\begin{tabular}{llll}
\toprule
{} &            accuracy &         sensitivity &         specifictiy \\
\midrule
Frame   &  0.59 $_{\pm 0.07}$ &  0.56 $_{\pm 0.20}$ &  0.62 $_{\pm 0.18}$ \\
Video   &  0.58 $_{\pm 0.07}$ &  0.55 $_{\pm 0.21}$ &  0.62 $_{\pm 0.17}$ \\
Patient &  0.64 $_{\pm 0.11}$ &  0.60 $_{\pm 0.37}$ &  0.67 $_{\pm 0.25}$ \\
\bottomrule
\end{tabular}



  print(scores_mean_latex.to_latex(escape=False))
