# How well do the models generalize to other datasets?


In [6]:
# Evaluation switch: "LS" makes the loading cell build the latent datasets;
# any other value (intended: "IS") makes it build the video datasets. The
# value is also inserted into the checkpoint directory name when the models
# are loaded.
mode = "LS" # LS or IS (latent/image)
import sys
import os
import numpy as np
from sklearn import metrics
# Make the directory two levels above the current working directory
# importable; presumably this is where the project's `utils` package lives.
utils_path = os.path.abspath(os.path.join('../../'))
if utils_path not in sys.path:
    sys.path.append(utils_path)
# NOTE(review): both `model_forward_to_corrcoeff` and
# `model_forward_to_corr_coeff` are imported and both are used further down
# (with one and two inputs respectively) -- confirm that both helpers exist.
from utils.notebookutils import SimaseUSLatentDataset,SimaseUSVideoDataset, SiameseNetwork, model_forward_to_corrcoeff,model_forward_to_pred, model_forward_to_bin_pred, model_forward_to_corr_coeff

### Load models and datasets

In [3]:
import torch
import json


def normalization(x):
    """Min-max rescale ``x`` to the range [-1, 1].

    Should be -1 to 1 due to the way we trained the model.
    Note: a constant input makes the denominator ``x.max() - x.min()`` zero.
    """
    return (x - x.min()) / (x.max() - x.min()) * 2 - 1


# datasets
# Every split lives under one root directory holding a FileList.csv plus a
# data sub-directory ("Latents" for latent space, "Videos" for image space).
if mode == "LS":
    dataset_cls = SimaseUSLatentDataset
    data_subdir = "Latents"
    dataset_roots = {
        "d": "/vol/ideadata/at70emic/projects/TMI23/data/diffusion/dynamic",
        "p": "/vol/ideadata/at70emic/projects/TMI23/data/diffusion/PSAX",
        "a": "/vol/ideadata/at70emic/projects/TMI23/data/diffusion/A4C",
    }
else:
    dataset_cls = SimaseUSVideoDataset
    data_subdir = "Videos"
    dataset_roots = {
        "d": "/vol/ideadata/at70emic/datasets/EchoNet-Dynamic",
        "p": "/vol/ideadata/at70emic/datasets/Echonet-Peds/PSAX/processed",
        "a": "/vol/ideadata/at70emic/datasets/Echonet-Peds/A4C/processed",
    }

# Built in the order dynamic, PSAX, A4C with identical settings for each split.
datasets = {
    key: dataset_cls(
        phase="testing",
        transform=normalization,
        latents_csv=f"{root}/FileList.csv",
        training_latents_base_path=f"{root}/{data_subdir}",
        in_memory=False,
        generator_seed=0,
    )
    for key, root in dataset_roots.items()
}
# Keep the individual names available for any cell that refers to them.
ds_test_dynamic, ds_test_psax, ds_test_a4c = datasets["d"], datasets["p"], datasets["a"]
ds_name_to_name = {"d": "Dynamic", "p": "PSAX", "a": "A4C"}

#load models
models = {"a": None, "d": None, "p": None}
for model_name, model_ending in zip(["a", "d", "p"], ["a4c", "Dynamic", "psax"]):
    model_basepath = f"/vol/ideadata/ed52egek/pycharm/privatis_us/archive/{model_ending}{mode}Best"

    # parse config (separate name for the file handle so it is not shadowed)
    with open(os.path.join(model_basepath, "config.json")) as config_file:
        config = json.load(config_file)

    net = SiameseNetwork(network=config['siamese_architecture'], in_channels=config['n_channels'], n_features=config['n_features'])
    net.eval()
    net = net.cuda()

    # os.listdir order is arbitrary, so sort to make the choice reproducible
    # and fail with a clear message when no checkpoint is present.
    checkpoints = sorted(x for x in os.listdir(model_basepath) if x.endswith("best_network.pth"))
    if not checkpoints:
        raise FileNotFoundError(f"No '*best_network.pth' checkpoint found in {model_basepath}")
    best_name = checkpoints[0]
    net.load_state_dict(torch.load(os.path.join(model_basepath, best_name)))
    models[model_name] = net


Set testing dataset seed to 0
Set testing dataset seed to 0
Set testing dataset seed to 0




In [5]:
# Cross-dataset evaluation: for every (test dataset, trained model) pair, run
# `model_forward_to_corrcoeff` on each dataset item and report the mean and
# standard deviation of all returned values as a LaTeX snippet.
print(f"Results for {mode}")
for ds_name in ["d", "a", "p"]:
    for model_name in ["d", "a", "p"]:
        corr_preds = []
        for i in range(len(datasets[ds_name])):
            # Fixed: the closing bracket of the dataset index was missing,
            # which made the whole cell a syntax error.
            pred = model_forward_to_corrcoeff(models[model_name], datasets[ds_name][i])
            corr_preds.append(pred.flatten())

        # Pool the values of all items before computing the statistics.
        corr_preds = torch.cat(corr_preds)
        mean = float(corr_preds.mean())
        std = float(corr_preds.std())

        # Output LaTeX formatted string
        print(f"Results for Train {ds_name_to_name[model_name]} -- Test: {ds_name_to_name[ds_name]}:      " + f"${mean:.2f} \\pm {std:.2f}$")

Results for IS
Results for Train Dynamic -- Test: Dynamic:      $0.76 \pm 0.12$
Results for Train A4C -- Test: Dynamic:      $0.72 \pm 0.16$
Results for Train PSAX -- Test: Dynamic:      $0.73 \pm 0.12$
Results for Train Dynamic -- Test: A4C:      $0.71 \pm 0.15$
Results for Train A4C -- Test: A4C:      $0.71 \pm 0.14$
Results for Train PSAX -- Test: A4C:      $0.59 \pm 0.21$
Results for Train Dynamic -- Test: PSAX:      $0.71 \pm 0.15$
Results for Train A4C -- Test: PSAX:      $0.70 \pm 0.15$
Results for Train PSAX -- Test: PSAX:      $0.68 \pm 0.13$


In [12]:
import numpy as np
from sklearn import metrics

# Cross-dataset ROC AUC: for every (test dataset, trained model) pair, build
# one frame pair per dataset item, score it with `model_forward_to_pred`, and
# compare the scores against the same-video (1) / different-video (0) labels.
# NOTE(review): the statement order matters here -- every call on
# `dataset.generator` advances the random stream, so reordering changes which
# pairs are drawn.
for ds_name in ["d", "a", "p"]:
    for model_name in ["d", "a", "p"]:
        # set same seed for each run to ensure comparability
        y_pred = []
        y_true = []

        dataset = datasets[ds_name]
        # presumably restores the generator to its initial seed so that every
        # model is evaluated on the same pairs -- TODO confirm in the dataset class
        dataset.reset_generator()
        model = models[model_name]
        for i in range(len(dataset)):
            if dataset.generator.uniform() < 0.5: 
                # Negative pair: item i versus another item. The offset is
                # drawn from [1, len(dataset)) (assuming numpy Generator
                # semantics with an exclusive upper bound), so after the
                # modulo the index never equals i.
                y = 0 
                vid_a = dataset[i]
                vid_b = torch.clone(dataset.get_vid((i + dataset.generator.integers(low=1, high=len(dataset))) % len(dataset))) # random different vid
            else: 
                # Positive pair: both sides come from item i.
                y = 1
                vid_a = dataset[i] 
                vid_b = dataset[i] 

            # Pick a frame from vid_a and a frame of vid_b at a non-zero
            # cyclic offset, so a positive pair never compares a frame with
            # itself (assuming an exclusive upper bound, see above).
            frame_a = dataset.generator.integers(len(vid_a))
            frame_b = (frame_a + dataset.generator.integers(low=1, high=len(vid_b))) % len(vid_b)

            # unsqueeze(dim=0) turns each selected frame into a batch of one.
            y_pred.append(model_forward_to_pred(model, vid_a[frame_a].unsqueeze(dim=0), vid_b[frame_b].unsqueeze(dim=0)))
            y_true.append(y)
            # Debug shortcut (disabled): stop after the first three items.
            #if i == 2: 
            #   break

        y_pred = np.concatenate(y_pred)
        y_true = np.stack(y_true)

        auc = metrics.roc_auc_score(y_true, y_pred)
        print(f"Results for Train {ds_name_to_name[model_name]} -- Test: {ds_name_to_name[ds_name]}:      " + f"{auc:.3f}")



Results for Train Dynamic -- Test: Dynamic:      1.000
Results for Train A4C -- Test: Dynamic:      0.994
Results for Train PSAX -- Test: Dynamic:      0.996
Results for Train Dynamic -- Test: A4C:      0.995
Results for Train A4C -- Test: A4C:      1.000
Results for Train PSAX -- Test: A4C:      0.984
Results for Train Dynamic -- Test: PSAX:      0.997
Results for Train A4C -- Test: PSAX:      1.000
Results for Train PSAX -- Test: PSAX:      0.995


In [39]:
def model_forward_to_abs(model, input_a, input_b, bs=256):
    """Score two inputs by the negated mean absolute difference of their features.

    The inputs are processed in chunks of ``bs`` along their first dimension.
    Each chunk is moved to the GPU, embedded with ``model.forward_once``, and
    reduced to one scalar: ``-mean(|feature_a - feature_b|)`` over all
    elements of the chunk. Values closer to zero mean more similar features.
    In this notebook the function is called with a single frame per input,
    which yields exactly one score.

    Args:
        model: object exposing ``forward_once(batch)``.
        input_a: first input, sliceable along dimension 0.
        input_b: second input, aligned with ``input_a``.
        bs: chunk size used for the forward passes.

    Returns:
        1-D CPU tensor with one score per processed chunk (empty tensor for
        empty input).
    """
    coeffs = []
    with torch.no_grad():
        for start in range(0, len(input_a), bs):
            batch_a = input_a[start:start + bs].cuda()
            batch_b = input_b[start:start + bs].cuda()
            feature_a = model.forward_once(batch_a)
            feature_b = model.forward_once(batch_b)
            # Collect the score inside the loop; previously this ran once
            # after the loop, so only the last chunk was ever scored.
            coeffs.append(-1 * torch.abs(feature_a - feature_b).mean().cpu())
    if not coeffs:
        # Nothing to score: torch.stack cannot handle an empty list.
        return torch.empty(0)
    return torch.stack(coeffs)

def model_forward_to_mse(model, input_a, input_b, bs=256):
    """Score two inputs by the negated mean squared difference of their features.

    The inputs are processed in chunks of ``bs`` along their first dimension.
    Each chunk is moved to the GPU, embedded with ``model.forward_once``, and
    reduced to one scalar: ``-mean((feature_a - feature_b) ** 2)`` over all
    elements of the chunk. Values closer to zero mean more similar features.
    In this notebook the function is called with a single frame per input,
    which yields exactly one score.

    Args:
        model: object exposing ``forward_once(batch)``.
        input_a: first input, sliceable along dimension 0.
        input_b: second input, aligned with ``input_a``.
        bs: chunk size used for the forward passes.

    Returns:
        1-D CPU tensor with one score per processed chunk (empty tensor for
        empty input).
    """
    coeffs = []
    with torch.no_grad():
        for start in range(0, len(input_a), bs):
            batch_a = input_a[start:start + bs].cuda()
            batch_b = input_b[start:start + bs].cuda()
            feature_a = model.forward_once(batch_a)
            feature_b = model.forward_once(batch_b)
            # Collect the score inside the loop; previously this ran once
            # after the loop, so only the last chunk was ever scored.
            coeffs.append(-1 * ((feature_a - feature_b) ** 2).mean().cpu())
    if not coeffs:
        # Nothing to score: torch.stack cannot handle an empty list.
        return torch.empty(0)
    return torch.stack(coeffs)



In [34]:
def _score_frame_pair(prediction_mode, model, frame_x, frame_y):
    """Forward one frame pair to the scoring function named by ``prediction_mode``."""
    if prediction_mode == "corr":
        return model_forward_to_corr_coeff(model, frame_x, frame_y)
    if prediction_mode == "fcout":
        return model_forward_to_pred(model, frame_x, frame_y)
    if prediction_mode == "abs":
        return model_forward_to_abs(model, frame_x, frame_y)
    if prediction_mode == "mse":
        return model_forward_to_mse(model, frame_x, frame_y)
    raise ValueError("Unknown prediction type")


# Repeat the cross-dataset ROC AUC evaluation once per scoring function and
# print, per mode, the nine AUCs, their unweighted mean and a LaTeX table row.
for PREDICTION_MODE in ["mse", "abs", "fcout", "corr"]:
    aucs = []
    print("=" * 80)
    print(f"PREDICTION_MODE: {PREDICTION_MODE}")
    latex = ""
    for ds_name in ["d", "a", "p"]:
        for model_name in ["d", "a", "p"]:
            dataset = datasets[ds_name]
            model = models[model_name]
            # reset the generator for each run to ensure comparability
            dataset.reset_generator()

            y_pred, y_true = [], []
            for i in range(len(dataset)):
                # The draws on dataset.generator and the dataset accesses
                # below must stay in exactly this order.
                different_video = dataset.generator.uniform() < 0.5
                vid_a = dataset[i]
                if different_video:
                    y = 0
                    # random different vid: non-zero cyclic offset from i
                    other_index = (i + dataset.generator.integers(low=1, high=len(dataset))) % len(dataset)
                    vid_b = torch.clone(dataset.get_vid(other_index))
                else:
                    y = 1
                    vid_b = dataset[i]

                frame_a = dataset.generator.integers(len(vid_a))
                frame_shift = dataset.generator.integers(low=1, high=len(vid_b))
                frame_b = (frame_a + frame_shift) % len(vid_b)

                y_pred_cur = _score_frame_pair(
                    PREDICTION_MODE,
                    model,
                    vid_a[frame_a].unsqueeze(dim=0),
                    vid_b[frame_b].unsqueeze(dim=0),
                )

                y_pred.append(y_pred_cur)
                y_true.append(y)

            y_pred = np.concatenate(y_pred)
            y_true = np.stack(y_true)

            auc = metrics.roc_auc_score(y_true, y_pred)
            aucs.append(auc)
            latex += f" {auc:.3f} &"
            print(f"Results for Train {ds_name_to_name[model_name]} -- Test: {ds_name_to_name[ds_name]}:      " + f"{auc:.3f}")

    mean = float(np.stack(aucs).mean())
    latex += f" {mean:.3f}"
    print(f"Unweighted mean of all datasets for prediction mode {PREDICTION_MODE}: {mean}")
    print(latex)


PREDICTION_MODE: mae
Results for Train Dynamic -- Test: Dynamic:      0.995
Results for Train A4C -- Test: Dynamic:      0.978
Results for Train PSAX -- Test: Dynamic:      0.986
Results for Train Dynamic -- Test: A4C:      0.998
Results for Train A4C -- Test: A4C:      0.986
Results for Train PSAX -- Test: A4C:      0.989
Results for Train Dynamic -- Test: PSAX:      0.998
Results for Train A4C -- Test: PSAX:      0.996
Results for Train PSAX -- Test: PSAX:      0.999
Unweighted mean of all datasets for prediction mode mae: 0.9916936256523824
 0.995 & 0.978 & 0.986 & 0.998 & 0.986 & 0.989 & 0.998 & 0.996 & 0.999 & 0.992
PREDICTION_MODE: abs
Results for Train Dynamic -- Test: Dynamic:      0.996
Results for Train A4C -- Test: Dynamic:      0.987
Results for Train PSAX -- Test: Dynamic:      0.990
Results for Train Dynamic -- Test: A4C:      0.998
Results for Train A4C -- Test: A4C:      0.986
Results for Train PSAX -- Test: A4C:      0.990
Results for Train Dynamic -- Test: PSAX:      

## Note: to stay consistent, I report the AUC values from the original training runs. Those are even better (perfect for Dynamic and PSAX).