As written, this notebook works only with the train/validation split.

In [None]:
import torch
from pathlib import Path
import os
import matplotlib.pyplot as plt
import numpy as np
from data_utils import SERDataset
from sklearn.metrics import f1_score

# A notebook has no real __file__, so the literal string "__file__" is resolved
# as a path instead; its containing directory is used as the base directory.
ipynb_path = os.path.split(os.path.realpath("__file__"))[0]
cur_path = Path(ipynb_path)


# Evaluate a model's training history


In [None]:
# Select the checkpoint (model and weights) to inspect
from MMFUSION.data import load_inference_model

saved_model_path = cur_path / "saved_models"
load_checkpoint_path = saved_model_path.joinpath("MMFUSION_train", "MMFUSION.tar")


In [None]:
# Load the checkpoint; `history` is iterated below as one record per epoch
model_collation, history = load_inference_model(load_checkpoint_path)

# Extract the per-epoch curves from the history records
train_loss = [e["train_loss"] for e in history]
val_loss = [e["val_loss"] for e in history]
train_acc = [e["train_acc"] for e in history]
val_acc = [e["val_acc"] for e in history]

# Number epochs from 1 so the plot axes agree with the "best epoch" values
# printed at the end of this cell.
epochs = range(1, len(train_loss) + 1)

plt.title("Model Loss")
plt.plot(epochs, train_loss, label="train")
plt.plot(epochs, val_loss, label="validation")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()

plt.title("Model Accuracy")
plt.plot(epochs, train_acc, label="train")
plt.plot(epochs, val_acc, label="validation")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

best_acc_idx = int(np.argmax(val_acc))
best_loss_idx = int(np.argmin(val_loss))
print("Best epoch for val accuracy:", best_acc_idx + 1)
print("Best epoch for val loss:", best_loss_idx + 1)
# Report the accuracy at the best epoch rather than at a fixed index, which
# would raise IndexError for any run with fewer epochs than that index.
print("Best val accuracy:", val_acc[best_acc_idx])

# Evaluate a model on the validation set

In [None]:
# Checkpoint to evaluate on the validation set
saved_model_path = (cur_path / "saved_models")
load_checkpoint_path = (saved_model_path / "MMFUSION_train" / "MMFUSION.tar")

from MMFUSION.data import load_inference_model, init_test_dataset

# Use the GPU when one is available; otherwise fall back to the CPU so the
# evaluation still runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"


In [None]:
from torch.utils.data import DataLoader

# Load the checkpoint (model bundle and its recorded history)
model_collation, history = load_inference_model(load_checkpoint_path)

# Initialize dataset
valset_info_path = (cur_path / "datasets/labeled_data/2022challengeA_val.csv").resolve()
val_audio_folder_path = (cur_path / "datasets/labeled_data/val").resolve()

t_dataset = init_test_dataset(valset_info_path, val_audio_folder_path, SERDataset)

# Move the model to the target device and switch it to evaluation mode
inference_model = model_collation["model"].to(device)
inference_model.eval()
inference_pipeline = model_collation["inference_pipeline"]

# Batch the data. The aggregate metrics do not depend on sample order, so the
# loader is not shuffled.
batched_test = DataLoader(t_dataset, batch_size=6, shuffle=False, num_workers=2)

print("Evaluating metrics on validation set")
y_preds = []
ys = []
# Gradients are never used during evaluation; disabling them avoids building
# an autograd graph for every batch.
with torch.no_grad():
    for batch_data in batched_test:
        X = inference_pipeline(batch_data["soundwave"]).to(device)
        # Labels are only compared on the host, so they are not moved to `device`.
        y = batch_data["emotion"]

        probs = inference_model(X)

        # Predicted class = index of the largest score along the last dimension
        y_pred = torch.argmax(probs, dim=-1)

        # reshape(-1) turns a 0-dim result into a 1-element vector, so the
        # same code handles both scalar and batched outputs.
        y_preds.extend(y_pred.reshape(-1).tolist())
        ys.extend(y.reshape(-1).tolist())

print("Results:")
print("F1 macro: ", f1_score(ys, y_preds, average="macro"))
print("Accuracy:", np.mean(np.array(ys) == np.array(y_preds)))