# Parsing results notebook

Selecting devices...

In [None]:
import os

# Pin GPU selection through the environment; this cell runs before torch is imported.
os.environ.update(
    {
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",  # see issue #152
        "CUDA_VISIBLE_DEVICES": "0",  # Will use only 1 GPU, please adjust to your GPU config
    }
)

Imports...

In [None]:
import sys
sys.path.insert(1, "..")
from datasets.ssl_dataset import SSL_Dataset
from datasets.data_utils import get_data_loader
import matplotlib.pyplot as plt
import torch
from glob import glob
from utils import net_builder
from train_utils import mcc
import pandas as pd
from tqdm import tqdm

Loading dataset...

In [None]:
# Root of the MSMatch checkout. Point the MSMATCH_DIR environment variable at your
# own copy ("..insert..your..path..here.."); when it is unset the example path
# below is used.
MSMatch_dir = os.environ.get("MSMATCH_DIR", "/home/roberto/PythonProjects/END2END/MSMatch")
dataset_dir = os.path.join(MSMatch_dir, "data")  # e.g. "/home/gabrielemeoni/project/END2END/MSMatch/data/"

# Test split of the dataset; the class and channel counts are reused when the
# networks are rebuilt for evaluation.
test_dset = SSL_Dataset(name="thraws_swir_test", train=False, data_dir=dataset_dir)

test_dset_basic = test_dset.get_dset()
num_classes = test_dset.num_classes
num_channels = test_dset.num_channels
# Evaluation loader with batch size 8 and a single worker.
eval_loader = get_data_loader(test_dset_basic, 8, num_workers=1)
    

Checkpoint path...

In [None]:
# One directory per training configuration, all located under <MSMatch_dir>/checkpoints.
checkpoint_paths = [
    os.path.join(MSMatch_dir, "checkpoints/" + run_name)
    for run_name in (
        "final_train_supervised",
        "final_train_supervised_no_weights",
        "final_training_msmatch_trained",
    )
]

Checking various checkpoints....

In [None]:
# Evaluate every checkpoint family / seed / upsampling value on the test set.
# Directory layout, as implied by the path parsing below:
#   <family>/<name ending in _<seed>>/<name containing _{<upsampling>}>/.../model_best.pth
# Results are stored as
#   checkpoints_results_dict[family]["acc" | "mcc"]["seed_<n>"]["up_<k>"].

# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

checkpoints_names_list = []
checkpoint_results_list = []
for checkpoint_path in tqdm(checkpoint_paths, desc="Checking checkpoint"):
    checkpoints_names_list.append(checkpoint_path.split(os.sep)[-1])
    test_checkpoint_seed_paths = sorted(glob(os.path.join(checkpoint_path, "*")))
    seeds = []
    test_results_mcc_seed = []
    test_results_acc_seed = []

    for test_seed_path in tqdm(test_checkpoint_seed_paths, desc="Processing seed..."):
        # The seed number is whatever follows the last "_" in the directory name.
        seeds.append(int(test_seed_path.split(os.sep)[-1].split("_")[-1]))
        upsampling_values_test_paths = sorted(glob(os.path.join(test_seed_path, "*")))
        test_results_acc_seed_upsample = []
        test_results_mcc_seed_upsample = []
        upsampling_values = []
        for test in upsampling_values_test_paths:
            # The upsampling value is the integer inside the first "_{...}" of the path.
            upsampling_values.append(int(test.split("_{")[1].split("}")[0]))

            # Follow the chain of single-child directories down to the run directory.
            # glob() already returns paths that include `test`, so the child is used
            # as-is (joining it onto `test` again would duplicate a relative prefix),
            # and the descent stops at files so a lone model file is not entered.
            while True:
                children = glob(os.path.join(test, "*"))
                if len(children) != 1 or not os.path.isdir(children[0]):
                    break
                test = children[0]

            # A separate name keeps the outer loop variable `checkpoint_path` intact.
            model_path = os.path.join(test, "model_best.pth")
            # map_location lets checkpoints saved on a GPU be loaded on CPU-only machines.
            checkpoint = torch.load(model_path, map_location=device)
            load_model = checkpoint["eval_model"]

            _net_builder = net_builder(
                "efficientnet-lite0",
                False,
                {
                    "depth": 28,
                    "widen_factor": 2,
                    "leaky_slope": 0.1,
                    "dropRate": 0.0,
                },
            )
            net = _net_builder(num_classes=num_classes, in_channels=num_channels)
            net.load_state_dict(load_model)
            net.to(device)
            net.eval()

            acc = 0.0
            n = 0  # first-batch flag for the pred / correct accumulators
            with torch.no_grad():
                for image, target in eval_loader:
                    # Inputs go to the same device as the network.
                    image = image.type(torch.FloatTensor).to(device)
                    logit = net(image)
                    predicted = logit.cpu().max(1)[1]
                    acc += predicted.eq(target).sum().item()

                    if n == 0:
                        pred = logit
                        correct = target
                        n += 1
                    else:
                        pred = torch.cat((pred, logit), axis=0)
                        correct = torch.cat((correct, target), axis=0)
            test_results_acc_seed_upsample.append(acc / len(test_dset_basic))
            test_results_mcc_seed_upsample.append(mcc(pred, correct))

        # Column labels; the last value of this list is reused by later cells.
        upsampling_values = ["up_" + str(upsampling_value) for upsampling_value in upsampling_values]
        print(upsampling_values)
        test_results_acc_seed.append(dict(zip(upsampling_values, test_results_acc_seed_upsample)))
        test_results_mcc_seed.append(dict(zip(upsampling_values, test_results_mcc_seed_upsample)))

    # Directory names sort lexically; reorder the per-seed results by numeric seed.
    seeds_sorted_idx = sorted(range(len(seeds)), key=seeds.__getitem__)
    seeds_sorted = ["seed_" + str(seeds[i]) for i in seeds_sorted_idx]
    test_results_acc_seed_sorted = [test_results_acc_seed[i] for i in seeds_sorted_idx]
    test_results_mcc_seed_sorted = [test_results_mcc_seed[i] for i in seeds_sorted_idx]

    tests_results_acc_dict = dict(zip(seeds_sorted, test_results_acc_seed_sorted))
    tests_results_mcc_dict = dict(zip(seeds_sorted, test_results_mcc_seed_sorted))
    checkpoint_results_list.append({"acc": tests_results_acc_dict, "mcc": tests_results_mcc_dict})
checkpoints_results_dict = dict(zip(checkpoints_names_list, checkpoint_results_list))

## Supervised training (weighted) results

### Accuracy results.

In [None]:
# Accuracy of the weighted supervised runs: one row per seed, one column per upsampling label.
acc_df_supervised_weighted = pd.DataFrame.from_dict(
    checkpoints_results_dict["final_train_supervised"]["acc"],
    orient="index",
    columns=upsampling_values,
)
acc_df_supervised_weighted

### MCC results.

In [None]:
# MCC of the weighted supervised runs: one row per seed, one column per upsampling label.
mcc_df_supervised_weighted = pd.DataFrame.from_dict(
    checkpoints_results_dict["final_train_supervised"]["mcc"],
    orient="index",
    columns=upsampling_values,
)
mcc_df_supervised_weighted

## Supervised training un-weighted results

### Accuracy results.

In [None]:
# Accuracy of the un-weighted supervised runs: one row per seed, one column per upsampling label.
acc_df_supervised_no_weighted = pd.DataFrame.from_dict(
    checkpoints_results_dict["final_train_supervised_no_weights"]["acc"],
    orient="index",
    columns=upsampling_values,
)
acc_df_supervised_no_weighted

### MCC results.

In [None]:
# MCC of the un-weighted supervised runs: one row per seed, one column per upsampling label.
mcc_df_supervised_no_weighted = pd.DataFrame.from_dict(
    checkpoints_results_dict["final_train_supervised_no_weights"]["mcc"],
    orient="index",
    columns=upsampling_values,
)
mcc_df_supervised_no_weighted

## BEST Results

In [None]:
# Independent copies of the un-weighted supervised tables, used as the "best" tables.
acc_best_df = acc_df_supervised_no_weighted.copy()
mcc_best_df = mcc_df_supervised_no_weighted.copy()

In [None]:
# For each listed seed / upsampling label keep the better of the weighted and
# un-weighted supervised results.
# A single .loc[row, column] write updates the frame itself; the chained form
# .loc[row][column] = ... assigns into a temporary Series and can leave the
# frame unchanged.
for key in ["up_2", "up_3", "up_4", "up_6", "up_7"]:
    for seed in ["seed_0", "seed_9", "seed_14", "seed_18", "seed_19"]:
        acc_best_df.loc[seed, key] = max(
            acc_df_supervised_no_weighted.loc[seed, key],
            acc_df_supervised_weighted.loc[seed, key],
        )
        mcc_best_df.loc[seed, key] = max(
            mcc_df_supervised_no_weighted.loc[seed, key],
            mcc_df_supervised_weighted.loc[seed, key],
        )

### Best accuracy results

In [None]:
# Show the best-accuracy table (rows: seeds, columns: upsampling labels).
acc_best_df

In [None]:
# Column-wise mean over every row except the last one, shown as a one-row table.
best_acc_mean = acc_best_df.iloc[:-1].mean(axis=0)
pd.DataFrame(best_acc_mean.to_dict(), index=[0])

### Best MCC results

In [None]:
# Show the best-MCC table (rows: seeds, columns: upsampling labels).
mcc_best_df

In [None]:
# Column-wise mean over every row except the last one, shown as a one-row table.
best_mcc_mean = mcc_best_df.iloc[:-1].mean(axis=0)
pd.DataFrame(best_mcc_mean.to_dict(), index=[0])

## MSMatch results

### Accuracy results.

In [None]:
# Accuracy of the MSMatch runs: one row per seed, one column per upsampling label.
acc_df_msmatch_weighted = pd.DataFrame.from_dict(
    checkpoints_results_dict["final_training_msmatch_trained"]["acc"],
    orient="index",
    columns=upsampling_values,
)
acc_df_msmatch_weighted

### MCC results.

In [None]:
# MCC of the MSMatch runs: one row per seed, one column per upsampling label.
mcc_df_msmatch_weighted = pd.DataFrame.from_dict(
    checkpoints_results_dict["final_training_msmatch_trained"]["mcc"],
    orient="index",
    columns=upsampling_values,
)
mcc_df_msmatch_weighted

### Best results.

In [None]:
# Column-wise mean MCC over every row except the last one, shown as a one-row table.
mcc_df_msmatch_weighted_mean = mcc_df_msmatch_weighted.iloc[:-1].mean(axis=0)
pd.DataFrame(mcc_df_msmatch_weighted_mean.to_dict(), index=[0])

## Checking misclassifications

Checking the misclassified samples for the model with the best MCC.

Path to the best model.

In [None]:
# Best-MCC checkpoint, resolved under MSMatch_dir so it follows the same checkout
# as the other cells instead of an absolute path from another machine.
best_model_path = os.path.join(
    MSMatch_dir,
    "checkpoints",
    "final_train_supervised",
    "Seed_14",
    "hyperExplore_upsTrain_{7}_upsEval_{1}",
    "thraws_swir_train",
    "FixMatch_archefficientnet-lite0_batch8_confidence0.95_lr0.03_uratio4_wd0.00075_wu1.0_seed14_numlabels800_optSGD",
    "model_best.pth",
)

In [None]:
# Re-evaluate the best model one sample at a time and keep every misclassified
# image together with its predicted and true label.

# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#Changing batch to 1
eval_loader = get_data_loader(test_dset_basic, 1, num_workers=1)
# map_location lets a checkpoint saved on a GPU be loaded on CPU-only machines.
checkpoint = torch.load(best_model_path, map_location=device)
load_model = checkpoint["eval_model"]

_net_builder = net_builder(
    "efficientnet-lite0",
    False,
    {
        "depth": 28,
        "widen_factor": 2,
        "leaky_slope": 0.1,
        "dropRate": 0.0,
    },
)
net = _net_builder(num_classes=num_classes, in_channels=num_channels)
net.load_state_dict(load_model)
net.to(device)
net.eval()

y_true = []
y_pred = []
x_wrong = []
# Per-sample logits and targets are collected locally, so nothing left over
# from a previously executed cell can leak into this run.
logit_batches = []
target_batches = []
with torch.no_grad():
    for image, target in tqdm(eval_loader, desc="Checking best model..."):
        image = image.type(torch.FloatTensor).to(device)
        logit = net(image)
        predicted = logit.cpu().max(1)[1]

        # Batch size is 1, so the comparison holds exactly one boolean.
        if (predicted != target).item():
            x_wrong.append(image)
            y_pred += list(predicted)
            y_true += list(target)

        logit_batches.append(logit)
        target_batches.append(target)

# Logits and targets for the whole test set (None when the loader is empty).
pred = torch.cat(logit_batches, dim=0) if logit_batches else None
correct = torch.cat(target_batches, dim=0) if target_batches else None



In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Plot every misclassified image with its predicted and true label.
event_dict = {"0": "event", "1": "not_event"}

n_cols = 3
# Keep at least the 2x3 layout; add rows when more than six images were misclassified
# (a fixed 2x3 grid would raise an IndexError on the seventh image).
n_rows = max(2, -(-len(x_wrong) // n_cols))

plt.rcParams.update({'font.size': 18})
fig, ax = plt.subplots(n_rows, n_cols, figsize=(20, 10 * n_rows))

# ax.flat walks the grid row by row, i.e. panel k is ax[k // n_cols, k % n_cols].
for idx, panel in enumerate(ax.flat):
    if idx >= len(x_wrong):
        # Hide panels that have no image to show.
        panel.axis("off")
        continue
    # Drop the batch axis, bring the tensor to host memory and reverse the axis order.
    # NOTE(review): (2, 1, 0) also swaps the two spatial axes; (1, 2, 0) would keep
    # the orientation if the tensors are channel-first. Confirm which is intended.
    img = np.transpose(x_wrong[idx].squeeze(0).detach().cpu().numpy(), (2, 1, 0))
    panel.imshow(img / img.max())
    panel.set_title("predicted: "+str(event_dict[str(int(y_pred[idx]))])+"\ntrue: "+str(event_dict[str(int(y_true[idx]))]))
    panel.tick_params(left=False, right=False, labelleft=False, labelbottom=False, bottom=False)
fig.tight_layout()
