# Task 2: Baseline CNN

In [None]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score, plot_confusion_matrix, confusion_matrix, ConfusionMatrixDisplay

import torch
from torchvision import datasets, models, transforms
import torch.nn as nn
from torch.nn import functional as F
from torch.nn import CrossEntropyLoss
from torch.optim import Adam, SGD, RMSprop, lr_scheduler
from torch.utils.data import TensorDataset, DataLoader, ConcatDataset

import shap

from data import get_img_dataset
from project3Lib.transforms import EnhanceContrast
import project3Lib.CNN as cnn
from masked_dataset import MaskedDataset
from pathlib import Path
import project3Lib.utils as utils


In [None]:
# Interactive configuration: choose the augmentation mode and the image folder,
# then build the datasets, the data loaders and the weight-file prefix.
augmentation = input("Use augmentation? [yes/no]").lower() == "yes"

unique = input("Use unique images?[yes/no]").lower() == "yes"
if unique:
    input_path = "data/unique_images"
else:
    input_path = "data/images"

if not augmentation:
    train_dataset, val_dataset, test_dataset = get_img_dataset(data_path=input_path)

    if unique:
        model_file_path = "baselineCNN_unique"
    else:
        model_file_path = "baselineCNN"

else:
    # First pass: contrast enhancement only. The test split is taken from here.
    transform = [EnhanceContrast(reduce_dim=False)]
    train_dataset, val_dataset, test_dataset = get_img_dataset(
        transform,
        data_path=input_path,
        use_same_transforms=True,
    )

    # Second pass: contrast enhancement plus random rotation / flip / colour jitter.
    transform = [
        EnhanceContrast(reduce_dim=False),
        transforms.RandomRotation(70),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(),
    ]
    train_dataset2, val_dataset2, _ = get_img_dataset(
        transform,
        data_path=input_path,
        use_same_transforms=True,
    )

    # Train/validation sets = one plain copy + the augmented copy listed twice.
    train_dataset = ConcatDataset([train_dataset, train_dataset2, train_dataset2])
    val_dataset = ConcatDataset([val_dataset, val_dataset2, val_dataset2])

    if unique:
        model_file_path = "baselineCNN_augmented_unique"
    else:
        model_file_path = "baselineCNN_augmented"

model_file_path = "trained_weights/" + model_file_path

device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda:0'
print('Device state:', device)

batch_size = 16
trainloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
testloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
validloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
# Walks the whole training set once just to count the labels per class.
print(f"Class sizes{np.unique([y for x,y in train_dataset], return_counts = True)}")

# Lookup tables keyed by phase name, as consumed by train_model.
dataloaders = dict(train=trainloader, validation=validloader)
image_datasets = dict(train=train_dataset, validation=val_dataset)

In [None]:
def train_model(model, criterion, optimizer, dataloaders, image_datasets, patience = 0, num_epochs=3):
    """Train ``model`` and evaluate it on the validation split after every epoch.

    Args:
        model: ``torch.nn.Module`` to optimise; it is updated in place.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer already bound to ``model.parameters()``.
        dataloaders: dict with ``'train'`` and ``'validation'`` loaders that
            yield ``(inputs, labels)`` batches.
        image_datasets: dict with the same keys; only ``len()`` of each entry
            is used, to turn the summed loss / hit count into per-sample means.
        patience: early-stopping patience. Training stops once the validation
            loss has been higher than in the previous epoch for ``patience``
            consecutive epochs. ``patience <= 0`` disables early stopping.
        num_epochs: maximum number of epochs.

    Returns:
        The same ``model`` object. With early stopping enabled, its weights are
        restored to the snapshot taken at the most recent epoch whose
        validation loss did not increase; otherwise the final weights are kept.
    """
    import copy  # local import so the function does not depend on a file-level import

    # Feed batches to whatever device the model actually lives on, so the
    # model and its inputs can never end up on different devices.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    last_loss = float('inf')  # the first validation epoch always counts as "not worse"
    trigger_times = 0
    best_state = None
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 10)

        for phase in ['train', 'validation']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(model_device)
                labels = labels.to(model_device)

                # No autograd graph is needed while validating.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                if is_train:
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                preds = outputs.argmax(dim=1)
                # loss is a batch mean, so weight it by the batch size.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            n_samples = len(image_datasets[phase])
            epoch_loss = running_loss / n_samples
            epoch_acc = running_corrects / n_samples

            print('{} loss: {:.4f}, acc: {:.4f}'.format(phase,
                                                        epoch_loss,
                                                        epoch_acc))

            # Early stopping (validation phase only)
            if is_train or patience <= 0:
                continue

            if epoch_loss > last_loss:
                trigger_times += 1
                if trigger_times >= patience:
                    if best_state is not None:
                        model.load_state_dict(best_state)
                    return model
            else:
                trigger_times = 0
                # Deep copy: state_dict() returns references to the live
                # tensors, which later optimizer steps would overwrite.
                best_state = copy.deepcopy(model.state_dict())

            last_loss = epoch_loss

    if best_state is not None:
        model.load_state_dict(best_state)
    return model

def predict(model, x):
    """Classify a single image.

    Args:
        model: network to query; it is switched to eval mode.
        x: tensor holding one image; it is reshaped to ``(1, 3, 128, 128)``.

    Returns:
        ``(label, out)`` where ``label`` is the index of the largest model
        output as a Python int and ``out`` is the raw output tensor for the
        single-image batch (detached from autograd).
    """
    model.eval()
    # Inference only: do not build or keep an autograd graph for the outputs.
    with torch.no_grad():
        out = model(x.reshape(1, 3, 128, 128))
    _,prediction = torch.max(out, dim=1)
    return prediction[0].item(), out

def test(model, test_dataset):
    """Score ``model`` on every sample of ``test_dataset``.

    Args:
        model: network passed on to ``predict``.
        test_dataset: iterable of ``(image, label)`` pairs.

    Returns:
        ``(accuracy, f1)`` as computed by scikit-learn's ``accuracy_score``
        and ``f1_score``.
    """
    y_test = []
    preds = []
    # Single pass: each sample is loaded (and transformed) only once.
    for x, y in test_dataset:
        pred, _ = predict(model, x)
        preds.append(pred)
        y_test.append(y)

    # scikit-learn's signature is (y_true, y_pred).
    return accuracy_score(y_test, preds), f1_score(y_test, preds)

## Hyperparameters

In [None]:
# Grid search: train one fresh baseline model per (optimizer, learning rate)
# pair and record its validation accuracy and F1 score.
parameters = dict(
    lr=[0.001, 0.0005, 0.0001],
    optimizers=["SGD", "Adam", "RMSprop"],
)

results = {}

epochs = 50
for opt in parameters["optimizers"]:
    # Any name other than "SGD" / "Adam" falls back to RMSprop.
    optimizer_cls = {"SGD": SGD, "Adam": Adam}.get(opt, RMSprop)

    for lr in parameters["lr"]:
        model = cnn.BaselineClf()
        criterion = CrossEntropyLoss()
        optimizer = optimizer_cls(model.parameters(), lr=lr)

        # Early-stopping patience of 4 epochs.
        model = train_model(
            model,
            criterion,
            optimizer,
            dataloaders,
            image_datasets,
            4,
            num_epochs=epochs,
        )

        val_acc, val_f1 = test(model, val_dataset)

        results[f"{opt}_{lr}"] = (val_acc, val_f1)
        print(f"{opt}_{lr} - acc: {val_acc} - f1: {val_f1}")
    

Hyperparameter search results, as validation `(accuracy, F1)` per optimizer and learning rate, for the **unique dataset (no augmentation)**:
```
{'SGD_0.001': (0.4, 0.3333333333333333),
 'SGD_0.0005': (0.7, 0.7692307692307692),
 'SGD_0.0001': (0.65, 0.7741935483870968),
 'Adam_0.001': (0.65, 0.7199999999999999),
 'Adam_0.0005': (0.7, 0.7692307692307692),
 'Adam_0.0001': (0.65, 0.7407407407407408),
 'RMSprop_0.001': (0.75, 0.8148148148148148),
 'RMSprop_0.0005': (0.7, 0.7692307692307692),
 'RMSprop_0.0001': (0.6, 0.6923076923076924)}
```
 
 
Hyperparameter search results, as validation `(accuracy, F1)` per optimizer and learning rate, for the **augmented dataset**:
 
```
{'SGD_0.001': (0.7333333333333333, 0.7999999999999999),
 'SGD_0.0005': (0.6666666666666666, 0.7560975609756099),
 'SGD_0.0001': (0.7166666666666667, 0.7848101265822784),
 'Adam_0.001': (0.6166666666666667, 0.7578947368421053),
 'Adam_0.0005': (0.5666666666666667, 0.5937499999999999),
 'Adam_0.0001': (0.6666666666666666, 0.7435897435897436),
 'RMSprop_0.001': (0.6833333333333333, 0.7164179104477612),
 'RMSprop_0.0005': (0.5166666666666667, 0.5084745762711865),
 'RMSprop_0.0001': (0.65, 0.7272727272727273)}
```

In [None]:
# Number of scalar weights that receive gradients (frozen parameters are skipped).
total_trainable_params = sum(
    weight.numel()
    for weight in model.parameters()
    if weight.requires_grad
)
print(f'{total_trainable_params:,} training parameters.')

# Train final model

In [None]:
# Train the baseline n times from scratch and collect the test scores of every
# run. After the loop, `model` is the network from the last run.
n = 20
accs = []
f1s = []
epochs = 50
for i in range(n):
    model = cnn.BaselineClf()
    criterion = CrossEntropyLoss()

    # SGD for the augmented data, RMSprop otherwise; both with lr = 0.001.
    optimizer_cls = SGD if augmentation else RMSprop
    optimizer = optimizer_cls(model.parameters(), lr=0.001)

    model = train_model(
        model,
        criterion,
        optimizer,
        dataloaders,
        image_datasets,
        4,
        num_epochs=epochs,
    )

    acc, f1 = test(model, test_dataset)
    accs.append(acc)
    f1s.append(f1)

In [None]:
# torch.save does not create missing folders, so make sure the target
# directory exists before writing the weights.
Path(f"{model_file_path}.pt").parent.mkdir(parents=True, exist_ok=True)
torch.save(model.state_dict(), f"{model_file_path}.pt")

In [None]:
# Score the current `model` on the test set, then print it next to the
# statistics collected in `accs` / `f1s`.
a, f = test(model, test_dataset)
print(
    f"Accuracy mean: {np.mean(accs)} std: {np.std(accs)}",
    f"F1 mean: {np.mean(f1s)} std: {np.std(f1s)}",
    f"Score of saved model: acc = {a} and f1 = {f}",
    sep="\n",
)

Using only the unique images (according to our filtering method) resulted in our baseline CNN not converging. Trying different learning rates, optimizers, early-stopping patience values, and maximum numbers of epochs did not solve this issue. To demonstrate this, we report the mean and standard deviation of the performance scores obtained from training the baseline model 20 times. The following results were observed with **no augmentation**: 

```
    In 20 runs: 
    Accuracy mean: 0.7049999999999998 std: 0.09733961166965893
    F1 mean: 0.706971187025535 std: 0.12677314667331263
```

with **augmentation**: 

```
    In 20 runs: 
    Accuracy mean: 0.7250000000000001 std: 0.07500000000000001
    F1 mean: 0.7149891216454686 std: 0.0951993497268051
```

We suspect that the baseline CNN has too many parameters (1,606,802 in total). 

In [None]:
# Gather images and ground-truth labels, predict each image, and plot the
# confusion matrix normalised over the true labels (normalize="true").
x_test = [image for image, _ in test_dataset]
y_test = [label for _, label in test_dataset]
preds = []
outs = []
for t in x_test:
    pred, out = predict(model, t)
    preds.append(pred)

cm = confusion_matrix(y_true=y_test, y_pred=preds, normalize="true")
cmd = ConfusionMatrixDisplay(confusion_matrix=cm)
cmd.plot()

**No augmentation model** with accuracy = 0.75 and f1 = 0.7826086956521738

![](Plots/CM_Baseline_Unique_NoAugmentation.png)

**Augmentation model** with acc = 0.75 and f1 = 0.761904761904762

In [None]:
# Rebuild the architecture and restore the weights stored on disk.
model = cnn.BaselineClf()
state_dict = torch.load(f"{model_file_path}.pt")
model.load_state_dict(state_dict)
model.eval()

In [None]:
# Keep a handle on the current test set, then rebind `test_dataset` to a
# MaskedDataset-backed test split that reads masks from data/masks.
# NOTE: running this cell twice makes `test_dataset_nomasks` point at the
# masked dataset, because `test_dataset` is overwritten below.
test_dataset_nomasks = test_dataset
if not augmentation:
    transform = []
    common_transform = []
    _, _, test_dataset = get_img_dataset(
        transform=transform,
        use_same_transforms=True,
        common_transforms=common_transform,
        data_path=input_path,
        folder_type=MaskedDataset,
        mask_folder=Path("data/masks"),
    )
else:
    common_transform = [EnhanceContrast(reduce_dim=False)]
    _, _, test_dataset = get_img_dataset(
        common_transforms=common_transform,
        data_path=input_path,
        folder_type=MaskedDataset,
        mask_folder=Path("data/masks"),
    )

## SHAP

In [None]:
# Deep Explainer: every training image serves as background sample.
bg = torch.stack([image for image, _ in train_dataset])
e = shap.DeepExplainer(model, bg)

# Record both model outputs for each background image and print their means.
outs = []
for i in bg:
    pred, out = predict(model, i)
    first_row = out[0]
    outs.append((first_row[0].item(), first_row[1].item()))
print(f"Mean values {np.mean([i for i,j in outs])}, {np.mean([j for i,j in outs])}")

In [None]:
# For every masked test sample: compute SHAP values, plot them, and score the
# class-1 attribution against the ground-truth mask. Only samples with
# target == 1 contribute to the reported mean IoU.
ious = []
for i, (image,mask,target) in enumerate(test_dataset):
    # Add a batch axis: (1, 3, 128, 128).
    image = image.reshape((1,3,128,128))
    pred, out = predict(model,image)
    
    shap_values = e.shap_values(image)
    # Two axis swaps move axis 1 to the end (N, C, H, W -> N, H, W, C) for plotting.
    # Presumably each entry of shap_values has the same layout as `image` - TODO confirm.
    shap_numpy = [np.swapaxes(np.swapaxes(s, 1, -1), 1, 2) for s in shap_values]
    test_numpy = np.swapaxes(np.swapaxes(image.cpu().numpy(), 1, -1), 1, 2)
    print(f"Image #{i}: True Class {target}, Prediction {pred}, Probabilities {out}")
    shap.image_plot(shap_numpy, test_numpy, labels = ["SHAP for class 0","SHAP for class 1"])
    
    # The second entry (labelled "SHAP for class 1" above) is used as the predicted mask.
    predicted_mask = np.copy(shap_values[1].reshape(3,128,128))
    # Repeat the single-channel mask three times so it matches the 3-channel attribution.
    mask = mask.reshape((128,128))
    mask = torch.stack([mask, mask,mask])
    # Sum of all mask entries; assumes a 0/1 mask, so this is the number of marked values - TODO confirm.
    pixels = int(np.sum(mask.numpy().flatten()))
    iou = utils.evaluate_interpretability(predicted_mask, mask,pixels)
    print(iou)
    if target == 1:
        ious.append(iou)
    # Store the attributions of the first two samples.
    if i == 0:
        np.save("Plots/Baseline_SHAP_0", predicted_mask)
    if i == 1:
        # `a` keeps the second sample's attribution in the notebook namespace.
        a = predicted_mask
        np.save("Plots/Baseline_SHAP_1", predicted_mask)
print(f"Mean IOU: {np.mean(ious)}")

**No augmentation**
```
0.0711
```
**augmentation**
```
0.09133420643751193
```

## Integrated Gradient

In [None]:
# Integrated Gradients: score the class-1 attribution of every masked test
# sample against its ground-truth mask; only target == 1 samples enter the mean.
ious = []
for i, (image, mask, target) in enumerate(test_dataset):
    data = (image, target)

    grads = utils.plot_grads(data, model, layer_idx=-1, plot=False, grad_type="integ_grads")
    # The unpacking order of the returned pair depends on the sample's target.
    if target == 1:
        class_1, class_0 = grads
    else:
        class_0, class_1 = grads

    predicted_mask = np.copy(class_1.reshape(3, 128, 128))

    # Repeat the single-channel mask three times to match the attribution shape.
    mask = mask.reshape((128, 128))
    mask = torch.stack([mask] * 3)
    pixels = int(np.sum(mask.numpy().flatten()))

    iou = utils.evaluate_interpretability(predicted_mask, mask, pixels)
    print(iou)
    if target == 1:
        ious.append(iou)

    # Store the attributions of the first two samples.
    if i == 0:
        np.save("Plots/Baseline_Augmentation_IntGrad_0", predicted_mask)
    elif i == 1:
        np.save("Plots/Baseline_Augmentation_IntGrad_1", predicted_mask)
print(f"The mean iou is {np.mean(ious)}")

No augmentation
```
0.08154682005780925
```

Augmentation: 0.08734029258400018

In [None]:
# Plot Integrated Gradients for the mask-free test set.
utils.plot_grads_dataloader(
    test_dataset_nomasks,
    model,
    grad_type="integ_grads",
    plot=True,
    save_name="baseline_augmentation",
)

## GradCam

In [None]:
# Grad-CAM (layer index 6): score the selected map of every masked test sample
# against its ground-truth mask; only target == 1 samples enter the mean.
ious = []
for i, (image, mask, target) in enumerate(test_dataset):
    data = (image, target)
    a, b = utils.plot_grads(data, model, layer_idx=6, plot=False, grad_type="grad_cam")

    # Which element of the returned pair is used depends on the sample's target.
    class_1 = a if target == 1 else b

    cam = class_1.detach().numpy()
    predicted_mask = np.copy(cam.reshape(128, 128))

    # The map is single-channel, so the mask stays single-channel as well.
    mask = mask.reshape((128, 128))
    pixels = int(np.sum(mask.numpy().flatten()))

    iou = utils.evaluate_interpretability(predicted_mask, mask, pixels)
    print(iou)
    if target == 1:
        ious.append(iou)

    # Store the maps of the first two samples.
    if i == 0:
        np.save("Plots/Baseline_Augmentation_GradCam_0", predicted_mask)
    elif i == 1:
        np.save("Plots/Baseline_Augmentation_GradCam_1", predicted_mask)
print(f"The mean iou is {np.mean(ious)}")

No augmentation: 
```
The mean iou is 0.03695559681435451
```
Augmentation:
```
The mean iou is 0.2043
```

In [None]:
# Plot Grad-CAM maps (layer index 6) for the mask-free test set.
utils.plot_grads_dataloader(
    test_dataset_nomasks,
    model,
    grad_type="grad_cam",
    plot=True,
    layer_idx=6,
    save_name="cnn",
)