# Classification
In this notebook we classify the CT/PET scans of lung cancer cases by tumor type.
We do this as a baseline task to validate our data loading pipeline.
Most of the code is adapted from [this PyTorch transfer learning tutorial](https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html).

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
from torch.utils.data import random_split
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
from lungpetctdx_dataset import LungPetCtDxDataset_TumorPresence
from utils.wandb import start_wandb_run, wandb_watch, wandb_log
from eval.reconstruction_viusalization import compare_images
from utils.wandb import wandb_log, wandb

# Turn on cuDNN benchmark mode for the whole notebook.
cudnn.benchmark = True

# Always expose a CPU device handle; fall back to it when CUDA is missing.
cpu = torch.device("cpu")
device = cpu
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    # Release cached blocks that a previous execution of this kernel may hold.
    torch.cuda.empty_cache()

# Last expression of the cell: its return value is rendered as the cell output
# (a process listing, or a diagnostic string such as the pynvml hint).
torch.cuda.list_gpu_processes()


  from .autonotebook import tqdm as notebook_tqdm


'pynvml module not found, please install pynvml'

# Defining datasets (train/validation) 

In [2]:
from lungpetctdx_dataset import LungPetCtDxDataset_TumorPresence
from ct_dataset import NormalizationMethods

# Tunable settings for this cell, collected in one place.
IMAGE_RESOLUTION = 128
SUBJECT_COUNT = 5  # TODO remove subject_count
BATCH_SIZE = 64
NUM_WORKERS = 4

# Applied by the dataset after normalization: collapse images to one channel.
postprocess = transforms.Compose([
    transforms.Grayscale()
])
ds = LungPetCtDxDataset_TumorPresence(
    post_normalize_transform=postprocess,
    normalize=NormalizationMethods.SINGLE_IMAGE,
    cache=True,
    subject_count=SUBJECT_COUNT,
)

# Split via the dataset's own subject_split helper instead of
# torch.utils.data.random_split(ds, [0.8, 0.2]).
# NOTE(review): 0.2 is presumably the validation share -- confirm against
# subject_split's signature.
trainSet, valSet = ds.subject_split(0.2)
dataloaders = {
    # Validation is not shuffled: the order has no effect on the metrics and a
    # fixed order keeps evaluation reproducible between epochs and runs.
    "val": torch.utils.data.DataLoader(
        valSet, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS
    ),
    "train": torch.utils.data.DataLoader(
        trainSet, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS
    ),
}
num_classes = len(ds.class_names)
dataset_sizes = {"train": len(trainSet), "val": len(valSet)}


Only using 5 subjects


# Defining the model, training run, and hyperparameter sweep

In [3]:
from capsule_net import CapsNet
from capsnet_config import Config
from train import train_model, plot_train_losses, predicted_indices_from_outputs


def run_train_experiment(config: dict = None):
    """Train one CapsNet configuration inside a W&B run and return its best accuracy.

    Intended as the target function of ``wandb.agent``: the hyperparameters
    ``reconstruction_loss_factor``, ``out_capsule_size`` and ``learning_rate``
    are read from ``wandb.config`` after ``wandb.init``. Relies on the notebook
    globals ``IMAGE_RESOLUTION``, ``num_classes``, ``dataloaders``, ``ds`` and
    ``device``.

    Args:
        config: Optional hyperparameter defaults forwarded to ``wandb.init``.

    Returns:
        The ``best_acc`` value returned by ``train_model``.

    Raises:
        Any exception raised while building or training the model (e.g. a CUDA
        out-of-memory error). It is re-raised after the references this run
        holds on the model, optimizer and scheduler have been released.
    """
    # Function-scope imports keep the cell runnable without touching the
    # notebook's import cell.
    import gc
    import traceback

    with wandb.init(config=config):
        torch.cuda.empty_cache()
        config = wandb.config

        # Pre-bind everything that may end up referencing GPU memory so the
        # cleanup in ``finally`` is valid even when construction fails early.
        model = optimizer_ft = exp_lr_scheduler = None
        train_losses = best_acc_y_true = best_acc_y_pred = None
        try:
            capsConfig = Config(
                cnn_in_channels=1,
                input_width=IMAGE_RESOLUTION,
                input_height=IMAGE_RESOLUTION,
                # NOTE(review): presumably tied to IMAGE_RESOLUTION -- confirm
                # before changing the resolution.
                dc_in_channels=392,
                reconstruction_loss_factor=config.reconstruction_loss_factor,
                dc_num_capsules=num_classes,
                out_capsule_size=config.out_capsule_size,
                # Hard-coded label counts from an earlier measurement:
                # Num labelled 0 tensor(5682)
                # Num labelled 1 tensor(47677)
                # NOTE(review): not recomputed from `ds`; verify that they
                # still match when the dataset is restricted to few subjects.
                class_weights=torch.tensor([1.0, 5682 / 47677.0]).to(device),
            )

            model = CapsNet(capsConfig)

            wandb_watch(model)
            model.to(device)
            # All model parameters are handed to the optimizer.
            optimizer_ft = optim.Adam(model.parameters(), lr=config.learning_rate)

            # Decay LR by a factor of 0.1 every 7 epochs (never triggers while
            # num_epochs below stays at 1).
            exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

            (
                model,
                best_acc,
                best_loss,
                train_losses,
                best_acc_y_true,
                best_acc_y_pred,
            ) = train_model(
                model,
                exp_lr_scheduler,
                dataloaders["train"],
                dataloaders["val"],
                num_epochs=1,
                on_epoch_done=lambda epoch_result: wandb_log(epoch_result),
                on_batch_done=lambda batch_result: wandb_log(batch_result),
            )

            if best_acc_y_pred is not None and best_acc_y_true is not None:
                wandb.log(
                    {
                        "confusion_matrix": wandb.plot.confusion_matrix(
                            preds=best_acc_y_pred.tolist(),
                            y_true=best_acc_y_true.tolist(),
                            class_names=ds.class_names,
                        )
                    }
                )
            return best_acc
        except BaseException as error:
            # A traceback keeps the locals of every frame it passed through
            # alive (model, batches, activations). If the caller retains the
            # exception, that memory cannot be freed and the next sweep run
            # starts with a nearly full GPU. Clearing the frames drops those
            # references; the trade-off is that post-mortem debugging no longer
            # sees the inner frames' locals.
            traceback.clear_frames(error.__traceback__)
            raise
        finally:
            # Runs on success and on failure: release this frame's references,
            # collect cycles, then hand cached blocks back to the driver.
            model = optimizer_ft = exp_lr_scheduler = None
            train_losses = best_acc_y_true = best_acc_y_pred = None
            gc.collect()
            torch.cuda.empty_cache()


# Search space for the W&B hyperparameter sweep.
sweep_config = {
    "method": "bayes",
    # NOTE(review): the sweep optimizes a metric called "accuracy"; confirm
    # that the results logged during training use exactly this key.
    "metric": {"name": "accuracy", "goal": "maximize"},
    "parameters": {
        "reconstruction_loss_factor": {"distribution": "uniform", "min": 0, "max": 0.4},
        "out_capsule_size": {"values": [16, 32, 64]},
        # The range spans three orders of magnitude. Sampling it uniformly puts
        # ~90% of the draws above 1e-2, so sample uniformly in log-space.
        "learning_rate": {
            "distribution": "log_uniform_values",
            "min": 0.0001,
            "max": 0.1,
        },
    },
}
# Register the sweep and run up to 5 trials in this kernel.
sweep_id = wandb.sweep(sweep_config, project="7-vs-capsule")
wandb.agent(sweep_id, run_train_experiment, count=5)


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: 3ozjet9y
Sweep URL: https://wandb.ai/7-vs-capsule/7-vs-capsule/sweeps/3ozjet9y


[34m[1mwandb[0m: Agent Starting Run: 3w09dkx1 with config:
[34m[1mwandb[0m: 	learning_rate: 0.07402716737229625
[34m[1mwandb[0m: 	out_capsule_size: 64
[34m[1mwandb[0m: 	reconstruction_loss_factor: 0.10247376179596036
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mtfiedlerdev[0m ([33m7-vs-capsule[0m). Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1/1
----------


Run 3w09dkx1 errored: OutOfMemoryError('CUDA out of memory. Tried to allocate 24.50 GiB (GPU 0; 39.45 GiB total capacity; 27.60 GiB already allocated; 9.62 GiB free; 28.37 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 3w09dkx1 errored: OutOfMemoryError('CUDA out of memory. Tried to allocate 24.50 GiB (GPU 0; 39.45 GiB total capacity; 27.60 GiB already allocated; 9.62 GiB free; 28.37 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF')
[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: w9wns7rg with config:
[34m[1mwandb[0m: 	learning_rate: 0.05805527057498624
[34m[1mwan

Epoch 1/1
----------


Run w9wns7rg errored: OutOfMemoryError('CUDA out of memory. Tried to allocate 6.12 GiB (GPU 0; 39.45 GiB total capacity; 35.76 GiB already allocated; 1.74 GiB free; 36.25 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF')
[34m[1mwandb[0m: [32m[41mERROR[0m Run w9wns7rg errored: OutOfMemoryError('CUDA out of memory. Tried to allocate 6.12 GiB (GPU 0; 39.45 GiB total capacity; 35.76 GiB already allocated; 1.74 GiB free; 36.25 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF')
[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: syrpjd9k with config:
[34m[1mwandb[0m: 	learning_rate: 0.06242864490356478
[34m[1mwandb

Epoch 1/1
----------


Run syrpjd9k errored: OutOfMemoryError('CUDA out of memory. Tried to allocate 900.00 MiB (GPU 0; 39.45 GiB total capacity; 36.60 GiB already allocated; 877.75 MiB free; 37.13 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF')
[34m[1mwandb[0m: [32m[41mERROR[0m Run syrpjd9k errored: OutOfMemoryError('CUDA out of memory. Tried to allocate 900.00 MiB (GPU 0; 39.45 GiB total capacity; 36.60 GiB already allocated; 877.75 MiB free; 37.13 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF')
Detected 3 failed runs in the first 60 seconds, killing sweep.
[34m[1mwandb[0m: [32m[41mERROR[0m Detected 3 failed runs in the first 60 seconds, killing sweep.
[34m[1mwandb[0m: To disable this check 