# Braintriage challenge example

# Import packages
Here we import important packages. Add any packages you need.

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils import data
import os
import SimpleITK as sitk
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import math as math
import csv
import pandas as pd
from tqdm.notebook import tqdm
import pdb
from torchvision import models

## reload scripts before executing them
%load_ext autoreload
%autoreload 2

## importing python files from another directory
from dataset.create_slices import generate_slice_data
from dataset.patient_dataframes import get_patient_train_val_dataframes
from dataset.patient_dataset import PatientDataset
from dataset.slice_dataframes import get_slice_train_val_dataframes
from dataset.slice_dataset import SliceDataset
from visualisation.slice_plotter import plot_slices

from models.feature_vector_model import FeatureVectorModel

## We need to make sure the Net class is only loaded once!
## This is because the Net class calls the super(nn.Module) with itself as parameter
## if we reload the class before every execution then the parameter will be another instantiantion and not "itself"
from models.omnipotent_resnet import Net
from models.combined_net import CombinedNet
from models.lstm import LSTM
%aimport - models.omnipotent_resnet, models.combined_net. models.lstm
from train.train import Trainer
%aimport

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Modules to reload:
all-except-skipped

Modules to skip:
 models.omnipotent_resnet, models.combined_net. models.lstm


## Intro
This notebook serves as an example of how we could work in the future.

# Create dataset


In [24]:
# Slices are generated from the raw data in `in_dir` and written to `out_dir`.
out_dir = '../../data_sliced'
in_dir = '../../data/full'


In [37]:
## generate_slice_data takes two arguments:
##   in_dir  - location of the raw data
##   out_dir - preferred output location of the slices
generate_slice_data(in_dir,out_dir)

HBox(children=(FloatProgress(value=0.0, description='Patients', max=500.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Patients', max=500.0, style=ProgressStyle(description_wid…




In [38]:
# The slice output directory is what the label loading, plotting and dataset cells receive as DATA_DIR.
DATA_DIR = out_dir

In [39]:
# Load the per-slice labels; the column names are supplied explicitly via `names`.
labels_path = os.path.join(DATA_DIR, "labels_slices.csv")
label_df = pd.read_csv(labels_path, names=["patient_nr", "slice_nr", "class"])
label_df["class"] = label_df["class"].astype("int8")
patient_list = np.unique(label_df["patient_nr"])

# Count the distinct values per column for a quick sanity check of the label file.
n_patients = len(patient_list)
n_slices = len(np.unique(label_df["slice_nr"]))
n_classes = len(np.unique(label_df["class"]))

print(label_df.head(), f"Dataframe shape: {label_df.shape}", sep="\n")
print(f"\nNumber of unique patient numbers: {n_patients}")
print(f"Number of unique slice numbers:   {n_slices}")
print(f"Number of unique class values:    {n_classes}")

   patient_nr  slice_nr  class
0         797         0      1
1         797         1      1
2         797         2      1
3         797         3      1
4         797         4      1
Dataframe shape: (6336, 3)

Number of unique patient numbers: 100
Number of unique slice numbers:   32
Number of unique class values:    2


In [40]:
# FIXME: as written this call raises
#   TypeError: plot_slices() missing 1 required positional argument: 'row_col_number'
# Supply `row_col_number` once its expected value is confirmed in visualisation.slice_plotter.
plot_slices(797, (0,31), DATA_DIR)

TypeError: plot_slices() missing 1 required positional argument: 'row_col_number'

We create a dataset class that is compatible with PyTorch's `DataLoader`. This allows us to feed our images to the network efficiently during training and validation. We read the labels for the different patients from a CSV file and convert them into a dictionary. Based on these labels we split the data into a training set and a validation set (you can choose a different ratio).

## Global train/validation parameters

In [30]:
### Parameters ###
# Train on the GPU when CUDA is available, otherwise fall back to the CPU
DEVICE = 'cpu'
if torch.cuda.is_available():
    DEVICE = 'cuda'

# The slices we will train on for each patient
TARGET_SLICES = (12,15)
# The length of feature vectors that the CNN outputs/LSTM will use
N_FEATURES = 128
# Share of the data that will be used for training (given as a fraction)
TRAIN_PERCENTAGE = 0.9

# Directory that is passed to the Trainer as `model_dir`
model_dir = './models'

## Networks

### Featurevector model

In [31]:
### Parameters ###
epochs = 1
batch_size = 2
n_features = N_FEATURES  # reuse the global feature-vector length instead of repeating the literal

# Make sure the model directory exists; exist_ok=True replaces the separate existence check
os.makedirs(model_dir, exist_ok=True)

### Create model ###
fc_net = FeatureVectorModel(n_features=n_features)

### Loss and optimizer ###
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(fc_net.parameters(), lr=0.0001)

### Create data generator (redefine for each network) ###
# Split the slice labels into a training and a validation dataframe
train_df, val_df = get_slice_train_val_dataframes(label_df, train_percentage = TRAIN_PERCENTAGE)

training_set = SliceDataset(train_df, TARGET_SLICES, DATA_DIR)
validation_set = SliceDataset(val_df, TARGET_SLICES, DATA_DIR)

# Only the training batches are shuffled; validation keeps a fixed order
training_iterator = data.DataLoader(training_set, batch_size=batch_size, shuffle = True)
validation_iterator = data.DataLoader(validation_set, batch_size=batch_size, shuffle = False)

### Train and Validate feature vector model
First we instantiate the `Trainer` object, then we run training and validation.

In [None]:
# Hand the feature vector model, loss, optimizer and both data loaders to a Trainer,
# then run training and validation.
trainer = Trainer(
    model=fc_net,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=training_iterator,
    val_loader=validation_iterator,
    n_epochs=epochs,
    model_dir=model_dir,
)
trainer.train_and_validate()

## Train and Validate Resnet50

In [7]:
# ### Parameters ###
# NOTE: device, in_channels and outsize are not referenced anywhere in this cell
device = 0
epochs = 1
batch_size = 5
in_channels = 3
outsize = 1
n_features = N_FEATURES  # reuse the global feature-vector length instead of repeating the literal


# Load Pre-Trained ResNet-50
model = models.resnet50(pretrained=True)

# Change the Pre-Trained Model to our own Defined Model
model = Net(model,"resnet50",n_features)


# ### Loss and optimizer ###
# BCEWithLogitsLoss combines a sigmoid with the binary cross-entropy loss
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.000001)

### Create data generator (redefine for each network) ###
# Split the slice labels into a training and a validation dataframe
train_df, val_df = get_slice_train_val_dataframes(label_df, train_percentage = TRAIN_PERCENTAGE)

training_set = SliceDataset(train_df, TARGET_SLICES, DATA_DIR)
validation_set = SliceDataset(val_df, TARGET_SLICES, DATA_DIR)

# Only the training batches are shuffled; validation keeps a fixed order
training_iterator = data.DataLoader(training_set, batch_size=batch_size, shuffle = True)
validation_iterator = data.DataLoader(validation_set, batch_size=batch_size, shuffle = False)

In [10]:
# Hand the ResNet-50 based model, loss, optimizer and both data loaders to a Trainer,
# then run training and validation.
trainer = Trainer(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=training_iterator,
    val_loader=validation_iterator,
    n_epochs=epochs,
    model_dir=model_dir,
)
trainer.train_and_validate()

Running resnet50


HBox(children=(FloatProgress(value=0.0, description='#epochs', max=1.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='#train_batches', max=22.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='#test_batches', max=3.0, style=ProgressStyle(description_…

epoch:   1 / 001, training loss: 0.7019, validation loss: 0.7045, training accuracy: 0.482, validation accuracy: 0.467.



## Train and Validate Resnet34

In [43]:
# ### Parameters ###
# NOTE: device, in_channels and outsize are not referenced anywhere in this cell
device = 0
epochs = 1
batch_size = 5
in_channels = 3
outsize = 1
n_features = N_FEATURES  # reuse the global feature-vector length instead of repeating the literal


# Load Pre-Trained ResNet-34
model = models.resnet34(pretrained=True)

# Change the Pre-Trained Model to our own Defined Model
model = Net(model,"resnet34",n_features)


# ### Loss and optimizer ###
# BCEWithLogitsLoss combines a sigmoid with the binary cross-entropy loss
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.000001)

### Create data generator (redefine for each network) ###
# Split the slice labels into a training and a validation dataframe
train_df, val_df = get_slice_train_val_dataframes(label_df, train_percentage = TRAIN_PERCENTAGE)

training_set = SliceDataset(train_df, TARGET_SLICES, DATA_DIR)
validation_set = SliceDataset(val_df, TARGET_SLICES, DATA_DIR)

# Only the training batches are shuffled; validation keeps a fixed order
training_iterator = data.DataLoader(training_set, batch_size=batch_size, shuffle = True)
validation_iterator = data.DataLoader(validation_set, batch_size=batch_size, shuffle = False)
        

In [None]:
# Hand the ResNet-34 based model, loss, optimizer and both data loaders to a Trainer,
# then run training and validation.
trainer = Trainer(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=training_iterator,
    val_loader=validation_iterator,
    n_epochs=epochs,
    model_dir=model_dir,
)
trainer.train_and_validate()

I think the simple cases are obvious now; let's see how it works with the combined model.

**Edit:** this does not work yet. I think the problem is somewhere in the models; I will investigate further later.

In [21]:
## First train LSTM Model


### Parameters ###
epochs = 1
batch_size = 5
n_hidden = 64
n_features = N_FEATURES  # reuse the global feature-vector length instead of repeating the literal

### Create model ###
# NOTE: `model` is the CNN assigned in the ResNet cells above; one of those cells must have been
# run in this kernel first, otherwise this cell fails with "NameError: name 'model' is not defined".
lstm_net = LSTM(n_features = n_features, n_hidden = n_hidden, n_layers = 1)
combined_net = CombinedNet(cnn_net = model, lstm_net = lstm_net)
# Turn off learning for cnn_net
combined_net.set_learning_cnn_net(False)

### Loss and optimizer ###
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(combined_net.parameters(), lr=0.0001)

### Create data generator (redefine for each network) ###
# The patient-level split also returns the train/validation patient lists,
# which the PatientDataset takes as its second argument
train_df, val_df, train_patients, val_patients = get_patient_train_val_dataframes(label_df, train_percentage = TRAIN_PERCENTAGE)

training_set = PatientDataset(train_df, train_patients, TARGET_SLICES,DATA_DIR,DEVICE)
validation_set = PatientDataset(val_df, val_patients, TARGET_SLICES,DATA_DIR,DEVICE)

# Only the training batches are shuffled; validation keeps a fixed order
training_iterator = data.DataLoader(training_set, batch_size=batch_size, shuffle = True)
validation_iterator = data.DataLoader(validation_set, batch_size=batch_size, shuffle = False)

NameError: name 'model' is not defined

In [22]:
# Hand the combined CNN + LSTM model, loss, optimizer and both data loaders to a Trainer,
# then run training and validation.
trainer = Trainer(
    model=combined_net,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=training_iterator,
    val_loader=validation_iterator,
    n_epochs=epochs,
    model_dir=model_dir,
)
trainer.train_and_validate()

NameError: name 'combined_net' is not defined