# Environment setup

## Download code and data


In [4]:
# Clone the GitHub repository and cd into it
!git clone 'https://github.com/CRefice/ml-segmentation-project.git'
%cd ml-segmentation-project/

Cloning into 'ml-segmentation-project'...
remote: Enumerating objects: 40, done.[K
remote: Counting objects: 100% (40/40), done.[K
remote: Compressing objects: 100% (31/31), done.[K
remote: Total 40 (delta 19), reused 24 (delta 8), pack-reused 0[K
Unpacking objects: 100% (40/40), done.
/content/ml-segmentation-project/ml-segmentation-project


In [5]:
# Download the data using the fetch script
!./fetch-data.sh

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
 31 72.8M   31 22.8M    0     0  3276k      0  0:00:22  0:00:07  0:00:15 4571k^C


## Useful imports and settings

In [9]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

from torch.utils.data import DataLoader
from torchsummary import summary
from torchvision import transforms, datasets, models
import copy
import matplotlib.pyplot as plt
import numpy as np
import os,sys
import pandas as pd
import time
import torch
import torch.nn as nn
import torch.nn.functional as F

import unet
from pytorch_data_generator import DataGenerator
from data_loading import load_dataset


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Data importing and massaging

Now we import the dataset and create train/test splitters.

In [10]:
BATCH_SIZE = 16
TRAIN_SIZE = 2000

all_data, all_labels = load_dataset("Ganglioneuroblastoma", threshold=True)
train_data, train_labels = all_data[:TRAIN_SIZE, :, :, :], all_labels[:TRAIN_SIZE, :, :, :]
val_data, val_labels = all_data[TRAIN_SIZE:, :, :, :], all_labels[TRAIN_SIZE:, :, :, :]

train_set = DataGenerator(train_data, train_labels)
val_set = DataGenerator(val_data, val_labels)

dataloaders = {
    'train': DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=0),
    'val': DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
}

NameError: ignored

# Training

In [None]:
model = unet.UNet()
model = model.to(device)

summary(model, input_size=(1, 1000, 1000))

In [None]:
TRN_EPOCHS = 10
WEIGHT_DECAY = 1e-4
LEARNING_RATE = 0.01
MOMENTUM = 0.99

def train_model(model, optimizer, criterion, scheduler, num_epochs=25):
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 10e3

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        
        since = time.time()

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            
            epoch_loss = []
            
            if phase == 'train':
#                 scheduler.step()
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            epoch_samples = 0
            
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)             

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                                      
                epoch_loss.append(loss)
            
            print("Epoch {} loss: {}".format(phase, np.mean(epoch_loss)))
                
            # deep copy the model
            if phase == 'val' and epoch_loss < best_loss:
                print("saving best model")
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())

        time_elapsed = time.time() - since
        print('{:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val loss: {:4f}'.format(best_loss))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)


model = unet.UNet()

# Observe that all parameters are being optimized
optimizer = torch.optim.SGD(model.parameters(), lr = LEARNING_RATE, momentum=MOMENTUM)
criterion = nn.CrossEntropyLoss()

# Maybe interesting to use later
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=25, gamma=0.1)

model = train_model(model, optimizer_ft, criterion, exp_lr_scheduler, num_epochs=TRN_EPOCHS)