In [1]:
from google.colab import drive
import sys

%load_ext autoreload
%autoreload 2

drive.mount('/content/drive', force_remount=True)
sys.path.insert(0, '/content/drive/MyDrive/ML_clean')

Mounted at /content/drive


In [2]:
!ls

drive  sample_data


In [3]:
%cd './drive/MyDrive/ML_clean'

/content/drive/MyDrive/ML_clean


In [4]:
!ls

compute_metrics.py    extract_dataset.py  __pycache__  training.py
constants.py	      get_prediction.py   run.py       Unet.py
data		      ML_notebook.ipynb   submissions
data_augmentation.py  models		  testing.py


In [5]:
!pip install torchmetrics

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [6]:
#cd 'drive/MyDrive/ML_clean'

# Training pipeline

In [8]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import numpy as np
from Unet import Unet 

from constants import *
from extract_dataset import extract_dataset, RoadDataset
from training import run_training
from testing import testing
from compute_metrics import compute_metrics

### 1) Extract data and labels

Here, we crop each image into 5 images of size 256 x 256 and we can choose to augment our data (we have initially 100 training images) :

In [9]:
training_data, training_labels = extract_dataset(augment=True)

Loading 100 images and crop them.
Augment data.


In [10]:
print('We now have:', len(training_data), 'images and labels to train our model.')

We now have: 2780 images and labels to train our model.


### 2) Split into training and validation set

In [11]:
# Split in training and validation set
train_data, validation_data, train_labels, validation_labels = train_test_split(training_data, training_labels, test_size=0.2, random_state=42)

### 3) Build datasets and Dataloader

In [12]:
# Build training and validation datasets
train_set = RoadDataset(train_data, train_labels)
validation_set = RoadDataset(validation_data, validation_labels)

# Build dataloader
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(validation_set, 1, shuffle=False)

### 4) Start training with a Unet neural network

In [13]:
# Set parameters to train 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Unet()
num_epochs = 50
optimizer_kwargs = dict(lr=1e-3, weight_decay=1e-3,)
model = model.to(device=device)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.AdamW(model.parameters(), **optimizer_kwargs)

In [14]:
# Train
model.train()
loss_train_history = []
f1_train_history = []
best_f1 = 0

for epoch in range(1, num_epochs + 1):
    print('\n-------------> epoch: {}'.format(epoch))
    #train_loss, train_acc, lrs = train_epoch(model, optimizer, criterion, train_loader, epoch, device)
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        data = data.permute(0, 3, 1, 2)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)

        pred = torch.round(output)
        # Compute confusion vector between 2 tensors
        confusion_vector = pred / target
        # Compute validation f1 score
        f1_train = compute_metrics(confusion_vector, print_values=False)
        f1_train_history.append(f1_train)
        
        loss.backward()
        optimizer.step()         
        loss_float = loss.item()
        loss_train_history.append(loss_float)

    train_losses = np.mean(loss_train_history)
    epoch_f1 = np.mean(f1_train_history)
    print('Loss score for training:', train_losses)
    print('F1 score for training:', epoch_f1)

    #  Validate Model 
    model.eval()
    loss_val_history = []
    f1_val_history = []
    for data_val, target_val in val_loader:
        data_val, target_val = data_val.to(device), target_val.to(device)
        data_val = data_val.permute(0, 3, 1, 2)

        output_val = model(data_val)

        loss_val = criterion(output_val, target_val).item()
        loss_val_history.append(loss_val)

        pred_val = torch.round(output_val)

        # Compute confusion vector between 2 tensors
        confusion_vector_val = pred_val / target_val
        # Compute validation f1 score
        f1_val = compute_metrics(confusion_vector_val, print_values=False)
        f1_val_history.append(f1_val)
    
    val_loss = np.mean(loss_val_history)
    epoch_f1_val = np.mean(f1_val_history)
    print('\nLoss score for validation:', val_loss)
    print('F1 score for validation:', epoch_f1_val)

    # Save model if best
    if epoch_f1_val > best_f1:
        best_f1 = epoch_f1_val
        best_model = model
        torch.save(model.state_dict(), 'models/best_model_f1.pth')
        print('-------------> NEW BEST MODEL SAVED')



-------------> epoch: 1
Loss score for training: 0.48706881305296645
F1 score for training: 0.0023690339090155312

Loss score for validation: 0.45875670500903676
F1 score for validation: 0.0

-------------> epoch: 2
Loss score for training: 0.45974517726212094
F1 score for training: 0.02529816310120632

Loss score for validation: 0.40347151527945085
F1 score for validation: 0.45174707118585894
-------------> NEW BEST MODEL SAVED

-------------> epoch: 3
Loss score for training: 0.427797968439061
F1 score for training: 0.19424410038085768

Loss score for validation: 0.31374209994624414
F1 score for validation: 0.5505369179319721
-------------> NEW BEST MODEL SAVED

-------------> epoch: 4
Loss score for training: 0.39217635782824145
F1 score for training: 0.3125667643114033

Loss score for validation: 0.24978113444455022
F1 score for validation: 0.7007992590768564
-------------> NEW BEST MODEL SAVED

-------------> epoch: 5
Loss score for training: 0.36259263648832446
F1 score for trai

Based on the f1 score we save the best model and we can then apply it to our testing images

## Testing pipeline

In [None]:
import run 