In [111]:
import gc
import math
import os
from typing import List, Tuple

import matplotlib.pyplot as plt
import pandas as pd
import rasterio
import rasterio.plot
import seaborn as sns
import torch
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms
import yaml
from ml_commons import *
from torch import nn, optim
from torch.utils.data import DataLoader, WeightedRandomSampler
from torchvision.models import (EfficientNet_V2_S_Weights, Swin_V2_S_Weights,
                                efficientnet_v2_s, swin_v2_s)

cudnn.benchmark = True
sns.set_theme()

In [112]:
# Load the notebook configuration. The context manager closes the file handle
# as soon as parsing is done instead of leaving it open until garbage collection.
with open('ml_config.yml') as config_file:
    config = yaml.safe_load(config_file)

In [113]:
# Root directory taken from the config; the dataset directory is resolved
# relative to it.
prefix_dir = config['paths']['prefix_dir']
dataset_dir = os.path.join(prefix_dir, config['paths']['dataset_dir'])

In [114]:
# 'output' sub-directory of the configured machine-learning directory.
# NOTE(review): unlike dataset_dir this is not joined with prefix_dir — confirm that is intended.
output_dir = os.path.join(config['paths']['machine_learning_dir'], 'output')

In [115]:
# Drop unreachable Python objects first so the tensors they hold are released,
# then ask PyTorch to give its cached CUDA blocks back.
gc.collect()
torch.cuda.empty_cache()

In [116]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using {device} for inference')

Using cuda for inference


In [117]:
def print_model(model:nn.Module, name:str):
    """Print the string representation of ``model`` inside a titled frame.

    Args:
        model: Object whose ``str()`` output is printed, each line indented
            by two spaces.
        name: Title shown in the ``--- name ---`` header; the closing rule is
            sized to match the header width.
    """
    indent = '  '
    header = f'--- {name} ---'
    footer = "-" * (8 + len(name))
    # Indent the first line directly and every following line through the separator.
    body = indent + ('\n' + indent).join(str(model).splitlines())
    print(f'{header}\n{body}\n{footer}')

In [118]:
# --- Backbone selection ------------------------------------------------------
# The configured name is matched case-insensitively against the supported
# torchvision architectures; both are created with their DEFAULT weights.
print(f"Using model: {config['model']['name']}")
model_name = str(config['model']['name']).lower()
if model_name == 'swintransformer':
    model = swin_v2_s(weights=Swin_V2_S_Weights.DEFAULT)
elif model_name == 'efficientnet':
    model = efficientnet_v2_s(weights=EfficientNet_V2_S_Weights.DEFAULT)
else:
    raise RuntimeError('Model "{}" is unknown'.format(config['model']['name']))

if config['model']['freeze_parameters']:
    # Freeze every existing parameter. The replacement head is created
    # afterwards, so its parameters keep requires_grad=True.
    for param in model.parameters():
        param.requires_grad = False

# --- Replacement classification head ----------------------------------------
print_model(get_model_head(model), 'Initial Model Head')
num_features = get_num_features(model)
print(f'Classification layer has {num_features} input features')
new_model_head = nn.Sequential()
# Before linear layer
if config['processing']['use_dropout']:
    new_model_head.append(nn.Dropout(p=config['processing']['dropout_p'], inplace=True))
# Linear layer: one output per class
new_model_head.append(nn.Linear(num_features, len(classes)))
# After linear layer: optional output activation, only used with ordinal regression.
# A falsy setting (False, or an empty / null YAML value) means "no activation".
if config['processing']['use_ordinal_regression'] and config['processing']['activation_function']:
    activation_name = str(config['processing']['activation_function']).lower()
    activation_classes = {
        'sigmoid': nn.Sigmoid,
        'relu': nn.ReLU,
        'tanh': nn.Tanh,
    }
    if activation_name not in activation_classes:
        raise RuntimeError(f'Unknown activation function: {activation_name}')
    activation_function = activation_classes[activation_name]()
    # Sequential.append mutates in place, so no reassignment is needed.
    new_model_head.append(activation_function)

set_model_head(model, new_model_head)
print_model(get_model_head(model), 'Modified Model Head')
# model = nn.DataParallel(model)
model = model.to(device)

Using model: EfficientNet
--- Initial Model Head ---
  Sequential(
    (0): Dropout(p=0.2, inplace=True)
    (1): Linear(in_features=1280, out_features=1000, bias=True)
  )
--------------------------
Classification layer has 1280 input features
--- Modified Model Head ---
  Sequential(
    (0): Linear(in_features=1280, out_features=5, bias=True)
    (1): ReLU()
  )
---------------------------


In [119]:
# Per-class weights stored next to the dataset, indexed by the 'label' column.
weights_csv_path = os.path.join(dataset_dir, 'training_weights.csv')
training_weights = pd.read_csv(weights_csv_path, index_col='label')
# Transposed so the classes are shown side by side.
training_weights.T

label,CLR,FEW,SCT,BKN,OVC
weight,1.822467,14.258716,18.502381,17.988426,3.682938


In [120]:
def prediction_to_label(pred: torch.Tensor, threshold: float = 0.5) -> torch.Tensor:
    """Decode cumulative ordinal outputs into class indices.

    For every row, the number of leading outputs strictly greater than
    ``threshold`` is counted (counting stops at the first output that is not),
    and the label is that count minus one.

    Args:
        pred: 2-D tensor with one row per sample and one column per class.
        threshold: Value an output must exceed to count as "on".

    Returns:
        1-D integer tensor of class indices, one per row. Rows whose first
        output does not exceed the threshold are mapped to class 0 instead of
        the invalid index -1.
    """
    above_threshold = (pred > threshold).long()
    # cumprod turns to 0 at the first "off" output and stays 0 afterwards,
    # so the sum counts only the leading run of "on" outputs.
    leading_hits = above_threshold.cumprod(dim=1).sum(dim=1)
    return (leading_hits - 1).clamp(min=0)

In [121]:
class OrdinalRegression():
    """Weighted squared-error loss against cumulative ordinal targets.

    A target class ``k`` is encoded as a row whose first ``k + 1`` entries are
    1 and whose remaining entries are 0. The loss is the per-class squared
    error, scaled by the class weights, summed over classes and averaged over
    the batch.
    """

    def __init__(self, weights:Optional[torch.Tensor]) -> None:
        """Store the per-class weights.

        Args:
            weights: 1-D tensor with one weight per class, or ``None`` for
                uniform weights. With ``None`` the attribute stays ``None``
                and a vector of ones matching the predictions is built on
                each call.
        """
        self.weights = weights

    def __call__(self, predictions: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
        """Compute the loss for one batch.

        Args:
            predictions: Tensor of shape (batch, classes).
            targets: Integer class indices, one per sample.

        Returns:
            Scalar tensor holding the batch-averaged weighted loss.
        """
        num_classes = predictions.shape[1]
        if self.weights is None:
            weights = torch.ones(num_classes, device=predictions.device)
        else:
            # Tensor.to returns a new tensor rather than moving in place, so
            # its result must be used.
            weights = self.weights.to(predictions.device)
        # Column j of a row is 1 exactly when j <= target for that row.
        positions = torch.arange(num_classes, device=predictions.device).unsqueeze(0)
        target_column = targets.to(predictions.device).reshape(-1, 1)
        modified_targets = (positions <= target_column).to(predictions.dtype)
        squared_errors = nn.MSELoss(reduction='none')(predictions, modified_targets)
        return torch.mean((squared_errors * weights).sum(dim=1))

In [122]:
def mean_std(loader:DataLoader):
    """Estimate the mean and standard deviation of the data yielded by ``loader``.

    ``loader`` is any iterable of 3-tuples whose first element is a tensor.
    Each tensor is reduced over dimensions 0, 1 and 2, and the per-item means
    of ``x`` and ``x**2`` are averaged with equal weight per item.
    NOTE(review): for 3-D items this yields scalars; for 4-D batches the last
    dimension is kept — confirm which input is intended.

    Returns:
        Tuple ``(mean, std)`` of tensors.

    Raises:
        ValueError: If ``loader`` yields no items.
    """
    mean_total, squared_mean_total, num_batches = 0, 0, 0
    for data, _, _ in loader:
        mean_total += torch.mean(data, dim=[0, 1, 2])
        squared_mean_total += torch.mean(data**2, dim=[0, 1, 2])
        num_batches += 1
    if num_batches == 0:
        raise ValueError('mean_std() received no samples to compute statistics from')
    mean = mean_total / num_batches
    # E[x^2] - E[x]^2 can dip slightly below zero through float round-off
    # (e.g. constant data); clamp so the square root never produces NaN.
    variance = squared_mean_total / num_batches - mean**2
    std = torch.clamp(variance, min=0) ** 0.5
    return mean, std

In [123]:
# Training-time augmentation: random horizontal and vertical flips.
# The plain list is kept as well because later cells extend it.
dataset_transforms = [transforms.RandomHorizontalFlip(), transforms.RandomVerticalFlip()]
composed_transforms = transforms.Compose(dataset_transforms)

In [124]:
# Config flag forwarded to every AimlsseImageDataset constructed below.
use_manual_labels = config['processing']['use_manual_labels']

In [125]:
# Build the training dataset (with augmentation) and the validation dataset (without).
# NOTE(review): the recorded run of this cell failed with
#   TypeError: __init__() got an unexpected keyword argument 'use_manual_labels'
# Presumably the AimlsseImageDataset that was imported does not (yet) accept this
# parameter, or the kernel holds a stale ml_commons import — confirm and re-run.
# NOTE(review): the keyword is spelled `transfrom`; verify that it matches the
# parameter name in the AimlsseImageDataset signature.
train_dataset       = AimlsseImageDataset(DatasetType.TRAINING,     dataset_dir, transfrom=composed_transforms, use_manual_labels=use_manual_labels)
validation_dataset  = AimlsseImageDataset(DatasetType.VALIDATION,   dataset_dir, transfrom=None, use_manual_labels=use_manual_labels)

TypeError: __init__() got an unexpected keyword argument 'use_manual_labels'

In [None]:
def batch_normalization(dataset:AimlsseImageDataset, dataset_type:DatasetType, dataset_transforms):
    """Rebuild a dataset with a ``Normalize`` step appended to its transforms.

    The mean and standard deviation are measured on ``dataset`` with
    ``mean_std`` and printed. A new ``AimlsseImageDataset`` of the same type is
    then created from the notebook-level ``dataset_dir`` and
    ``use_manual_labels``.

    Args:
        dataset: Dataset the statistics are measured on.
        dataset_type: Dataset split to construct; its name is used in the log line.
        dataset_transforms: List of transforms to run before normalisation,
            or ``None`` for none. The list itself is not modified.

    Returns:
        The newly constructed, normalising dataset.
    """
    mean, std = mean_std(dataset)
    print(f'{dataset_type.name} - mean {mean:.3f}, std {std:.3f}')
    base_transforms = [] if dataset_transforms is None else dataset_transforms
    # Concatenation builds a fresh list, leaving the caller's list untouched.
    normalizing_pipeline = transforms.Compose(base_transforms + [transforms.Normalize(mean, std)])
    return AimlsseImageDataset(
        dataset_type,
        dataset_dir,
        transfrom=normalizing_pipeline,
        use_manual_labels=use_manual_labels,
    )

In [None]:
# Optionally replace both datasets with normalising versions. Each split is
# normalised with statistics measured on that same split; only the training
# split keeps the augmentation transforms.
if config['processing']['batch_normalization']:
    train_dataset       = batch_normalization(train_dataset, DatasetType.TRAINING, dataset_transforms)
    validation_dataset  = batch_normalization(validation_dataset, DatasetType.VALIDATION, None)

In [None]:
# Build the data loaders. With the weighted sampler enabled, every training
# sample is drawn with a probability proportional to the weight of its class.
if config['processing']['use_weighted_sampler']:
    num_samples = len(train_dataset)
    # The class-ordered weight lookup is the same for every sample, so compute
    # it once instead of re-indexing the DataFrame for each sample.
    class_weights = training_weights.loc[class_names]['weight'].to_list()
    weights = [class_weights[train_dataset.get_label(i)] for i in range(num_samples)]
    training_sampler = WeightedRandomSampler(weights, num_samples)
    train_dataloader =  DataLoader(train_dataset,       batch_size=config['processing']['batch_size'], sampler=training_sampler)
else:
    train_dataloader =  DataLoader(train_dataset,       batch_size=config['processing']['batch_size'], shuffle=True)
# NOTE(review): the validation loader is shuffled as well — confirm this is intended.
validation_dataloader = DataLoader(validation_dataset,  batch_size=config['processing']['batch_size'], shuffle=True)

In [None]:
# Index of the next sample batch to show; advanced by the preview cell below.
sample_batch_index = 0

In [None]:
# Preview one batch of training samples when enabled in the config.
# This cell is intentionally non-idempotent: each run increments
# sample_batch_index, so re-running it passes the next index to plot_samples.
if config['output']['show_samples']:
    plot_samples(train_dataset, config['processing']['batch_size'], sample_batch_index)
    sample_batch_index += 1

In [None]:
# --- Loss function -----------------------------------------------------------
if config['processing']['use_weighted_loss_function']:
    # NOTE(review): weights are taken in CSV row order, while the weighted sampler
    # indexes training_weights by class_names — confirm both orders agree.
    loss_function_weights = torch.tensor(training_weights['weight'].to_list())
    loss_function_weights = loss_function_weights.to(device)
else:
    loss_function_weights = None
print(f'Loss weights: {loss_function_weights}')

if config['processing']['use_ordinal_regression']:
    criterion = OrdinalRegression(loss_function_weights)
    outputs_to_predictions = prediction_to_label
else:
    criterion = nn.CrossEntropyLoss(loss_function_weights)
    # Class index with the highest output.
    outputs_to_predictions = lambda outputs: torch.max(outputs, 1)[1]

# --- Optimizer ---------------------------------------------------------------
# The config stores exponents: learning rate = 10^-learning_rate_exp,
# weight decay = 10^-weight_decay_exp (0 when weight decay is disabled).
learning_rate = math.pow(10, -config['processing']['learning_rate_exp'])
weight_decay = math.pow(10, -config['processing']['weight_decay_exp']) if config['processing']['use_weight_decay'] else 0.0
optimizer_name = str(config['processing']['optimizer']).lower()
# Only the parameters of the model head are handed to the optimizer.
if optimizer_name == 'adam':
    optimizer = optim.Adam(get_model_head(model).parameters(), lr=learning_rate, weight_decay=weight_decay)
elif optimizer_name == 'sgd':
    # Apply the configured weight decay here too, as the Adam branch does.
    optimizer = optim.SGD(get_model_head(model).parameters(), lr=learning_rate,
                          momentum=config['processing']['momentum'], weight_decay=weight_decay)
else:
    # Fail with a clear message instead of a NameError at the scheduler below.
    raise RuntimeError('Optimizer "{}" is unknown'.format(config['processing']['optimizer']))

# --- Scheduler ---------------------------------------------------------------
# Multiply the learning rate by 0.1 every 7 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

Loss weights: tensor([ 1.8225, 14.2587, 18.5024, 17.9884,  3.6829], device='cuda:0')


In [None]:
# Bundle datasets and loaders for the training routine.
model_data = ModelData(train_dataset, validation_dataset, train_dataloader, validation_dataloader)
checkpoint_filepath = os.path.join(config['paths']['machine_learning_dir'], 'checkpoints', 'chk.pt')
print(f'Model Checkpoints will be stored in: {checkpoint_filepath}')
# Reuse output_dir so the output location is defined in one place only.
output_filepath = os.path.join(output_dir, config['output']['output_name'])
print(f'The results will be stored in: {output_filepath}')
# Create the target directories up front so a missing folder cannot make a
# write fail after training has already run.
for target_filepath in (checkpoint_filepath, output_filepath):
    os.makedirs(os.path.dirname(target_filepath), exist_ok=True)
model_trained = train_model(model, device, model_data, criterion, outputs_to_predictions, optimizer, scheduler,
                            checkpoint_filepath, num_epochs=config['processing']['num_epochs'],
                            batch_accumulation=config['processing']['batch_accumulation'], config=config)
# The state saved at the checkpoint path is copied to the output path.
print('Copying data from checkpoint to results..')
state = load_state(checkpoint_filepath)
save_state(output_filepath, state)
print(f'Results stored in: {output_filepath}')
print('Done!')

Model Checkpoints will be stored in: ML\checkpoints\chk.pt
The results will be stored in: ML\output\local_dummy.pt
Epoch 5/31
----------


  0%|          | 0/486 [00:02<?, ?it/s]


KeyboardInterrupt: 