# NOTEBOOK FOR  TRAINING MANET MODEL ON ELECTRO-L №2 DATA

## IMPORT ALL REQUIRED PACKAGES

In [59]:
import torch
import torch.nn.functional as F
import torchmetrics
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import pandas as pd
import patoolib
from copy import deepcopy
from tqdm import tqdm
from timeit import default_timer as timer 
from PIL import Image
import numpy as np
import gc
import warnings
import os
import matplotlib.pyplot as plt
from torch import nn
from IPython.display import clear_output
from torcheval.metrics.functional import multiclass_f1_score
import random
import glob
import tifffile as tff
from skimage.transform import resize as interp_resize
import segmentation_models_pytorch as smp
import torch.optim as optim
import rasterio
from ranger21 import Ranger21
import kornia as K
from kornia.augmentation.container import AugmentationSequential
from kornia.augmentation import (
    RandomAffine,
    RandomElasticTransform,
    RandomHorizontalFlip,
    RandomPerspective,
    RandomRotation,
    RandomVerticalFlip)
%matplotlib inline
warnings.filterwarnings("ignore")

('11.7', '1.23.5')

## Initilize your current directory and device (CUDA recommended)

In [61]:
np.set_printoptions(threshold=1e7)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
your_current_dir = os.getcwd()
print('Your current dir of this .ipynb file',your_current_dir)
print('Your device:',device)
your_current_dir = your_current_dir.replace('training_model_utils','')

device(type='cuda')

## Set random seeds for stability everywhere

In [63]:
def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
seed_everything(seed=42)

## Define functions for preprocessing and postprocessing multispectral data as pytorch tensors

In [64]:
def batch_to_img(xb, idx=0):
    img = np.array(xb.squeeze(0))
    return np.array(img.transpose((1,2,0))*255)
def transpose_patch(patch):
    tr = np.array(patch).astype(np.uint8).transpose((2,0,1))
    return tr
def predb_to_mask(predb,idx=0):
    p = torch.functional.F.softmax(predb.squeeze(0), 0)
    return p.argmax(0).cpu()
def inverse_normalize(tensor,mean,std,num_shannels =5):
    tensor = tensor.detach().cpu().numpy()
    # unnormalize the RGB channels
    for i in range(num_shannels):
        tensor[i] = (tensor[i] * std[i]) + mean[i]
    # clip values to [0, 1] range
    tensor = np.clip(tensor, 0, 1)
    # convert back to uint8
    tensor = (tensor * 255).astype(np.uint8)
    return tensor
def open_sample_as_pil_no_norm(datacube):
    inversed_rgb = (datacube[0:3])
    rgb = np.dstack((inversed_rgb[2,:,:],inversed_rgb[1,:,:],inversed_rgb[0,:,:]))*255
    return rgb.astype('uint8')
def open_DEM(datacube):
    dem_arr = datacube[5]#*255
    return dem_arr.numpy()#.astype('uint8')
def open_mask_as_pil(seglabel):
    mask = (seglabel.numpy())
    return mask

## MODEL INITIALIZATION

In [65]:
# lets initialize  architecture of segmentation FOR ALL 12 channels
model = smp.MAnet(
    encoder_name='efficientnet-b0', 
    in_channels=12,
    classes=3,)

## CREATE A DATASET CLASS WITH ELECTRO-L №2 with masks from GOES, METEOSAT & Terra/MODIS for training**

In [169]:
class ELECTRO_L2_Dataset_(Dataset):
    def __init__(self, stack_dir_list,aux_dir, pytorch=True, include_BT = True,nonempty_mode = True):
        super().__init__()
        self.pytorch = pytorch   
        self.nonempty_mode = nonempty_mode
        self.include_BT = include_BT
        self.stack_dir_list = stack_dir_list
        self.non_empty_list = []
        self.snowy_list = []
        self.stack_dirs = []
        self.stack_dirs_BT = []
        self.all_snow_flags = []
        self.non_empty_snow_idxs = []
        self.non_empty_aux = []
        self.aux_dir = aux_dir
        total_len = 0
        for i in range(len(self.stack_dir_list)): # iterate over number of pictures in dataset (18 now)
            stack_dir = self.stack_dir_list[i]
            stack_dir_BT = self.stack_dir_list[i].replace('rgb', 'BT').replace('folder_ZSA','folder_BT')
            non_empty_idxs_dir = self.stack_dir_list[i].replace('all_patch_folder_ZSA','nonempty_idxs_folder')
            non_empty_idxs_dir = non_empty_idxs_dir.replace('patches_rgb','nonempty_idxs')
            snowy_idxs_dir = non_empty_idxs_dir.replace('nonempty_idxs_folder_ZSA', 'snowy_idxs_folder')
            snowy_idxs_dir = snowy_idxs_dir.replace('nonempty', 'snowy')
            snow_flag_array = np.load(snowy_idxs_dir+'.npy')
            self.all_snow_flags.append(snow_flag_array)
            if self.nonempty_mode:
                non_empty_idxs = np.load(non_empty_idxs_dir+'.npy')
                snow_idxs_nonempty = np.zeros(len(snow_flag_array))
                snow_idxs_nonempty[non_empty_idxs]=1
                snow_idxs_nonempty = (snow_idxs_nonempty*snow_flag_array).astype(np.uint8)
                for j in range(len(non_empty_idxs)):
                    idx_ = non_empty_idxs[j]
                    if snow_idxs_nonempty[idx_] == 1:
                        self.non_empty_snow_idxs.append(total_len+j)
                    patch_file = f"{stack_dir}/patch_{idx_}.tif"
                    self.stack_dirs.append(patch_file)
                    patch_file_BT = f"{stack_dir_BT}/patch_{idx_}.tif"
                    self.stack_dirs_BT.append(patch_file_BT)
                    self.non_empty_aux.append(idx_)
                total_len = total_len+len(non_empty_idxs)
            else:
                patch_files = glob.glob(stack_dir+'/*')
                patch_files_BT = glob.glob(stack_dir_BT+'/*')
                for patch_file in patch_files:
                    self.stack_dirs.append(patch_file)
                for patch_file_BT in patch_files_BT:
                    self.stack_dirs_BT.append(patch_file_BT) 
        self.all_snow_flags_nonempty =np.zeros(total_len).astype(np.uint8)
        self.all_snow_flags_nonempty[self.non_empty_snow_idxs]=1
        self.all_snow_flags = np.array(self.all_snow_flags).flatten()
        
    def __len__(self):
        num_patches = int((len(self.stack_dirs))) 
        return num_patches
    
    def open_rgb_normed(self, idx, invert=False):
        patch_file = self.stack_dirs[idx]
        patch = tff.imread(patch_file)
        r,g,b = patch[:,:,0]/255, patch[:,:,1]/255, patch[:,:,2]/255
        patch_normed  = np.dstack([b, g ,r])
        return patch_normed 
    
    def open_BT_normed(self, idx, invert=False):
        patch_file_BT = self.stack_dirs_BT[idx]
        patch_BT = tff.imread(patch_file_BT)
        return patch_BT 
    
    def open_aux(self, idx, invert=False):
        patch_file = self.aux_dir+'/patch_'+str(self.non_empty_aux[idx])+'.tif'
        patch = tff.imread(patch_file)
        lon,lat,dem = patch[:,:,0],patch[:,:,1],patch[:,:,2]
        lon,lat = (lon+180.0)/360.0,(lat+90.0)/180.0
        dem = dem/10000
        lonlatdem_normed = np.dstack([lon,lat,dem])
        return lonlatdem_normed
    
    def open_mask(self, idx):
        patch_file = self.stack_dirs[idx]
        mask_file = patch_file.replace('rgb','masks').replace('tif','jpg').replace('all_patch_folder_ZSA','all_masks_folder')
        mask = tff.imread(mask_file)
        snow_mask = mask[:,:,1]//255 #  1 - snow class
        cloud_mask = mask[:,:,0] # 2 - cloud class
        cloud_mask[cloud_mask!=0] = 2
        bg = mask[:,:,2]
        bin_mask = snow_mask + cloud_mask
        return bin_mask
    
    def open_as_pil(self, idx):
        patch_file = self.stack_dirs[idx]
        patch_image = tff.imread(patch_file)
        return patch_image 
    
    def __getitem__(self, idx):
        patch_file = self.stack_dirs[idx]
        mask_file = patch_file.replace('rgb','masks').replace('tif','jpg').replace('all_patch_folder_ZSA','all_masks_folder')
        mask = tff.imread(mask_file)
        snow_mask = mask[:,:,1]//255 #  1 - snow class
        cloud_mask = mask[:,:,0] # 2 - cloud class
        cloud_mask[cloud_mask!=0]=2
        bin_mask = snow_mask+cloud_mask
        patch = tff.imread(patch_file)
        r,g,b = patch[:,:,0]/255, patch[:,:,1]/255, patch[:,:,2]/255
        aux_file = self.aux_dir+'/patch_'+str(self.non_empty_aux[idx])+'.tif'
        aux = tff.imread(aux_file)
        lon,lat,dem = aux[:,:,0],aux[:,:,1],aux[:,:,2]
        lon,lat = (lon+180.0)/360.0,(lat+90.0)/180.0
        dem = dem/10000
        aux_normed = np.stack([lon, lat, dem])
        patch_normed  = np.stack([b, g ,r])
        if self.include_BT:
            patch_file_BT = self.stack_dirs_BT[idx] 
            BT_normed = tff.imread(patch_file_BT).transpose(2,0,1)
            full_stack = np.concatenate((patch_normed, BT_normed, aux_normed), axis=0)
            # order of channels: b, g, r, BT4, BT5, BT6, BT7, BT8, BT9, lon, lat, dem
            full_stack = torch.tensor(full_stack, dtype=torch.float32)
        else:
            full_stack = np.concatenate((patch_normed, aux_normed), axis=0)
            full_stack = torch.tensor(full_stack, dtype=torch.float32)
        return full_stack, torch.tensor(bin_mask).long()

## DEFINE A PIPELINE CLASS OF GEOMETRIC TRANSFORM ON GPU USING Kornia

In [172]:
import kornia as K
from kornia.augmentation.container import AugmentationSequential
class Geom_Augmentation(nn.Module):
    def __init__(self):
        super(Geom_Augmentation, self).__init__()
        # we define and cache our operators as class members
        self.augs = AugmentationSequential(
                    RandomVerticalFlip(p=1),
                    RandomHorizontalFlip(p=1),
                    RandomPerspective(0.25, sampling_method = 'area_preserving', p=1.),
                    RandomAffine(degrees =(-85.0,85.0),translate = None,scale = (0.9, 1.1),resample="nearest",shear = None,padding_mode="reflection",align_corners=True,same_on_batch=False,keepdim=True,p=1),
                    RandomElasticTransform(kernel_size=(33, 33), sigma=(6.0, 6.0), alpha=(1.0, 1.0), align_corners=True, resample='nearest', padding_mode='reflection', same_on_batch=False, p=1.0, keepdim=True),
                    data_keys=['input', 'mask'], same_on_batch = False, random_apply = 2)                       
    def forward(self, img: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
        # 2. apply geometric tranform
        out = self.augs(img, mask)
        img_out, mask_out = out[0],out[1]
        return img_out, mask_out
geom_augs = Geom_Augmentation()

## USE WEIGHTED SAMPLER TO MAKE DATALOADER MORE BALANCED WITH SNOW PATCHES

In [None]:
#SAMPLER FOR ELECTRO L2 DS FOR FINETUNNING
Electro_ds_tr, Electro_ds_val  = torch.utils.data.random_split(Electro_ds, (0.8,0.2))
non_empty_snow_patches_ds = Electro_ds_tr.all_snow_flags_nonempty
num_non_empty_snow_patches = len(non_empty_snow_patches_ds)
bg_weight = num_non_empty_snow_patches/len(Electro_ds_tr)
snowy_weight = 1 - num_non_empty_snow_patches/len(Electro_ds_tr)
batch_size = 16
_, counts = torch.unique(torch.tensor(non_empty_snow_patches_ds), return_counts=True)
weights = counts.max() / counts
print("Weights: ", weights)
weight_for_sampler_l2 = []  # Every sample must have a weight
for snow_flag in non_empty_snow_patches_ds:
    weight_for_sampler_l2.append(weights[snow_flag].item())
sampler_l2 = WeightedRandomSampler(torch.tensor(weight_for_sampler_l2), len(Electro_ds_tr))

In [175]:
batch_size = 16 #16 # try to set as max as possible
train_dl_ = DataLoader(Electro_ds_tr, batch_size=batch_size, sampler = sampler_l2)
val_dl_ = DataLoader(Electro_ds_val, batch_size=batch_size, shuffle= False)
len(Electro_ds)

(11789, 1687, 1590)

## Define function for train 1 epoch and saving best model according IoU metric value on validation subset

In [176]:
def save_best_model(model,epoch,path =  r'H:/ELECTRO_DATASET/4_km_res/L2/models/MAnet_Efficient_b0_12_inputs_4km_res_ep_'):
        model_copy = deepcopy(model)
        best_model = model_copy
        best_model_name = path+str(epoch)
        torch.save(best_model.state_dict(),best_model_name)
        print('best model is on epoch =',epoch)
        return best_model
def train_ep(model, train_dataload, valid_dataload, dice, focal,focal_alpha, optimizer,ep_i, best_valid_iou, scheduler =None):    
    model.cuda()#cuda()
    print('epoch_n =',ep_i)
    model.train(True)  # Set train mode = true
    step = 0
    train_loss = 0
    #initialize metrics
    train_f1_score = torchmetrics.F1Score(num_classes=3, task = 'multiclass', average = 'macro').cuda()
    train_iou_score = torchmetrics.JaccardIndex(num_classes=3, task = 'multiclass').cuda()
    train_f1_score_sep = torchmetrics.F1Score(num_classes=3, task = 'multiclass', average = None).cuda()
    train_iou_score_sep = torchmetrics.JaccardIndex(num_classes=3, task = 'multiclass', average = None).cuda()
    val_f1_score = torchmetrics.F1Score(num_classes=3, task = 'multiclass', average = 'macro').cuda()
    val_iou_score = torchmetrics.JaccardIndex(num_classes=3, task = 'multiclass').cuda()
    val_f1_score_sep = torchmetrics.F1Score(num_classes=3, task = 'multiclass', average = None).cuda()
    val_iou_score_sep = torchmetrics.JaccardIndex(num_classes=3, task = 'multiclass', average = None).cuda()
    val_acc = torchmetrics.Accuracy(num_classes=3, task = 'multiclass').cuda()
    # iterate over data
    print('-----------training process---------')
    for x,y in tqdm(train_dataload): 
        x = torch.tensor(x).type(torch.float32).cuda()
        y = torch.tensor(y).type(torch.float32).cuda()
        #add geom augmentations on GPU from kornia
        x, y = geom_augs(x, y.unsqueeze(1)) #convert labels to float32 for kornia !!!!
        y = y.type(torch.LongTensor).squeeze(1).cuda() #convert labels to Long again for model !!!!
        step += 1
        # vector graph of training with grad on CUDA
        optimizer.zero_grad()
        output = model(x)
        output = torch.functional.F.softmax(output, 1)
        predictions = output.argmax(dim=1).cuda()
        loss = (1-focal_alpha)*dice(output, y)+focal_alpha*focal(output, y)
        loss.backward()
        optimizer.step()
        # other things can be done on CPU
        loss = loss.detach()
        predictions, y = predictions.detach(), y.detach()
        train_f1_score.update(predictions, y)
        train_iou_score.update(predictions, y)
        train_f1_score_sep.update(predictions, y)
        train_iou_score_sep.update(predictions, y)
        train_loss=train_loss+loss
        x,y = None, None
        # need for torch.no_grad in this training pass
        if scheduler!=None:
            scheduler.step()
    train_loss = train_loss.cpu()/len(train_dataload)
    print('after training epoch mean train loss =',train_loss)
    # Compute the train metrics for the epoch
    train_f1 = train_f1_score.compute()
    train_iou = train_iou_score.compute()
    train_f1_sep = train_f1_score_sep.compute()
    train_iou_sep = train_iou_score_sep.compute()

    # Reset the train metrics objects for the next epoch
    train_f1_score.reset()
    train_iou_score.reset()
    train_f1_score_sep.reset()
    train_iou_score_sep.reset()
# Inside your epoch training loop, after each batch is processed, compute the metrics on the batch predictions and ground truth
    with torch.no_grad():
        val_loss = 0
        print('-----------validation process---------')
        for x,y in tqdm(valid_dataload):
            x = x.cuda()#.cuda()
            y = y.type(torch.LongTensor).cuda()
            output = model(x)
            output = torch.functional.F.softmax(output, 1)
            # Assuming output has shape (batch_size, num_classes, height, width)
            # Convert the output to predictions by taking the argmax along the channel dimension
            predictions = output.argmax(dim=1).cuda()
            valid_loss = (1-focal_alpha)*dice(output, y)+focal_alpha*focal(output, y)
            val_loss = val_loss+valid_loss
            val_f1_score.update(predictions, y)
            val_iou_score.update(predictions, y)
            val_f1_score_sep.update(predictions, y)
            val_iou_score_sep.update(predictions, y)
            val_acc.update(predictions, y)
            x,y = None, None
        val_f1 = val_f1_score.compute()
        val_iou = val_iou_score.compute()
        val_acc_ = val_acc.compute()
        #calculate the same metrics seperately for each class
        val_f1_sep = val_f1_score_sep.compute()
        val_iou_sep = val_iou_score_sep.compute()
        FAR = 1 - val_acc_
            
        # Compute the validation metrics for the epoch
        val_loss = val_loss.cpu()/(len(valid_dataload))
        print('valid loss =',val_loss)

        # Reset the validation metrics objects for the next epoch
        val_f1_score.reset()
        val_iou_score.reset()
        val_f1_score_sep.reset()
        val_iou_score_sep.reset()
        val_acc.reset()
        # Print the F1 score and IoU for the current epoch on the train and validation sets
        print('######### METRICS AFTER TRAINING EPOCH #########')
        print(f"Epoch {ep_i}, Train F1 score: {train_f1:.4f}, Train IoU: {train_iou:.4f}, Val F1 score: {val_f1:.4f}, Val IoU: {val_iou:.4f}")
        print(f"Train F1 score for each class: {train_f1_sep.cpu().numpy()}, \nTrain IoU for each class: {train_iou_sep.cpu().numpy()}")
        print(f"Test FAR: {FAR:.4f}")
        print(f"Test F1 score for each class: {val_f1_sep.cpu().numpy()}, \nTest IoU for each class: {val_iou_sep.cpu().numpy()}")
        if (best_valid_iou<val_iou) or (val_iou>0.75):
            save_best_model(model,ep_i)
            print(" model updated")
        return train_f1.cpu(),train_iou.cpu(),val_f1.cpu(),val_iou.cpu(), train_loss.cpu(), val_loss.cpu()

## Train loop MANet ELECTRO-L №2 data with Ranger21 optimizer**

In [32]:
# continue finetunning training process on ELECTRO L2 ds with Ranger21 optimizer
from ranger21 import Ranger21
max_ep_num = 3
best_valid_iou_ = 0.5
cur_ep = 0
lr, weight_decay = 1e-3, 1e-4
#optimizer = optim.AdamW(model.parameters(),lr = 1e-3)
#scheduler = CosineAnnealingLR(optimizer, T_max=max_ep_num)
optimizer_Ranger21 =  Ranger21(model.parameters(), lr = lr, weight_decay = weight_decay,
                                num_epochs = max_ep_num,num_batches_per_epoch = len(train_dl_))
model.train()
dice = smp.losses.DiceLoss(mode= 'multiclass')
focal = smp.losses.FocalLoss(mode= 'multiclass', gamma = 2)
focal_alpha = 0.7
tr_f1_,val_f1_,tr_iou_, val_iou_,tr_loss_,val_loss_ = [],[],[],[best_valid_iou_],[],[]
for ep_i in range(cur_ep,cur_ep+max_ep_num):
    train_f1_,train_iou_,valid_f1_,valid_iou_,train_loss_,valid_loss_= train_ep(model, train_dl_, valid_dl_, dice, focal,focal_alpha,optimizer_Ranger21,ep_i,best_valid_iou_,scheduler=None)
    tr_f1_.append(train_f1_)
    val_f1_.append(valid_f1_)
    tr_iou_.append(train_iou_)
    val_iou_.append(valid_iou_)
    best_valid_iou_ = max(val_iou_)
    tr_loss_.append(train_loss_)
    val_loss_.append(valid_loss_)

Ranger21 optimizer ready with following settings:

Core optimizer = AdamW
Learning rate of 0.001

Important - num_epochs of training = ** 30 epochs **
please confirm this is correct or warmup and warmdown will be off

Warm-up: linear warmup, over 2000 iterations

Lookahead active, merging every 5 steps, with blend factor of 0.5
Norm Loss active, factor = 0.0001
Stable weight decay of 0.0001
Gradient Centralization = On

Adaptive Gradient Clipping = True
	clipping value of 0.01
	steps for clipping = 0.001

Warm-down: Linear warmdown, starting at 72.0%, iteration 8229 of 11430
warm down will decay until 3e-05 lr
epoch_n = 0
-----------training process---------


  0%|▏                                                                                 | 1/381 [00:08<55:15,  8.72s/it]

params size saved
total param groups = 1
total params in groups = 307


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [08:39<00:00,  1.36s/it]


after training epoch mean train loss = tensor(0.6554)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [01:23<00:00,  1.13s/it]


valid loss = tensor(0.5764)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 0, Train F1 score: 0.4236, Train IoU: 0.3037, Val F1 score: 0.5324, Val IoU: 0.4349
Train F1 score for each class: [0.6406687  0.03829631 0.59187484], 
Train IoU for each class: [0.4713117  0.01952197 0.4203283 ]
Test FAR: 0.2333
Test F1 score for each class: [0.7897525  0.02848086 0.7788766 ], 
Test IoU for each class: [0.6525544  0.01444615 0.6378361 ]
epoch_n = 1
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [07:14<00:00,  1.14s/it]


after training epoch mean train loss = tensor(0.5332)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:29<00:00,  2.49it/s]


valid loss = tensor(0.5272)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 1, Train F1 score: 0.5776, Train IoU: 0.4965, Val F1 score: 0.5377, Val IoU: 0.4489
Train F1 score for each class: [0.8599661  0.03866352 0.8341346 ], 
Train IoU for each class: [0.7543337  0.01971284 0.71546376]
Test FAR: 0.1994
Test F1 score for each class: [0.8089161  0.00533867 0.7987582 ], 
Test IoU for each class: [0.6791428  0.00267648 0.66494375]
epoch_n = 2
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [06:38<00:00,  1.05s/it]


after training epoch mean train loss = tensor(0.5078)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:28<00:00,  2.61it/s]


valid loss = tensor(0.5153)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 2, Train F1 score: 0.5740, Train IoU: 0.5040, Val F1 score: 0.5428, Val IoU: 0.4577
Train F1 score for each class: [8.7347227e-01 3.6097856e-04 8.4824526e-01], 
Train IoU for each class: [7.7536672e-01 1.8052186e-04 7.3648071e-01]
Test FAR: 0.1889
Test F1 score for each class: [8.1708777e-01 1.3815110e-04 8.1117904e-01], 
Test IoU for each class: [6.9074255e-01 6.9080321e-05 6.8233907e-01]
epoch_n = 3
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [06:25<00:00,  1.01s/it]


after training epoch mean train loss = tensor(0.5035)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:29<00:00,  2.53it/s]


valid loss = tensor(0.5088)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 3, Train F1 score: 0.5765, Train IoU: 0.5078, Val F1 score: 0.5486, Val IoU: 0.4659
Train F1 score for each class: [8.740845e-01 2.483866e-05 8.552733e-01], 
Train IoU for each class: [7.7633220e-01 1.2419484e-05 7.4714208e-01]
Test FAR: 0.1805
Test F1 score for each class: [8.2391238e-01 4.5828699e-04 8.2138854e-01], 
Test IoU for each class: [7.0055360e-01 2.2919601e-04 6.9691217e-01]
epoch_n = 4
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [06:11<00:00,  1.03it/s]


after training epoch mean train loss = tensor(0.5011)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:28<00:00,  2.57it/s]


valid loss = tensor(0.5097)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 4, Train F1 score: 0.5786, Train IoU: 0.5112, Val F1 score: 0.5463, Val IoU: 0.4624
Train F1 score for each class: [8.7675446e-01 3.2855649e-05 8.5915118e-01], 
Train IoU for each class: [7.8055441e-01 1.6428094e-05 7.5308067e-01]
Test FAR: 0.1842
Test F1 score for each class: [0.8229123 0.0010144 0.814825 ], 
Test IoU for each class: [6.9910884e-01 5.0745520e-04 6.8751448e-01]
epoch_n = 5
-----------training process---------


 25%|████████████████████▍                                                            | 96/381 [01:32<04:37,  1.03it/s]


** Ranger21 update = Warmup complete - lr set to 0.001



100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [06:01<00:00,  1.05it/s]


after training epoch mean train loss = tensor(0.4991)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:28<00:00,  2.57it/s]


valid loss = tensor(0.5041)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 5, Train F1 score: 0.5883, Train IoU: 0.5183, Val F1 score: 0.5815, Val IoU: 0.4879
Train F1 score for each class: [0.8789801  0.02248845 0.8633402 ], 
Train IoU for each class: [0.7840897  0.01137209 0.7595414 ]
Test FAR: 0.1735
Test F1 score for each class: [0.8298617  0.08375003 0.8309041 ], 
Test IoU for each class: [0.7091997  0.04370517 0.71072364]
epoch_n = 6
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:57<00:00,  1.07it/s]


after training epoch mean train loss = tensor(0.4964)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:28<00:00,  2.57it/s]


valid loss = tensor(0.5011)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 6, Train F1 score: 0.6438, Train IoU: 0.5520, Val F1 score: 0.5979, Val IoU: 0.5007
Train F1 score for each class: [0.8846987  0.18098597 0.865662  ], 
Train IoU for each class: [0.79323745 0.09949674 0.76314294]
Test FAR: 0.1680
Test F1 score for each class: [0.8346187  0.1209437  0.83820105], 
Test IoU for each class: [0.71617645 0.06436406 0.7214682 ]
best model is on epoch = 6
 model updated
epoch_n = 7
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:55<00:00,  1.07it/s]


after training epoch mean train loss = tensor(0.4936)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:29<00:00,  2.52it/s]


valid loss = tensor(0.5025)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 7, Train F1 score: 0.6881, Train IoU: 0.5829, Val F1 score: 0.6047, Val IoU: 0.5030
Train F1 score for each class: [0.8869419  0.30686718 0.87038076], 
Train IoU for each class: [0.79685146 0.18124224 0.7705081 ]
Test FAR: 0.1701
Test F1 score for each class: [0.8361141  0.14660276 0.83144087], 
Test IoU for each class: [0.71838146 0.07909948 0.7115094 ]
best model is on epoch = 7
 model updated
epoch_n = 8
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:50<00:00,  1.09it/s]


after training epoch mean train loss = tensor(0.4923)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:28<00:00,  2.57it/s]


valid loss = tensor(0.4983)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 8, Train F1 score: 0.6995, Train IoU: 0.5922, Val F1 score: 0.6075, Val IoU: 0.5097
Train F1 score for each class: [0.88804126 0.33723706 0.87332106], 
Train IoU for each class: [0.79862773 0.20281729 0.7751286 ]
Test FAR: 0.1626
Test F1 score for each class: [0.8383981  0.13809136 0.8460943 ], 
Test IoU for each class: [0.7217603  0.07416655 0.7332439 ]
best model is on epoch = 8
 model updated
epoch_n = 9
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:47<00:00,  1.10it/s]


after training epoch mean train loss = tensor(0.4917)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:28<00:00,  2.63it/s]


valid loss = tensor(0.4995)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 9, Train F1 score: 0.7133, Train IoU: 0.6027, Val F1 score: 0.6082, Val IoU: 0.5085
Train F1 score for each class: [0.8891569  0.37754267 0.87329364], 
Train IoU for each class: [0.80043423 0.23269807 0.7750855 ]
Test FAR: 0.1651
Test F1 score for each class: [0.839938   0.14555001 0.83920634], 
Test IoU for each class: [0.72404563 0.07848689 0.72295904]
epoch_n = 10
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:48<00:00,  1.09it/s]


after training epoch mean train loss = tensor(0.4909)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:28<00:00,  2.57it/s]


valid loss = tensor(0.5067)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 10, Train F1 score: 0.7227, Train IoU: 0.6103, Val F1 score: 0.6022, Val IoU: 0.4965
Train F1 score for each class: [0.8908566  0.40424538 0.87292475], 
Train IoU for each class: [0.8031935  0.25332552 0.7745045 ]
Test FAR: 0.1785
Test F1 score for each class: [0.8320855  0.15608314 0.81831086], 
Test IoU for each class: [0.712454  0.0846476 0.6924925]
epoch_n = 11
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:47<00:00,  1.10it/s]


after training epoch mean train loss = tensor(0.4900)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:28<00:00,  2.62it/s]


valid loss = tensor(0.4970)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 11, Train F1 score: 0.7274, Train IoU: 0.6150, Val F1 score: 0.6251, Val IoU: 0.5214
Train F1 score for each class: [0.89157045 0.4149957  0.87572634], 
Train IoU for each class: [0.8043548  0.26182625 0.7789263 ]
Test FAR: 0.1599
Test F1 score for each class: [0.84142625 0.18626893 0.84749585], 
Test IoU for each class: [0.7262603  0.10269931 0.7353517 ]
best model is on epoch = 11
 model updated
epoch_n = 12
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:45<00:00,  1.10it/s]


after training epoch mean train loss = tensor(0.4890)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:28<00:00,  2.59it/s]


valid loss = tensor(0.4974)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 12, Train F1 score: 0.7334, Train IoU: 0.6207, Val F1 score: 0.6106, Val IoU: 0.5126
Train F1 score for each class: [0.89251006 0.42972192 0.8780094 ], 
Train IoU for each class: [0.8058855  0.27365977 0.7825461 ]
Test FAR: 0.1605
Test F1 score for each class: [0.84003913 0.14385587 0.8479794 ], 
Test IoU for each class: [0.72419614 0.07750253 0.73608017]
epoch_n = 13
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:47<00:00,  1.10it/s]


after training epoch mean train loss = tensor(0.4885)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:29<00:00,  2.54it/s]


valid loss = tensor(0.4957)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 13, Train F1 score: 0.7370, Train IoU: 0.6240, Val F1 score: 0.6169, Val IoU: 0.5183
Train F1 score for each class: [0.89302444 0.43890026 0.87896854], 
Train IoU for each class: [0.80672455 0.28114814 0.78407127]
Test FAR: 0.1567
Test F1 score for each class: [0.8434618  0.15598525 0.8512515 ], 
Test IoU for each class: [0.72929883 0.08459002 0.7410251 ]
epoch_n = 14
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:45<00:00,  1.10it/s]


after training epoch mean train loss = tensor(0.4874)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:28<00:00,  2.57it/s]


valid loss = tensor(0.4980)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 14, Train F1 score: 0.7417, Train IoU: 0.6288, Val F1 score: 0.6091, Val IoU: 0.5113
Train F1 score for each class: [0.89570165 0.44990993 0.87961656], 
Train IoU for each class: [0.8111048  0.2902476  0.78510314]
Test FAR: 0.1617
Test F1 score for each class: [0.84287477 0.14079162 0.8437271 ], 
Test IoU for each class: [0.72842133 0.07572665 0.7296955 ]
epoch_n = 15
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:43<00:00,  1.11it/s]


after training epoch mean train loss = tensor(0.4876)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:29<00:00,  2.53it/s]


valid loss = tensor(0.5006)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 15, Train F1 score: 0.7398, Train IoU: 0.6272, Val F1 score: 0.6053, Val IoU: 0.5058
Train F1 score for each class: [0.8945906  0.44409084 0.8806756 ], 
Train IoU for each class: [0.80928457 0.2854221  0.7867919 ]
Test FAR: 0.1658
Test F1 score for each class: [0.8375941  0.14001456 0.83818233], 
Test IoU for each class: [0.7205693  0.07527724 0.72144043]
epoch_n = 16
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:46<00:00,  1.10it/s]


after training epoch mean train loss = tensor(0.4857)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:28<00:00,  2.57it/s]


valid loss = tensor(0.4941)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 16, Train F1 score: 0.7457, Train IoU: 0.6337, Val F1 score: 0.6256, Val IoU: 0.5253
Train F1 score for each class: [0.8967649  0.45538533 0.88484263], 
Train IoU for each class: [0.81285024 0.2948213  0.7934688 ]
Test FAR: 0.1545
Test F1 score for each class: [0.84503794 0.17655528 0.8553564 ], 
Test IoU for each class: [0.7316586  0.09682513 0.7472686 ]
best model is on epoch = 16
 model updated
epoch_n = 17
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:46<00:00,  1.10it/s]


after training epoch mean train loss = tensor(0.4869)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:29<00:00,  2.53it/s]


valid loss = tensor(0.5073)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 17, Train F1 score: 0.7468, Train IoU: 0.6334, Val F1 score: 0.6040, Val IoU: 0.4978
Train F1 score for each class: [0.8960391  0.46353155 0.8807085 ], 
Train IoU for each class: [0.81165844 0.30168635 0.78684473]
Test FAR: 0.1806
Test F1 score for each class: [0.83190525 0.16113098 0.818993  ], 
Test IoU for each class: [0.71218985 0.08762505 0.69347006]
epoch_n = 18
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:42<00:00,  1.11it/s]


after training epoch mean train loss = tensor(0.4860)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:29<00:00,  2.55it/s]


valid loss = tensor(0.4930)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 18, Train F1 score: 0.7495, Train IoU: 0.6364, Val F1 score: 0.6188, Val IoU: 0.5222
Train F1 score for each class: [0.89617085 0.46882227 0.8834866 ], 
Train IoU for each class: [0.81187457 0.30618408 0.7912907 ]
Test FAR: 0.1518
Test F1 score for each class: [0.84697133 0.15263656 0.8567538 ], 
Test IoU for each class: [0.73456216 0.082624   0.74940455]
epoch_n = 19
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:43<00:00,  1.11it/s]


after training epoch mean train loss = tensor(0.4855)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:28<00:00,  2.57it/s]


valid loss = tensor(0.4946)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 19, Train F1 score: 0.7526, Train IoU: 0.6395, Val F1 score: 0.6294, Val IoU: 0.5269
Train F1 score for each class: [0.89754796 0.47658223 0.8837062 ], 
Train IoU for each class: [0.8141377  0.3128375  0.79164314]
Test FAR: 0.1552
Test F1 score for each class: [0.84664285 0.18999986 0.85162467], 
Test IoU for each class: [0.7340681  0.10497229 0.7415909 ]
best model is on epoch = 19
 model updated
epoch_n = 20
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:43<00:00,  1.11it/s]


after training epoch mean train loss = tensor(0.4849)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:29<00:00,  2.53it/s]


valid loss = tensor(0.4925)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 20, Train F1 score: 0.7566, Train IoU: 0.6435, Val F1 score: 0.6095, Val IoU: 0.5175
Train F1 score for each class: [0.89946115 0.48687655 0.8835707 ], 
Train IoU for each class: [0.8172918  0.32176924 0.7914255 ]
Test FAR: 0.1507
Test F1 score for each class: [0.8472576  0.12303608 0.85832393], 
Test IoU for each class: [0.73499304 0.06555058 0.7518104 ]
epoch_n = 21
-----------training process---------


 60%|███████████████████████████████████████████████▊                                | 228/381 [03:25<02:15,  1.13it/s]


** Ranger21 update: Warmdown starting now.  Current iteration = 8229....



100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:43<00:00,  1.11it/s]


after training epoch mean train loss = tensor(0.4851)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:29<00:00,  2.54it/s]


valid loss = tensor(0.4918)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 21, Train F1 score: 0.7608, Train IoU: 0.6469, Val F1 score: 0.6386, Val IoU: 0.5356
Train F1 score for each class: [0.8976804  0.500416   0.88430244], 
Train IoU for each class: [0.8143559  0.33370322 0.7926005 ]
Test FAR: 0.1496
Test F1 score for each class: [0.8499689  0.2075715  0.85837024], 
Test IoU for each class: [0.7390834  0.11580462 0.7518814 ]
best model is on epoch = 21
 model updated
epoch_n = 22
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:46<00:00,  1.10it/s]


after training epoch mean train loss = tensor(0.4840)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:28<00:00,  2.56it/s]


valid loss = tensor(0.4902)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 22, Train F1 score: 0.7609, Train IoU: 0.6480, Val F1 score: 0.6274, Val IoU: 0.5305
Train F1 score for each class: [0.90055865 0.49649936 0.88564694], 
Train IoU for each class: [0.8191057 0.3302289 0.7947634]
Test FAR: 0.1466
Test F1 score for each class: [0.8467037  0.1684154  0.86717516], 
Test IoU for each class: [0.73415977 0.09195065 0.765498  ]
epoch_n = 23
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:44<00:00,  1.11it/s]


after training epoch mean train loss = tensor(0.4832)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:29<00:00,  2.55it/s]


valid loss = tensor(0.4927)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 23, Train F1 score: 0.7646, Train IoU: 0.6520, Val F1 score: 0.6290, Val IoU: 0.5292
Train F1 score for each class: [0.9012998 0.5047895 0.8877366], 
Train IoU for each class: [0.82033277 0.33760428 0.7981352 ]
Test FAR: 0.1507
Test F1 score for each class: [0.8499105  0.18011056 0.85685354], 
Test IoU for each class: [0.73899513 0.09896786 0.7495572 ]
epoch_n = 24
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:43<00:00,  1.11it/s]


after training epoch mean train loss = tensor(0.4833)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:29<00:00,  2.53it/s]


valid loss = tensor(0.4939)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 24, Train F1 score: 0.7656, Train IoU: 0.6526, Val F1 score: 0.6284, Val IoU: 0.5272
Train F1 score for each class: [0.9015188  0.50901586 0.88630545], 
Train IoU for each class: [0.8206957  0.34139588 0.7958246 ]
Test FAR: 0.1533
Test F1 score for each class: [0.8475981  0.18364444 0.8538898 ], 
Test IoU for each class: [0.73550546 0.101106   0.74503297]
epoch_n = 25
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:40<00:00,  1.12it/s]


after training epoch mean train loss = tensor(0.4827)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:28<00:00,  2.57it/s]


valid loss = tensor(0.4909)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 25, Train F1 score: 0.7684, Train IoU: 0.6557, Val F1 score: 0.6331, Val IoU: 0.5336
Train F1 score for each class: [0.9019317 0.5152307 0.8880538], 
Train IoU for each class: [0.8213804 0.3470106 0.7986482]
Test FAR: 0.1474
Test F1 score for each class: [0.8499129  0.18625589 0.8630005 ], 
Test IoU for each class: [0.73899865 0.10269138 0.75901574]
epoch_n = 26
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:44<00:00,  1.11it/s]


after training epoch mean train loss = tensor(0.4816)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:29<00:00,  2.53it/s]


valid loss = tensor(0.4919)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 26, Train F1 score: 0.7702, Train IoU: 0.6585, Val F1 score: 0.6275, Val IoU: 0.5290
Train F1 score for each class: [0.9039018  0.51557887 0.8910124 ], 
Train IoU for each class: [0.8246541  0.34732658 0.80344653]
Test FAR: 0.1494
Test F1 score for each class: [0.8495062  0.17332537 0.8596212 ], 
Test IoU for each class: [0.7383839  0.09488574 0.7538032 ]
epoch_n = 27
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:42<00:00,  1.11it/s]


after training epoch mean train loss = tensor(0.4816)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:29<00:00,  2.52it/s]


valid loss = tensor(0.4912)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 27, Train F1 score: 0.7699, Train IoU: 0.6581, Val F1 score: 0.6337, Val IoU: 0.5336
Train F1 score for each class: [0.9044793 0.515518  0.8897259], 
Train IoU for each class: [0.825616   0.34727132 0.801357  ]
Test FAR: 0.1481
Test F1 score for each class: [0.850517   0.18961555 0.8611071 ], 
Test IoU for each class: [0.73991257 0.10473772 0.7560913 ]
epoch_n = 28
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:43<00:00,  1.11it/s]


after training epoch mean train loss = tensor(0.4813)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:29<00:00,  2.52it/s]


valid loss = tensor(0.4916)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 28, Train F1 score: 0.7718, Train IoU: 0.6601, Val F1 score: 0.6328, Val IoU: 0.5326
Train F1 score for each class: [0.9035033  0.51979727 0.8920719 ], 
Train IoU for each class: [0.8239907  0.35116625 0.80517125]
Test FAR: 0.1488
Test F1 score for each class: [0.850852   0.18797576 0.85957956], 
Test IoU for each class: [0.74041986 0.103738   0.75373936]
epoch_n = 29
-----------training process---------


100%|████████████████████████████████████████████████████████████████████████████████| 381/381 [05:43<00:00,  1.11it/s]


error in warmdown - lr below min lr. current lr = 2.999999999999997e-05
auto handling but please report issue!
after training epoch mean train loss = tensor(0.4815)
-----------validation process---------


100%|██████████████████████████████████████████████████████████████████████████████████| 74/74 [00:29<00:00,  2.49it/s]

valid loss = tensor(0.4920)
######### METRICS AFTER TRAINING EPOCH #########
Epoch 29, Train F1 score: 0.7717, Train IoU: 0.6597, Val F1 score: 0.6319, Val IoU: 0.5317
Train F1 score for each class: [0.90449065 0.5208447  0.8896654 ], 
Train IoU for each class: [0.82563484 0.35212305 0.8012589 ]
Test FAR: 0.1494
Test F1 score for each class: [0.8500447  0.18663645 0.858999  ], 
Test IoU for each class: [0.7391981  0.10292279 0.75284684]





## BLENDING SEVERAL MODELS via MODELS.SOUP TO RAISE QUALITY

In [290]:
# PREPARE A LIST OF SEVERAL MODELS WITH THE SAME ARCHITECTURE TO SOUP THEIR WEIGHTS
model_path1 = '1st_model_dir'
model_path2 = '2nd_model_dir'
model_path3 = '3rd_model_dir'
model_path_list = [model_path1,model_path2,model_path3]
for i, model_path in enumerate(model_path_list):
    print(model_path)

H:\ELECTRO_DATASET\4_km_res\L2/models/MAnet_Efficient_b0_12_inputs_4km_res_ep_216
H:\ELECTRO_DATASET\4_km_res\L2/models/MAnet_Efficient_b0_12_inputs_4km_res_ep_198
H:\ELECTRO_DATASET\4_km_res\L2/models/MAnet_Efficient_b0_12_inputs_4km_res_ep_175


In [291]:
def uniform_soup(model, path, device = "cpu", by_name = False):
    try:
        import torch
    except:
        print("If you want to use 'Model Soup for Torch', please install 'torch'")
        return model
        
    if not isinstance(path, list):
        path = [path]
    model = model.to(device)
    model_dict = model.state_dict()
    soups = {key:[] for key in model_dict}
    for i, model_path in enumerate(path):
        weight = torch.load(model_path, map_location = device)
        weight_dict = weight.state_dict() if hasattr(weight, "state_dict") else weight
        if by_name:
            weight_dict = {k:v for k, v in weight_dict.items() if k in model_dict}
        for k, v in weight_dict.items():
            soups[k].append(v)
    if 0 < len(soups):
        soups = {k:(torch.sum(torch.stack(v), axis = 0) / len(v)).type(v[0].dtype) for k, v in soups.items() if len(v) != 0}
        model_dict.update(soups)
        model.load_state_dict(model_dict)
    return model
print("\n[Uniform Soup Performance]")
souped_model = uniform_soup(model, model_path_list, device = device)
# test_model(souped_model, valid_dl_)
test_model(souped_model, test_dl_)


[Uniform Soup Performance]
-----------Testing process---------


100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [01:20<00:00,  1.24it/s]

Test F1 score: 0.8077, Test IoU: 0.7432
Test FAR: 0.1093
Test F1 score for each class(bg, snow, cloud): [0.8006987  0.74544346 0.8768815 ], 
Test IoU for each class(bg, snow, cloud): [0.6924185 0.7397989 0.7972732]





In [292]:
def save_souped_model(model,num_models=3, path =  r'H:\ELECTRO_DATASET\4_km_res\L2/models/MAnet_Efficient_b0_12_inputs_4km_res_souped'):
        model_copy = deepcopy(model)
        best_model = model_copy
        best_model_name = path+'_3_models'
        torch.save(best_model.state_dict(),best_model_name)
        print('souped model saved!')
        return best_model
save_souped_model(souped_model)

souped model saved!


MAnet(
  (encoder): EfficientNetEncoder(
    (_conv_stem): Conv2dStaticSamePadding(
      12, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
      (static_padding): ZeroPad2d((0, 1, 0, 1))
    )
    (_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
    (_blocks): ModuleList(
      (0): MBConvBlock(
        (_depthwise_conv): Conv2dStaticSamePadding(
          32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
          (static_padding): ZeroPad2d((1, 1, 1, 1))
        )
        (_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
        (_se_reduce): Conv2dStaticSamePadding(
          32, 8, kernel_size=(1, 1), stride=(1, 1)
          (static_padding): Identity()
        )
        (_se_expand): Conv2dStaticSamePadding(
          8, 32, kernel_size=(1, 1), stride=(1, 1)
          (static_padding): Identity()
        )
        (_project_conv): Conv2dStaticSamePaddi