1. # 🛠 Install Libraries

## For PC

In [1]:
#!pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/cu113
#!pip install --user numpy 
#!pip install --user pandas 
#!pip install  segmentation-models-pytorch
# !python -m pip install opencv-python
# !pip install tensorflow
# !pip install -q scikit-learn==1.0
#!pip install plotly
# !pip install --user albumentations
# import sys  
# !{sys.executable} -m pip install --user matplotlib
#!pip install ipywidgets --user
#!pip install -U albumentations[imgaug]

## For Kaggle !!

In [2]:
#!pip install  segmentation-models-pytorch

# 📚 Import Libraries  


In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
import numpy as np
import pandas as pd
pd.options.plotting.backend = "plotly"
import segmentation_models_pytorch as smp
import random
from glob import glob
import os, shutil
from tqdm import tqdm
tqdm.pandas()
import time
import copy

from IPython import display as ipd
from PIL import Image

# visualization
import cv2
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

# Sklearn
import sklearn
from sklearn.model_selection import train_test_split

# PyTorch 
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp
import timm

# Albumentations for augmentations
import albumentations as A

# For colored terminal text
from colorama import Fore, Back, Style
c_  = Fore.GREEN
sr_ = Style.RESET_ALL

import warnings
warnings.filterwarnings("ignore")

# For descriptive error messages
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

# gc
import gc

import shutil, os

## Versions

In [5]:
print(f'Torch version{torch.__version__}')
print('The scikit-learn version is {}.'.format(sklearn.__version__))
import platform
print(f"Python version: {platform.python_version()}")

Torch version1.11.0+cu113
The scikit-learn version is 1.0.2.
Python version: 3.9.12


# ⚙️ Configuration 

In [6]:
class CFG:
    JUST_PREDICT  = False
    Kaggle        = False 
    DEBUG         = False
    wandb_on      = False
    seed          = 101
    MULTIMODEL    = False
    weights       = 'imagenet'
    backbone      = 'efficientnet-b2'
    models        = []
    optimizers    = []
################################################### 
    num_of_models = 1
    model_number  = 1
    train_bs      = 4
    valid_bs      = 4
    number_imgs   = 100 if DEBUG else 8203     #8203
    num_test      = 10 if DEBUG else 1000      # 1000
    print_every   = 1  if DEBUG else 100      #500
    img_size      = [256, 256] #[540, 960]
    start_width   = 512
    start_height  = 512
    final_width   = 512
    final_height  = 512
    epochs        = 2  if DEBUG else 80        #35
    ###############################################
    crop_koef     = 1
    lr            = 0.002
    num_workers   = 4 if Kaggle else 0
    scheduler     = 'CosineAnnealingLR'
    min_lr        = 1e-6
    T_max         = int(30000/train_bs*epochs)+50
    T_0           = 25
    warmup_epochs = 0
    wd            = 0 #1e-6
    n_accumulate  = max(1, 32//train_bs)
    n_fold        = 5
    num_classes   = 4
    classes       = [0,6,7,10]
    activation    = None #'softmax'
    device        = torch.device("cuda" if torch.cuda.is_available() else "cpu") 
    images_path   = "../input/russian-railways-2/images/images/" if Kaggle else "../train/images" 
    masks_path    = "../input/russian-railways-2/mask/mask/" if Kaggle else  "../train/mask/"
    test_path     = "../input/russian-railways-2/test/test/" if Kaggle else "../test/"
    save_path     = '../working/result/' if Kaggle else "./result/"
    best_model_w  = '../input/russian-railways-2/best_epoch_ofu-efficientnet-b1_v2.bin' if Kaggle else './last_epoch_ofu-efficientnet-b1_v2.bin'


# ❗ Reproducibility

In [7]:
def set_seed(seed = 42):
    '''Sets the seed of the entire notebook so results are the same every time we run.
    This is for REPRODUCIBILITY.'''
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ['PYTHONHASHSEED'] = str(seed)
    print('> SEEDING DONE')
    
set_seed(CFG.seed)

> SEEDING DONE


# 📈 Visualization

In [8]:
def visualize(**images):
    """PLot images in one row."""
    n = len(images)
    plt.figure(figsize=(16, 5))
    for i, (name, image) in enumerate(images.items()):
        plt.subplot(1, n, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.title(' '.join(name.split('_')).title())
        plt.imshow(image)
    plt.show()

In [9]:
gc.collect() # gc.collect() возвращает количество объектов, которые были собраны и удалены.

0

# 📦 Model


In [10]:

import segmentation_models_pytorch as smp



    
##################################################################################################################################################################    
    
##################################################################################################################################################################    
def build_model(indx):
    if indx == 1: #'model_Unet':
        model = smp.Unet(
            encoder_name=CFG.backbone,      
            encoder_weights="imagenet",     
            in_channels=3,                  
            classes=CFG.num_classes,       
            activation=CFG.activation)
    
    model.to(CFG.device)
    CFG.models = [model]
    return  model


def load_models(pash):
    for model in CFG.models:
        model = build_model(model)


def load_model(path,indx):
    model = build_model(indx)
    model.load_state_dict(torch.load(path))
    model.eval()
    model.to(CFG.device)
    CFG.models = [model]
    return model

# 🔧 Loss Function

In [11]:
JaccardLoss = smp.losses.JaccardLoss(mode='multilabel') # Intersection over Union: like dice (index) The Intersection-Over-Union (IoU), also known as the Jaccard Index
DiceLoss    = smp.losses.DiceLoss(mode='multilabel')
BCELoss     = smp.losses.SoftBCEWithLogitsLoss()
LovaszLoss  = smp.losses.LovaszLoss(mode='multilabel', per_image=False)
TverskyLoss = smp.losses.TverskyLoss(mode='multilabel', log_loss=False) # parametrised Jaccard loss || log_loss – If True, --> -log(tversky) else 1 - tversky

def dice_coef(y_true, y_pred, thr=0.5, dim=(2,3), epsilon=0.001):
    y_true = y_true.to(torch.float32)
    y_pred = (y_pred>thr).to(torch.float32)
    inter = (y_true*y_pred).sum(dim=dim)
    den = y_true.sum(dim=dim) + y_pred.sum(dim=dim)
    dice = ((2*inter+epsilon)/(den+epsilon)).mean(dim=(1,0))
    return dice

def iou_coef(y_true, y_pred, thr=0.5, dim=(2,3), epsilon=0.001):
    y_true = y_true.to(torch.float32)
    y_pred = (y_pred>thr).to(torch.float32)
    inter = (y_true*y_pred).sum(dim=dim)
    union = (y_true + y_pred - y_true*y_pred).sum(dim=dim)
    iou = ((inter+epsilon)/(union+epsilon)).mean(dim=(1,0))
    return iou

def criterion(y_pred, y_true):
    return 0.4*BCELoss(y_pred, y_true) + 0.6*TverskyLoss(y_pred, y_true)

# 🚄 Training Function

In [12]:
def train_one_epoch(model, optimizer, scheduler, dataloader, device, epoch ):
    model.train()
    scaler = amp.GradScaler()
    
    dataset_size = 0
    running_loss = 0.0
    
    pbar = tqdm(enumerate(dataloader), total=len(dataloader), desc='Train ')
    for step, (images, masks) in pbar:  
    #step = 1
        images = images.to(device, dtype=torch.float)
        masks  = masks.to(device, dtype=torch.float)

        batch_size = images.size(0)

        with amp.autocast(enabled=True): # choose best presition (float32 or float16 for each operation to speed up and best performance)


    ###################### FORWARD ######################################################################

            y_pred = model(images)
            loss   = criterion(y_pred, masks)
            loss   = loss / CFG.n_accumulate 

    ####################### BACKWARD #####################################################################  

                # Exits autocast before backward().
                # Backward passes under autocast are not recommended.
                # Backward ops run in the same dtype autocast chose for corresponding forward ops.

    ########################################################################################################
        scaler.scale(loss).backward()

        if (step + 1) % CFG.n_accumulate == 0: # остаток от деления это сделано верно
            scaler.step(optimizer) #step - обновление весов модели
            scaler.update()

            # zero the parameter gradients
            optimizer.zero_grad() #zero_grad - занулить веса модели (по умолчанию градиенты в PyTorch аккумулируются) 

            if scheduler is not None: 
                scheduler.step()      
        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size

        epoch_loss = running_loss / dataset_size

        mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0
        current_lr = optimizer.param_groups[0]['lr']
        pbar.set_postfix(train_loss=f'{epoch_loss:0.4f}',
                        lr=f'{current_lr:0.5f}',
                        gpu_mem=f'{mem:0.2f} GB')
        
 #########################################################################################################################       
    torch.cuda.empty_cache()
    gc.collect()
    
    return epoch_loss

# 👀 Validation Function

In [13]:
@torch.no_grad()
def valid_one_epoch(model, dataloader, device, epoch):
    model.eval() # remove dropout, batchnorm # we use it when checking results
    
    dataset_size = 0
    running_loss = 0.0
    
    val_scores = []
    
    pbar = tqdm(enumerate(dataloader), total=len(dataloader), desc='Valid ')
    for step, (images, masks) in pbar:        
        images  = images.to(device, dtype=torch.float)
        masks   = masks.to(device, dtype=torch.float)
        
        batch_size = images.size(0)
        
################## FORWARD ##################################################

        y_pred  = model(images)
        loss    = criterion(y_pred, masks)
        
        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size
        
        epoch_loss = running_loss / dataset_size
        
        y_pred = nn.Sigmoid()(y_pred) # ?????
        val_dice = dice_coef(masks, y_pred).cpu().detach().numpy()
        val_jaccard = iou_coef(masks, y_pred).cpu().detach().numpy()
        val_scores.append([val_dice, val_jaccard])
        
        mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0
        current_lr = optimizer.param_groups[0]['lr'] # returns actual LR
        pbar.set_postfix(valid_loss=f'{epoch_loss:0.4f}',
                        lr=f'{current_lr:0.5f}',
                        gpu_memory=f'{mem:0.2f} GB')
    val_scores  = np.mean(val_scores, axis=0)
    torch.cuda.empty_cache()
    gc.collect()
    
    return epoch_loss, val_scores

# 🏃 Run Training (Training loop)

In [14]:
def run_training(model, optimizer, scheduler, device, num_epochs):
    # To automatically log gradients
    if CFG.wandb_on:
        wandb.watch(model, log_freq=100)
    
    if torch.cuda.is_available():
        print("cuda: {}\n".format(torch.cuda.get_device_name()))
    
    start = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_dice      = -np.inf
    best_jaccard   = -np.inf
    best_epoch     = -1
    
    
    for epoch in range(1, num_epochs + 1): 
        gc.collect() # garbage collector - to delete references and objects that not exist any more
        
        print(f'Epoch {epoch}/{num_epochs}', end='')
        train_loss = train_one_epoch(model, optimizer, scheduler, 
                                           dataloader=train_loader, 
                                           device=CFG.device, epoch=epoch)
        
        val_loss, val_scores = valid_one_epoch(model, valid_loader, 
                                                 device=CFG.device, 
                                                 epoch=epoch)
        val_dice, val_jaccard = val_scores
    
        
        #Log the metrics
        if CFG.wandb_on:
            wandb.log({"Train Loss": train_loss, 
                       "Valid Loss": val_loss,
                       "Valid Dice": val_dice,
                       "Valid Jaccard": val_jaccard,
                        "LR": optimizer.param_groups[0]['lr'] # returns actual LRscheduler.get_last_lr()[0]
                      })
        
        print(f'Valid Dice: {val_dice:0.4f} | Valid Jaccard: {val_jaccard:0.4f}')
        
        # deep copy the model
        if val_dice >= best_dice: 
            print(f"{c_}Valid Score  (IOU) ({best_jaccard:0.4f} ---> {val_jaccard:0.4f})")
            print(f"{c_}Valid Score Improved(DICE) ({best_dice:0.4f} ---> {val_dice:0.4f})")
            best_dice    = val_dice
            best_jaccard = val_jaccard
            best_epoch   = epoch
            if CFG.wandb_on:
                run.summary["Best Dice"]    = best_dice
                run.summary["Best Jaccard"] = best_jaccard
                run.summary["Best Epoch"]   = best_epoch
            best_model_wts = copy.deepcopy(model.state_dict())
            PATH = f"best_epoch_of{model.name}_v2.bin"
            torch.save(model.state_dict(), PATH)
            # Save a model file from the current directory
            if CFG.wandb_on:
                wandb.save(PATH)
                print(f"Model Saved{sr_}")
            
        last_model_wts = copy.deepcopy(model.state_dict())
        PATH = f"last_epoch_of{model.name}_v2.bin"
        torch.save(model.state_dict(), PATH)
            
        print(); print()
    
    end = time.time()
    time_elapsed = end - start
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(
        time_elapsed // 3600, (time_elapsed % 3600) // 60, (time_elapsed % 3600) % 60))
    print("Best Score: {:.4f}".format(best_jaccard))
    
    # load best model weights
    model.load_state_dict(best_model_wts)
    
    return model #, history

## Loading models !

In [15]:
if CFG.JUST_PREDICT:
#################################################################################################################################
    try:
        model = load_model(CFG.best_model_w,1) 
        preprocessing_fn = smp.encoders.get_preprocessing_fn(CFG.backbone, CFG.weights)
        print(f'WE WILL TRAIN TRAINED MODEL: {model.name} !!!')
    except Exception:
        model = build_model(CFG.model_number)
        preprocessing_fn = smp.encoders.get_preprocessing_fn(CFG.backbone, CFG.weights)
        print(f'WE WILL TRAIN NEW MODEL: {model.name} !!!')
##########################################################################################


else:
    model = build_model(CFG.model_number)  
    preprocessing_fn = smp.encoders.get_preprocessing_fn(CFG.backbone, CFG.weights)
    print(f'WE TRAIN NEW MODEL {model.name}')


WE TRAIN NEW MODEL u-timm-mobilenetv3_small_minimal_100


In [16]:


def load_img(path):
    image = cv2.imread(path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return image

def load_msk(path):
    msk = cv2.imread(path, cv2.IMREAD_GRAYSCALE) # or msk=cv2.imread(path, 0)
    masks = [(msk == v) for v in CFG.classes]
    msk = np.stack(masks, axis=-1).astype('float')
    return msk
    
def show_img(img, mask=None):
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    plt.imshow(img, cmap='bone')
    
    
    if mask is not None:
        plt.imshow(mask, alpha=0.5)
        handles = [Rectangle((0,0),1,1, color=_c) for _c in [(0.667,0.0,0.0), (0.0,0.667,0.0), (0.0,0.0,0.667)]]
        labels = ["main rails", "rails", "Trains", 'Background']
        plt.legend(handles,labels)
    plt.axis('off')
    
    
def to_tensor(x, **kwargs):
    return x.transpose(2, 0, 1).astype('float32')


def get_preprocessing(preprocessing_fn):
    """Construct preprocessing transform
    
    Args:
        preprocessing_fn (callbale): data normalization function 
            (can be specific for each pretrained neural network)
    Return:
        transform: albumentations.Compose
    
    """
    
    _transform = [
        A.Lambda(image=preprocessing_fn),
        A.Lambda(image=to_tensor, mask=to_tensor),
    ]
    return A.Compose(_transform)

def get_preprocessing_test(preprocessing_fn):
    _transform = [
        A.Lambda(image=preprocessing_fn),
        A.Lambda(image=to_tensor),
    ]
    return A.Compose(_transform)

In [17]:
class BuildDataset(torch.utils.data.Dataset):
#      """ Read images, apply augmentation and preprocessing transformations.
#     Args:
#         images_dir (str): path to images folder
#         masks_dir (str): path to segmentation masks folder
#         class_values (list): values of classes to extract from segmentation mask
#         augmentation (albumentations.Compose): data transfromation pipeline 
#             (e.g. flip, scale, etc.)
#         preprocessing (albumentations.Compose): data preprocessing 
#             (e.g. noralization, shape manipulation, etc.)

    def __init__(self, images_paths, masks_paths = None, label=True, transforms=None,  preprocessing= None):
        self.label      = label
        self.img_paths  = images_paths
        self.msk_paths  = masks_paths
        self.transforms = transforms
        self.preprocessing = preprocessing
        self.preprocessing_img = get_preprocessing_test(preprocessing_fn)
    def __len__(self):
        return len(self.img_paths)
    
    
    def __getitem__(self, index):
        img_path  = self.img_paths[index]
        img = load_img(img_path)
        
        if self.label: # WHEN WE TRAIN 
            msk_path = self.msk_paths[index]
            msk = load_msk(msk_path)
            
            if self.transforms:
                data = self.transforms(image=img, mask=msk)
                img, msk  = data['image'], data['mask']
            if self.preprocessing:
                data = self.preprocessing(image=img, mask=msk)
                img, msk  = data['image'], data['mask']
            return img, msk
        else: # WHEN WE PREDICT
            if self.transforms:
                data = self.transforms(image=img)
                img  = data['image']
            if self.preprocessing:
                data =  self.preprocessing_img(image=img)
                img = data['image']
            return img    

In [18]:
#A.Sharpen?

In [19]:
data_transforms = {
    "train": A.Compose([
       A.HorizontalFlip(p=0.5),
        A.Resize(height=CFG.start_height, width=CFG.start_width, interpolation=cv2.INTER_NEAREST),
        A.ShiftScaleRotate(scale_limit=0.5, rotate_limit=0, shift_limit=0.1, p=0.3, border_mode=0),
        A.OneOf(
            [
                 A.RandomCrop(height=CFG.final_height, width=CFG.final_width, always_apply=True),
                 A.CenterCrop(height =CFG.final_height, width =CFG.final_width,p =1.0),
            ],
            p=1.0,
        ),
         ], p=1.0),
    
    "valid": A.Compose([
        A.Resize(height=CFG.start_height, width=CFG.start_width, interpolation=cv2.INTER_NEAREST),
        
        ], p=1.0)
}

# 🍰 DataLoader

In [20]:
def prepare_loaders():
    
    img_names= [ os.path.join(CFG.images_path,img_name) for img_name in os.listdir(CFG.images_path)]
    masks_names = [ os.path.join(CFG.masks_path,mask_name) for mask_name in os.listdir(CFG.masks_path)]
    img_names = img_names[0:CFG.number_imgs]
    masks_names=masks_names[0:CFG.number_imgs]
    image_train, image_valid, mask_train, mask_valid = train_test_split(img_names, masks_names, test_size=0.2, random_state=CFG.seed)

    
    
    train_dataset = BuildDataset(image_train, mask_train, transforms=data_transforms['train'],preprocessing=get_preprocessing(preprocessing_fn))
    valid_dataset = BuildDataset(image_valid, mask_valid, transforms=data_transforms['valid'],preprocessing=get_preprocessing(preprocessing_fn))

    train_loader = DataLoader(train_dataset, batch_size=CFG.train_bs, 
                              num_workers=CFG.num_workers, shuffle=True, pin_memory=True, drop_last=False)
    valid_loader = DataLoader(valid_dataset, batch_size=CFG.valid_bs, 
                              num_workers=CFG.num_workers, shuffle=False, pin_memory=True)
    
    return train_loader, valid_loader

In [21]:
train_loader, valid_loader = prepare_loaders()

In [22]:
imgs, msks = next(iter(train_loader))
imgs.size(), msks.size()

(torch.Size([1, 3, 128, 128]), torch.Size([1, 4, 128, 128]))

## Optimiser and Scheduler

In [23]:
def fetch_scheduler(optimizer):
    if CFG.scheduler == 'CosineAnnealingLR':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer,T_max=CFG.T_max, ##  <---
                                                   eta_min=CFG.min_lr)
    elif CFG.scheduler == 'CosineAnnealingWarmRestarts':
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer,T_0=CFG.T_0, 
                                                             eta_min=CFG.min_lr)
    elif CFG.scheduler == 'ReduceLROnPlateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                   mode='min',
                                                   factor=0.1,
                                                   patience=7,
                                                   threshold=0.0001,
                                                   min_lr=CFG.min_lr,)
    elif CFG.scheduer == 'ExponentialLR':
        scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.85)
    elif CFG.scheduler == None:
        return None
        
    return scheduler

In [24]:
def build_optimizers(number):
    if number == 1:
        optimizer_1 = optim.Adam(CFG.models[0].parameters(), lr=CFG.lr, weight_decay=CFG.wd)
        CFG.optimizers = [optimizer_1]
    return CFG.optimizers

In [25]:

optimizers = build_optimizers(1)
scheduler =  fetch_scheduler(CFG.optimizers[0])# None # Decrease LR when needed, useless with  Adam (has adaptive LR) optimizer_1

# 🚅 Training

In [26]:
torch.cuda.empty_cache()

In [27]:
if not CFG.JUST_PREDICT:
    for ind,  model in enumerate(CFG.models, start=0):
        train_loader, valid_loader = prepare_loaders()
        optimizer = CFG.optimizers[ind]
        model = run_training(model, optimizer, scheduler,
                                      device=CFG.device,
                                      num_epochs=CFG.epochs)

cuda: NVIDIA GeForce GTX 1060 6GB

Epoch 1/80

Train :   2%|▎                      | 104/6562 [00:41<43:24,  2.48it/s, gpu_mem=0.11 GB, lr=0.00350, train_loss=0.0186]


KeyboardInterrupt: 