## Summary
#### [<u>Initial version</u>](https://www.kaggle.com/code/egortrushin/gr-icrgw-pytorch-lightning-baseline-unet-resnest)
- Baseline written using PyTorch Lightning
- resnest26d as encoder
- Dataset is taken from: https://www.kaggle.com/datasets/shashwatraman/contrails-images-ash-color

#### [<u>Improved version</u>](https://www.kaggle.com/code/egortrushin/gr-icrgw-pl-pipeline-improved)
- Option to change image size
- Mixed precision training, which helps to use GPU memory more efficiently (only useful with T4x2; on P100 it slows down training)
- Training using 2 GPUs - with 2 GPUs we have more memory and higher speed
- Other numerous small changes

#### <u>Present version</u>
- Training with 4 folds
- LR scheduler: cosine with warmup
- Use of CSVLogger with subsequent visualization of the optimization process. Since I train without internet access, I am limited to a *local* CSVLogger or TensorBoardLogger. Alternatively, you can train with internet access and use WandbLogger.
- Submission part is rewritten to make it cleaner and to allow easy work with multi-fold models

### Training part

In [1]:
import sys
sys.path.append("../input/pretrained-models-pytorch")
sys.path.append("../input/efficientnet-pytorch")
sys.path.append("/kaggle/input/smp-github/segmentation_models.pytorch-master")
sys.path.append("/kaggle/input/timm-pretrained-resnest/resnest/")
import segmentation_models_pytorch as smp

  if block_type is 'proj':
  elif block_type is 'down':
  assert block_type is 'normal'


In [2]:
!mkdir -p /root/.cache/torch/hub/checkpoints/
!cp /kaggle/input/timm-pretrained-resnest/resnest/gluon_resnest26-50eb607c.pth /root/.cache/torch/hub/checkpoints/gluon_resnest26-50eb607c.pth

In [3]:
%%writefile config.yaml

data_path: "/kaggle/input/contrails-images-ash-color"
output_dir: "models"

folds:
    n_splits: 4
    random_state: 42
train_folds: [0]
    
seed: 42

train_bs: 16
valid_bs: 64
workers: 2

progress_bar_refresh_rate: 1

early_stop:
    monitor: "val_loss"
    mode: "min"
    patience: 999
    verbose: 1

trainer:
    max_epochs: 20
    min_epochs: 20
    enable_progress_bar: True
    precision: "16-mixed"
    devices: 2

model:
    seg_model: "Unet"
    encoder_name: "tu-maxvit_rmlp_base_rw_384" 
    loss_smooth: 1.0
    image_size: 384
    optimizer_params:
        lr: 0.0005
        weight_decay: 0.0
    scheduler:
        name: "cosine_with_hard_restarts_schedule_with_warmup"
        params:
            cosine_with_hard_restarts_schedule_with_warmup:
                num_warmup_steps: 350
                num_training_steps: 3150
                num_cycles: 1

Writing config.yaml


### Submission part

In [4]:
import warnings
warnings.filterwarnings("ignore")

import gc
import os
import glob

import numpy as np
import pandas as pd

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

import pytorch_lightning as pl
import torchvision.transforms as T
import yaml

with open("config.yaml", "r") as file_obj:
    config = yaml.safe_load(file_obj)

In [5]:
# Inference-time settings shared by the cells below.
# DataLoader batch size and worker count used for the test set.
batch_size = 16
num_workers = 2
# Probability cut-off intended for turning predicted masks into binary masks.
THR = 0.4
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Competition dataset root and its test split; `data_root` keeps the trailing
# slash because record directory names are appended to it by plain string
# concatenation when the test dataframe is built.
data = '/kaggle/input/google-research-identify-contrails-reduce-global-warming'
data_root = '/kaggle/input/google-research-identify-contrails-reduce-global-warming/test/'
# Sample submission, indexed by record_id.
submission = pd.read_csv(os.path.join(data, 'sample_submission.csv'), index_col='record_id')

In [6]:
# Every entry of the test directory becomes one row: its name is kept as
# `record_id` and its location (data_root + name) as `path`.
filenames = os.listdir(data_root)
test_df = pd.DataFrame(filenames, columns=['record_id'])
test_df['path'] = [data_root + str(record_id) for record_id in test_df['record_id']]

In [7]:
class ContrailsDataset(torch.utils.data.Dataset):
    """Dataset that turns contrail records into normalized false-color images.

    Each row of ``df`` must provide a ``path`` column pointing at a record
    directory containing ``band_11.npy``, ``band_14.npy`` and ``band_15.npy``.
    The last component of that path is used as the record id, so it has to be
    convertible with ``int()``.

    Args:
        df: dataframe with one row per record (needs a ``path`` column).
        image_size: output size; images are resized only when this is not 256.
        train: when True, a horizontal flip with ``p=1.0`` is applied to every
            sample.
    """

    def __init__(self, df, image_size=256, train=False):
        self.df = df
        self.trn = train
        # Record ids are derived from the rows of `df` themselves rather than
        # from a separate directory listing, so ids always match the image
        # returned for the same index (also for subsets or reordered frames).
        self.df_idx: pd.DataFrame = self._record_ids(df)
        self.normalize_image = T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        self.image_size = image_size
        if image_size != 256:
            self.resize_image = T.transforms.Resize(image_size)
        if train:
            # Attribute name must match the one read in __getitem__.
            self.hflip = T.RandomHorizontalFlip(p=1.0)

    @staticmethod
    def _record_ids(df):
        """Return a one-column ('idx') frame holding the directory name of each row's path."""
        names = [os.path.basename(os.path.normpath(str(path))) for path in df["path"]]
        return pd.DataFrame({"idx": names})

    def read_record(self, directory):
        """Load the three infrared bands of one record into a dict keyed by band name."""
        record_data = {}
        for x in [
            "band_11",
            "band_14",
            "band_15"
        ]:
            record_data[x] = np.load(os.path.join(directory, x + ".npy"))

        return record_data

    def normalize_range(self, data, bounds):
        """Maps data to the range [0, 1]."""
        return (data - bounds[0]) / (bounds[1] - bounds[0])

    def get_false_color(self, record_data):
        """Build the three-channel false-color image for a single time step.

        The channels are band differences / band 14 rescaled with fixed bounds
        and clipped to [0, 1]; the frame at index ``N_TIMES_BEFORE`` along the
        last axis of the band arrays is returned.
        """
        _T11_BOUNDS = (243, 303)
        _CLOUD_TOP_TDIFF_BOUNDS = (-4, 5)
        _TDIFF_BOUNDS = (-4, 2)

        N_TIMES_BEFORE = 4

        r = self.normalize_range(record_data["band_15"] - record_data["band_14"], _TDIFF_BOUNDS)
        g = self.normalize_range(record_data["band_14"] - record_data["band_11"], _CLOUD_TOP_TDIFF_BOUNDS)
        b = self.normalize_range(record_data["band_14"], _T11_BOUNDS)
        # Stacking on axis 2 puts the colour channels before the time axis.
        false_color = np.clip(np.stack([r, g, b], axis=2), 0, 1)
        img = false_color[..., N_TIMES_BEFORE]

        return img

    def __getitem__(self, index):
        """Return ``(image, record_id)`` for the row at positional ``index``.

        The image is a float tensor in channel-first layout; the record id is
        an integer tensor.
        """
        row = self.df.iloc[index]
        con_path = row.path
        data = self.read_record(con_path)

        img = self.get_false_color(data)

        # Source frames are expected to be 256x256; move channels first.
        img = torch.tensor(np.reshape(img, (256, 256, 3))).to(torch.float32).permute(2, 0, 1)

        if self.image_size != 256:
            img = self.resize_image(img)

        if self.trn:
            img = self.hflip(img)

        img = self.normalize_image(img)

        image_id = int(self.df_idx.iloc[index]['idx'])

        return img.float(), torch.tensor(image_id)

    def __len__(self):
        """Number of records in the dataset."""
        return len(self.df)

In [8]:
def rle_encode(x, fg_val=1):
    """
    Run-length encode a binary mask in column-major (down-then-right) order.

    Args:
        x:  numpy array of shape (height, width), 1 - mask, 0 - background
        fg_val: value in `x` that marks foreground pixels
    Returns: run length encoding as a flat list of built-in ints,
        [start_1, length_1, start_2, length_2, ...] with 1-based starts;
        an empty list when no pixel equals `fg_val`
    """

    dots = np.where(
        x.T.flatten() == fg_val)[0]  # .T sets Fortran order down-then-right
    run_lengths = []
    prev = -2
    # tolist() yields plain Python ints, so the result stringifies as
    # "[1, 2]" regardless of how the installed NumPy prints its scalars.
    for b in dots.tolist():
        if b > prev + 1:
            # Gap since the previous foreground pixel: open a new run.
            run_lengths.extend((b + 1, 0))
        run_lengths[-1] += 1
        prev = b
    return run_lengths

def list_to_string(x):
    """
    Converts a flat list to a space-separated string representation
    Empty list returns '-'

    Elements are converted one by one with str(), so NumPy integer scalars
    are written as bare digits instead of their repr.
    """
    if x:  # non-empty list
        s = " ".join(str(value) for value in x)
    else:
        s = '-'
    return s

In [9]:
class LightningModule(pl.LightningModule):
    """Inference wrapper around an smp U-Net with the maxvit_rmlp_base_rw_384 encoder.

    The network is created without encoder weights and without an output
    activation; the inference code in this notebook loads a checkpoint and
    applies the sigmoid itself.
    """

    def __init__(self):
        super().__init__()
        unet_kwargs = dict(
            encoder_name="tu-maxvit_rmlp_base_rw_384",
            encoder_weights=None,
            in_channels=3,
            classes=1,
            activation=None,
        )
        self.model = smp.Unet(**unet_kwargs)

    def forward(self, batch):
        """Pass the image batch through the U-Net and return its output unchanged."""
        output = self.model(batch)
        return output
    
class LightningModule2(pl.LightningModule):
    """Inference wrapper around an smp U-Net with the maxxvitv2_rmlp_base_rw_384 encoder.

    The network is created without encoder weights and without an output
    activation; the inference code in this notebook loads a checkpoint and
    applies the sigmoid itself.
    """

    def __init__(self):
        super().__init__()
        unet_kwargs = dict(
            encoder_name="tu-maxxvitv2_rmlp_base_rw_384",
            encoder_weights=None,
            in_channels=3,
            classes=1,
            activation=None,
        )
        self.model = smp.Unet(**unet_kwargs)

    def forward(self, batch):
        """Pass the image batch through the U-Net and return its output unchanged."""
        output = self.model(batch)
        return output

In [10]:
# Checkpoint directory; in the visible part of this notebook it is referenced
# only by the commented-out legacy inference cells, which glob it for '*.ckpt'.
MODEL_PATH = "/kaggle/input/contrail-pl-model-save2/tu-maxvit-rmlp-base-rw-HF-384-Epoch30-0/"
# Alternative checkpoint directories, currently disabled:
# MODEL_PATH2 = "/kaggle/input/contrail-pl-model-save2/tu-maxvit-rmlp-base-rw-384-fold5-Epoch20/"
# MODEL_PATH3 = "/kaggle/input/contrail-pl-model-save2/timm-resnest200e-resnest50d-Ensemble-Fold10"

In [11]:
# Test dataset and loader at the image size given in config.yaml
# (no training-time augmentation).
test_ds = ContrailsDataset(test_df, image_size=config["model"]["image_size"], train=False)

test_dl = DataLoader(dataset=test_ds, batch_size=batch_size, num_workers=num_workers)

In [12]:
from tqdm.notebook import tqdm

def inference_sub(models_paths, device, exp_id, image_size=384, model_number=1):
    """Predict the test set with every checkpoint, average the folds and save the result.

    For each checkpoint the model is loaded, run over the whole test set, and
    its sigmoid probabilities (resized to 256x256 when ``image_size`` is not
    256) are collected per record id. The per-checkpoint predictions are then
    averaged and written with ``np.save`` under the name ``exp_id`` (NumPy
    appends ".npy" when the name has no such suffix).

    Args:
        models_paths: checkpoint paths, one per fold.
        device: torch device the models and batches are moved to.
        exp_id: file name (without extension) for the saved average.
        image_size: size the test images are resized to before prediction.
        model_number: 1 selects ``LightningModule``, 2 selects ``LightningModule2``.

    Returns:
        A tuple ``([], image_id_list)``. The first element is always an empty
        list (predictions are only written to disk); ``image_id_list`` holds
        the record ids in the same order as the rows of the saved array.

    Raises:
        ValueError: if ``model_number`` is not 1 or 2, or ``models_paths`` is empty.
    """
    model_classes = {1: LightningModule, 2: LightningModule2}
    if model_number not in model_classes:
        raise ValueError(f"model_number must be 1 or 2, got {model_number!r}")
    model_cls = model_classes[model_number]

    # Use the argument, not a same-named global, as the list of checkpoints.
    models_paths = list(models_paths)
    if not models_paths:
        raise ValueError("models_paths must contain at least one checkpoint path")

    # The dataset honours the requested image size instead of a fixed 384.
    test_ds = ContrailsDataset(test_df, image_size=image_size, train=False)
    test_dl = DataLoader(test_ds, batch_size=batch_size, num_workers=num_workers)

    predictions_list = []
    image_id_list = []

    for model_path in models_paths:
        # load_from_checkpoint is a classmethod: calling it on the class avoids
        # building a throwaway network first. No extra keyword arguments are
        # passed because the module constructors take none.
        model = model_cls.load_from_checkpoint(model_path)
        model.to(device)
        model.eval()

        # record id -> (1, 256, 256) probability map for this checkpoint
        model_preds = {}

        for images, image_id in tqdm(test_dl, total=len(test_dl)):
            images = images.to(device)

            with torch.no_grad():
                predicted_mask = model(images)
                if image_size != 256:
                    # Bring the prediction back to the 256x256 submission grid.
                    predicted_mask = torch.nn.functional.interpolate(predicted_mask, size=256, mode='bilinear')

            predicted_mask = torch.sigmoid(predicted_mask).cpu()

            for img_num in range(images.shape[0]):
                model_preds[image_id[img_num].item()] = predicted_mask[img_num, :, :, :]

        # Dict order is insertion order, i.e. the order the loader produced
        # the records, which is the same for every checkpoint.
        image_id_list = list(model_preds.keys())
        predictions_list.append(torch.cat(list(model_preds.values())).reshape(-1, 1, 256, 256))

        # Free the model before the next checkpoint is loaded.
        del model
        torch.cuda.empty_cache()
        gc.collect()

    predictions = torch.mean(torch.stack(predictions_list, dim=0), dim=0)
    np.save(f"{exp_id}", predictions.numpy())
    return [], image_id_list

In [13]:
# model_paths = [
#     '/kaggle/input/contrail-pl-model-save2/tu-maxxvitv2_rmlp_base_rw_384-fold5-Epoch20/v2-model-f0-val_dice=0.6798.ckpt',
#     '/kaggle/input/contrail-pl-model-save2/tu-maxxvitv2_rmlp_base_rw_384-fold5-Epoch20/v2-model-f1-val_dice=0.6776.ckpt',
#     '/kaggle/input/contrail-pl-model-save2/tu-maxxvitv2_rmlp_base_rw_384-fold5-Epoch20/v2-model-f2-val_dice=0.6754.ckpt',
#     '/kaggle/input/contrail-pl-model-save2/tu-maxxvitv2_rmlp_base_rw_384-fold5-Epoch20/v2-model-f3-val_dice=0.6813.ckpt',
#     '/kaggle/input/contrail-pl-model-save2/tu-maxxvitv2_rmlp_base_rw_384-fold5-Epoch20/v2-model-f4-val_dice=0.6793.ckpt'
# ]

# Experiment 1: five fold checkpoints from the "...-fold5-Epoch20" directory.
model_paths = [
    '/kaggle/input/contrail-pl-model-save2/tu-maxvit-rmlp-base-rw-384-fold5-Epoch20/model-f0-val_dice=0.6800.ckpt',
    '/kaggle/input/contrail-pl-model-save2/tu-maxvit-rmlp-base-rw-384-fold5-Epoch20/model-f1-val_dice=0.6777.ckpt',
    '/kaggle/input/contrail-pl-model-save2/tu-maxvit-rmlp-base-rw-384-fold5-Epoch20/model-f2-val_dice=0.6741.ckpt',
    '/kaggle/input/contrail-pl-model-save2/tu-maxvit-rmlp-base-rw-384-fold5-Epoch20/model-f3-val_dice=0.6810.ckpt',
    '/kaggle/input/contrail-pl-model-save2/tu-maxvit-rmlp-base-rw-384-fold5-Epoch20/model-f4-val_dice=0.6786.ckpt'
]

# Averaged predictions are saved under exp_id '1'; the first returned value is
# an empty list, the second the record ids in prediction order.
exp1_384_preds, image_ids = inference_sub(model_paths, device, '1', image_size=384, model_number=1)

# Experiment 2: five fold checkpoints from the "...-fold5-Epoch20-Frame6" directory.
# NOTE(review): the directory name suggests a different input frame, while the
# dataset class always selects frame index 4 - confirm this is intended.
model_paths = [
    '/kaggle/input/contrail-pl-model-save2/tu-maxvit-rmlp-base-rw-384-fold5-Epoch20-Frame6/model-f0-val_dice=0.6876.ckpt',
    '/kaggle/input/contrail-pl-model-save2/tu-maxvit-rmlp-base-rw-384-fold5-Epoch20-Frame6/model-f1-val_dice=0.6863.ckpt',
    '/kaggle/input/contrail-pl-model-save2/tu-maxvit-rmlp-base-rw-384-fold5-Epoch20-Frame6/model-f2-val_dice=0.6846.ckpt',
    '/kaggle/input/contrail-pl-model-save2/tu-maxvit-rmlp-base-rw-384-fold5-Epoch20-Frame6/model-f3-val_dice=0.6839.ckpt',
    '/kaggle/input/contrail-pl-model-save2/tu-maxvit-rmlp-base-rw-384-fold5-Epoch20-Frame6/model-f4-val_dice=0.6887.ckpt'
]

# Saved under exp_id '2'; `image_ids` is overwritten by this second call.
exp2_384_preds, image_ids = inference_sub(model_paths, device, '2', image_size=384, model_number=1)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

In [14]:
# Re-read the sample submission without index_col so that record_id is an
# ordinary column, as needed by the row selection in the next cell.
submission = pd.read_csv(os.path.join(data, 'sample_submission.csv'))

In [15]:
import math

# Fold-averaged probability maps saved by the two inference_sub() runs.
# They are opened memory-mapped and blended chunk by chunk.
exp1_preds = np.load('1.npy', mmap_mode='r')
exp2_preds = np.load('2.npy', mmap_mode='r')

# Ensemble weights of experiment 1 and experiment 2.
EXP1_WEIGHT = 0.7
EXP2_WEIGHT = 0.3

preds_final = []

CHUNK_SIZE = 150
# The number of chunks follows the length of the arrays being sliced.
CHUNKS = math.ceil(len(exp1_preds) / CHUNK_SIZE)

for start_idx in range(0, CHUNKS * CHUNK_SIZE, CHUNK_SIZE):
    stop_idx = start_idx + CHUNK_SIZE
    preds_final.append(
        exp1_preds[start_idx:stop_idx] * EXP1_WEIGHT + exp2_preds[start_idx:stop_idx] * EXP2_WEIGHT
    )

preds_final = np.concatenate(preds_final)
predicted_mask = preds_final

for image_id, mask in zip(image_ids, predicted_mask):
    # Binarize with the shared THR constant; the comparison is strict, so
    # pixels exactly at the threshold remain background (0).
    predicted_mask_with_threshold = (mask[0, :, :] > THR).astype(np.float64)

    # Write the RLE string into the row(s) of this record.
    submission.loc[submission['record_id'] == image_id, 'encoded_pixels'] = list_to_string(rle_encode(predicted_mask_with_threshold))

In [16]:
# Make record_id the index again so it is written as the first CSV column.
submission = submission.set_index('record_id')

In [17]:
import os, glob
# Delete every .npy file in /kaggle/working (presumably the intermediate
# prediction arrays saved during inference - confirm the working directory).
for filename in glob.glob("/kaggle/working/*.npy"):
    os.remove(filename) 

In [18]:
# Write the final submission file, including the index.
submission.to_csv('submission.csv')

In [19]:
submission.head()

Unnamed: 0_level_0,encoded_pixels
record_id,Unnamed: 1_level_1
1000834164244036115,-
1002653297254493116,-


In [20]:
# gc.enable()

# all_preds = {}

# model_paths_combined = glob.glob(MODEL_PATH + '*.ckpt') # + glob.glob(MODEL_PATH2 + '*.ckpt') # + glob.glob(MODEL_PATH3 + '*.ckpt')

# for i, model_path in enumerate(model_paths_combined):
#     print(model_path)
    
#     if model_path.find("v2-model") != -1:
#         model = LightningModule2().load_from_checkpoint(model_path, config="tu-maxxvitv2_rmlp_base_rw_384")
#     else:
#         model = LightningModule().load_from_checkpoint(model_path, config="tu-maxxvit_rmlp_base_rw_384")
        
#     model.to(device)
#     model.eval()

#     model_preds = {}
    
#     for _, data in enumerate(test_dl):
#         images, image_id = data
    
#         images = images.to(device)
        
#         with torch.no_grad():
#             predicted_mask = model(images[:, :, :, :])
#         if config["model"]["image_size"] != 256:
#             predicted_mask = torch.nn.functional.interpolate(predicted_mask, size=256, mode='bilinear')
#         predicted_mask = torch.sigmoid(predicted_mask).cpu().detach().numpy()
                
#         for img_num in range(0, images.shape[0]):
#             current_mask = predicted_mask[img_num, :, :, :]
#             current_image_id = image_id[img_num].item()
#             model_preds[current_image_id] = current_mask
#     all_preds[f"f{i}"] = model_preds
    
#     del model    
#     torch.cuda.empty_cache()
#     gc.collect() 

In [21]:
# for index in submission.index.tolist():
#     for i in range(len(glob.glob(MODEL_PATH + '*.ckpt'))):
#         if i == 0:
#             predicted_mask = all_preds[f"f{i}"][index]
#         else:
#             predicted_mask += all_preds[f"f{i}"][index]
#     predicted_mask = predicted_mask / len(glob.glob(MODEL_PATH + '*.ckpt'))
#     predicted_mask_with_threshold = np.zeros((256, 256))
#     predicted_mask_with_threshold[predicted_mask[0, :, :] < THR] = 0
#     predicted_mask_with_threshold[predicted_mask[0, :, :] > THR] = 1
#     submission.loc[int(index), 'encoded_pixels'] = list_to_string(rle_encode(predicted_mask_with_threshold))

In [22]:
# submission

In [23]:
# submission.to_csv('submission.csv')