In [None]:
from timm.utils import AverageMeter
from timm.models import *
from timm.loss import SoftTargetCrossEntropy
import timm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo

from torch_optimizer import Ranger
import ttach as tta

import cv2
import numpy as np
import matplotlib.pyplot as plt

import glob
import os
import copy
import random
import math
import pandas as pd

from tqdm import tqdm_notebook

from albumentations import (
    HorizontalFlip, VerticalFlip, ShiftScaleRotate, Transpose, HueSaturationValue, MotionBlur, 
    RandomResizedCrop, RandomBrightnessContrast, OneOf, Compose, Normalize, Cutout, CoarseDropout,
    CenterCrop, Resize, RandomCrop, CenterCrop
)

from albumentations.pytorch import ToTensorV2
from torch.nn import TransformerEncoder, TransformerEncoderLayer

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Fold-split metadata. The config cell reads `df.image_id` to locate one sample
# embedding file per backbone.
df = pd.read_csv('new_split.csv')
# Scale factor applied to each backbone's embedding width to derive the
# transformer width (see config.D_MODELS).
ratio = 768/1280

In [None]:
class config:
    """Notebook-wide settings, accessed as class attributes.

    The run loops further down rebind MODEL_NAME, BATCH_SIZE and INPUT_FEATURE
    (and create D_MODEL) for every backbone they iterate over, so those
    attributes only hold placeholder values here.
    """
    # Optimisation / target scaling
    BASE_LR = 5e-6
    NUM_CLASSES = 1
    BIN_SIZE = 10
    NUM_EPOCHS = 10
    # Backbones whose pre-computed embeddings feed the transformer
    MODEL_NAMES = [
        "mixnet_xl",
        "dpn68b",
        "mixnet_l",
        "efficientnet_es",
        "mobilenetv3_large_100",
        "seresnext26t_32x4d",
    ]
    # Embedding width of each backbone, read from one stored sample on disk
    INPUT_FEATURES = [
        np.load(f'class_embeddings/{backbone}_FOLD{0}_{df.image_id.iloc[0]}.npy').shape[0]
        for backbone in MODEL_NAMES
    ]
    # Transformer width per backbone: embedding width scaled by `ratio`, rounded down to an even number
    D_MODELS = [2 * int((width * ratio) // 2) for width in INPUT_FEATURES]
    MODEL_NAME = "test"
    OPTIMIZER_NAME = "Ranger"
    FILE_PREFIX = "transformer-N=64"
    INPUT_FEATURE = 1280
    SEED = 43
    # Image preprocessing
    IMG_SIZE = 320
    MEAN = [0.485, 0.456, 0.406]
    STD = [0.229, 0.224, 0.225]
    # Loading / sequence length
    BATCH_SIZES = [32 for _ in MODEL_NAMES]
    BATCH_SIZE = 32
    N = 64
    WORKERS = 16
    FOLD = list(range(5))
    DEBUG = False
    MODE = 2 #{0: Train; 1: Val Logits; 2: Test Logits; 3: ALL}

In [None]:
# Display the transformer widths derived for each backbone.
config.D_MODELS

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's `random`, NumPy and PyTorch (CPU and all CUDA devices) and
    switches cuDNN to deterministic, non-autotuned algorithm selection.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    # Only affects Python processes started after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Cover every visible GPU, not just the current one (ignored without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The autotuner may pick different kernels from run to run; keep it off.
    torch.backends.cudnn.benchmark = False
# Seed all generators once, before any transform, dataset or model is built.
seed_everything(config.SEED)

In [None]:
def get_train_transforms():
    """Build the training augmentation pipeline.

    Random resized crop to a config.IMG_SIZE square, random transpose / flips /
    shift-scale-rotate, then normalisation and conversion to a tensor.
    """
    side = config.IMG_SIZE
    geometric = [
        RandomResizedCrop(side, side),
        Transpose(p=0.5),
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        ShiftScaleRotate(p=0.5, rotate_limit=180),
    ]
    finalize = [
        Normalize(mean=config.MEAN, std=config.STD, max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return Compose(geometric + finalize, p=1.)
  
        
def get_valid_transforms(tta=False):
    """Build the evaluation pipeline.

    Args:
        tta: When falsy, a centre crop to a config.IMG_SIZE square is applied
            first; when truthy the crop is skipped.

    Returns:
        A Compose of the optional crop, normalisation and tensor conversion.
    """
    steps = [] if tta else [CenterCrop(config.IMG_SIZE, config.IMG_SIZE)]
    steps += [
        Normalize(mean=config.MEAN, std=config.STD, max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return Compose(steps)

# Image-level pipelines, built once for the whole notebook.
train_transform = get_train_transforms()
valid_transform = get_valid_transforms(tta=False)
tta_test_transform = get_valid_transforms(tta=True)

# Test-time augmentation: five crops of config.IMG_SIZE x config.IMG_SIZE.
tta_transforms = tta.Compose([tta.FiveCrops(config.IMG_SIZE, config.IMG_SIZE)])

In [None]:
class TimeDatasetWIND(Dataset):
    """Sequences of pre-computed image embeddings for wind-speed regression.

    Each item is the embedding of one image preceded by the embeddings of up to
    N-1 directly preceding frames of the same storm, ordered oldest first and
    zero-padded on the right to length N.

    Embeddings are read from
    `class_embeddings/{config.MODEL_NAME}_FOLD{fold}_{image_id}.npy`.
    NOTE: `config.MODEL_NAME` and the module-level variable `fold` are looked up
    when an item is fetched, so both must be set before the dataset is iterated.

    Args:
        df: Rows to serve; needs `image_id`, `storm_id` and `wind_speed` columns.
        df_train: Frame searched for the preceding frames. Earlier frames are
            expected in the rows immediately above an image; every `image_id`
            of `df` must also occur here.
        N: Maximum sequence length.
        D: Embedding width (assumes each .npy file holds a 1-D vector of
            length D - TODO confirm).
        phase: Accepted for interface compatibility; not used.
    """

    def __init__(self, df, df_train, N = config.N, D = config.INPUT_FEATURE, phase='train'):
        self.data = df.reset_index(drop=True)
        self.data_train = df_train.reset_index(drop=True)
        self.label = df.wind_speed
        self.N = N
        self.D = D
        # image_id -> first row position in data_train, built once so that
        # __getitem__ does not scan the whole frame for every sample.
        self._row_of = {}
        for row, image_id in enumerate(self.data_train.image_id):
            self._row_of.setdefault(image_id, row)

    def __len__(self):
        return len(self.data)

    def _load_embedding(self, image_id):
        """Load one stored embedding and add a leading sequence axis."""
        path = f'class_embeddings/{config.MODEL_NAME}_FOLD{fold}_{image_id}.npy'
        return torch.Tensor(np.load(path)).unsqueeze(0)

    def __getitem__(self, index): # N, D
        """Return `(embeddings, mask, label)`.

        `embeddings` is an (N, D) tensor, `mask` is an (N,) tensor holding 1 for
        real steps and 0 for padding, and `label` is a 1-element tensor with the
        wind speed divided by config.BIN_SIZE. The current image is always the
        last unmasked step.
        """
        final_images = torch.zeros(self.N, self.D)
        final_labels = self.get_labels(index).unsqueeze(0)
        final_mask = torch.zeros((self.N))

        image_id = self.data.image_id.iloc[index]
        embedding_num = int(image_id.split('_')[-1])
        embedding_ocean_id = self.data.storm_id.iloc[index]
        # Collected newest first, reversed into chronological order below.
        images = [self._load_embedding(image_id)]

        try:
            index_train = self._row_of[image_id]
        except KeyError:
            raise KeyError(f"image_id {image_id!r} not found in df_train") from None

        for i in range(1, self.N):
            previous_row = index_train - i
            # Stop at the top of the frame: a negative position would make
            # .iloc wrap around to the last rows (or raise once it runs past
            # the frame length).
            if previous_row < 0:
                break
            previous_id = self.data_train.image_id.iloc[previous_row]
            embedding_nump = int(previous_id.split('_')[-1])
            embedding_ocean_idp = self.data_train.storm_id.iloc[previous_row]
            # Only frames of the same storm that are exactly i steps earlier
            # extend the history; the first gap ends it.
            if embedding_num-embedding_nump != i or embedding_ocean_id != embedding_ocean_idp:
                break
            images.append(self._load_embedding(previous_id))

        images = torch.cat(images[::-1], dim=0)
        l = len(images)
        final_images[:l] = images
        final_mask[:l] = 1
        if self.label is not None:
            return final_images, final_mask, final_labels
        else:
            return final_images, final_mask

    def get_labels(self, index):
        """Return the wind speed of row `index` divided by config.BIN_SIZE, as a 0-d tensor."""
        wind_speed = self.label.iloc[index].astype(float)/config.BIN_SIZE
        return torch.tensor(wind_speed)

In [None]:
class TimeDatasetWIND_FAST(Dataset):
    """In-memory dataset over pre-built (embeddings, mask, label) triples.

    The three arguments are parallel, indexable containers; item `i` is made of
    the `i`-th element of each.
    """

    def __init__(self, final_images, final_mask, final_labels):
        self.final_images, self.final_mask, self.final_labels = final_images, final_mask, final_labels

    def __len__(self):
        """Number of stored samples (length of `final_images`)."""
        return len(self.final_images)

    def __getitem__(self, index): # N, D
        """Return the `index`-th (embeddings, mask, label) tuple."""
        columns = (self.final_images, self.final_mask, self.final_labels)
        return tuple(column[index] for column in columns)

In [None]:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position information to a sequence-first input.

    Even feature columns hold sines and odd columns cosines of the position
    scaled by geometrically decreasing frequencies. The table is stored as the
    buffer `pe` with shape (max_len, 1, d_model).

    Args:
        d_model: Feature width of the input; may be even or odd.
        dropout: Dropout probability applied after the addition.
        max_len: Number of positions pre-computed.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one cosine column fewer than sine columns,
        # so trim the frequencies to match; a no-op when d_model is even.
        pe[:, 1::2] = torch.cos(position * div_term[:d_model // 2])
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Add the encoding of the first `x.size(0)` positions to `x`, then apply dropout.

        `x` is indexed sequence-first: its leading axis selects the position.
        """
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

In [None]:
# Best configuration found so far: input_dim=1280, d_model=1024, nhead=2, dim_feedforward=2048, nlayers=3, dropout=0.1

In [None]:
class TransformerModel(nn.Module):
    """Transformer encoder that maps a sequence of embeddings to per-step outputs.

    Pipeline: linear projection `input_dim -> d_model`, optional masking,
    positional encoding, `nlayers` encoder layers (with a ReLU module passed as
    the encoder's final `norm`), dropout (p=0.2) and a linear decoder to
    `num_classes`.

    Args:
        num_classes: Output width per step.
        input_dim: Width of the incoming embeddings.
        d_model: Transformer width.
        nhead: Number of attention heads.
        dim_feedforward: Hidden width of each layer's feed-forward part.
        nlayers: Number of encoder layers.
        dropout: Dropout used by the positional encoding and encoder layers.
    """

    def __init__(self, num_classes=1, input_dim=1280, d_model=768, nhead=2, dim_feedforward=2048, nlayers=3, dropout=0.1):
        super().__init__()
        self.model_type = 'Transformer'
        # Sub-modules are created in a fixed order so that random
        # initialisation and parameter ordering stay reproducible.
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout)
        self.transformer_encoder = nn.TransformerEncoder(layer, nlayers, norm=nn.ReLU())
        self.d_model = d_model
        self.input_dim = input_dim
        self.decoder = nn.Linear(d_model, num_classes)
        self.fc = nn.Linear(self.input_dim, self.d_model)
        self.do = nn.Dropout(0.2)
        self.init_weights()

    def init_weights(self):
        """Zero the decoder bias and draw its weights uniformly from [-0.1, 0.1]."""
        bound = 0.1
        with torch.no_grad():
            self.decoder.bias.zero_()
            self.decoder.weight.uniform_(-bound, bound)

    def forward(self, embedding, mask=None):
        """Run the model.

        Args:
            embedding: 3-D input whose last axis has width `input_dim`; the
                first two axes are swapped before the encoder and swapped back
                afterwards (presumably (batch, seq, input_dim) - TODO confirm).
            mask: Optional tensor broadcast over the feature axis; projected
                embeddings are multiplied by it. It is not passed to the
                encoder as an attention or padding mask.

        Returns:
            Tensor with the same first two axes as `embedding` and a last axis
            of width `num_classes`.
        """
        projected = self.fc(embedding)
        if mask is not None:
            projected = projected * mask.unsqueeze(-1)

        sequence_first = projected.permute(1, 0, 2)
        encoded = self.transformer_encoder(self.pos_encoder(sequence_first))
        decoded = self.decoder(self.do(encoded))
        return decoded.permute(1, 0, 2)

In [None]:
def _last_step_scores(model, x_var, x_mask):
    """Return the model output at the last unmasked step of every sequence.

    The result has shape (batch, num_classes): row b is the output at position
    `last_idx[b]`, the last index at which row b of `x_mask` takes its maximum.
    """
    # argmax over the reversed mask finds the last maximal entry; a row without
    # any 1 therefore resolves to the final position.
    reversed_mask = x_mask.cpu().numpy()[:, ::-1]
    last_idx = x_mask.shape[1] - np.argmax(reversed_mask, axis=1) - 1
    output = model(x_var, x_mask)
    # Pick one step per row directly instead of building a batch x batch
    # intermediate and taking its diagonal.
    rows = torch.arange(output.shape[0], device=output.device)
    cols = torch.as_tensor(last_idx, device=output.device)
    return output[rows, cols]


def run_epoch(model, loss_fn, optimizer, phase, scheduler=None, num_steps=20000):
    """Run one pass over `dataloaders[phase]`.

    Relies on the module-level names `dataloaders` (dict of loaders yielding
    `(x, mask, y)` batches), `device` and `config`.

    Args:
        model: Network called as `model(x, mask)`.
        loss_fn: Loss applied to `(scores, targets)`, both reshaped to
            (-1, config.NUM_CLASSES).
        optimizer: Stepped once per batch when `phase == "train"`.
        phase: "train" trains the model; any other key evaluates it.
        scheduler: Optional LR scheduler, stepped after every optimizer step.
        num_steps: Maximum number of training batches (training only).

    Returns:
        Training: the average loss.
        Evaluation: `(average loss, rmse)`, with the RMSE computed after
        multiplying scores and targets by config.BIN_SIZE.
    """
    running_loss = AverageMeter()
    loader = dataloaders[phase]

    if phase == "train":
        tk1 = tqdm_notebook(loader, total=min(len(loader), num_steps))
        model.train()
        for x_var, x_mask, y_var in tk1:
            x_var = x_var.to(device=device).float()
            x_mask = x_mask.to(device=device).float()
            y_var = y_var.to(device=device).float()

            optimizer.zero_grad()
            score = _last_step_scores(model, x_var, x_mask)
            loss = loss_fn(score.reshape(-1,config.NUM_CLASSES), y_var.reshape(-1,config.NUM_CLASSES))
            # Weight by the real batch size so a short batch does not skew the mean.
            running_loss.update(loss.item(), n=x_var.shape[0])
            tk1.set_postfix(loss=running_loss.avg)
            loss.backward()

            optimizer.step()
            if scheduler is not None:
                scheduler.step()

            # Stop after exactly `num_steps` batches (matches the bar's total).
            num_steps -= 1
            if num_steps <= 0:
                break
            if config.DEBUG: break
        return running_loss.avg

    tk1 = tqdm_notebook(loader, total=len(loader))
    model.eval()
    running_mse = AverageMeter()
    with torch.no_grad():
        for x_var, x_mask, y_var in tk1:
            x_var = x_var.to(device=device).float()
            x_mask = x_mask.to(device=device).float()
            y_var = y_var.to(device=device).float()

            # Bring both sides to the same 2-D layout before comparing them,
            # so the metric below can never broadcast mismatched shapes.
            score = _last_step_scores(model, x_var, x_mask).reshape(-1, config.NUM_CLASSES)
            y_var = y_var.reshape(-1, config.NUM_CLASSES)
            loss = loss_fn(score, y_var)
            running_loss.update(loss.item(), n=len(score))

            # Undo the BIN_SIZE scaling so the RMSE is in wind-speed units.
            y_np = y_var.cpu().numpy()*config.BIN_SIZE
            score_np = score.cpu().numpy()*config.BIN_SIZE

            mse = np.sum((score_np-y_np)**2)/(len(score_np))
            running_mse.update(mse, n=len(score_np))

            tk1.set_postfix(loss=running_loss.avg, rmse=math.sqrt(running_mse.avg))
            if config.DEBUG: break
    rmse = math.sqrt(running_mse.avg)
    return running_loss.avg, rmse

In [None]:
import copy
import torch.multiprocessing
# Share tensors between DataLoader worker processes through the file system
# (presumably to avoid running out of file descriptors with many workers -
# TODO confirm).
torch.multiprocessing.set_sharing_strategy('file_system')
import gc

In [None]:
# Train one transformer per (backbone, fold) pair. Runs only for MODE 0 (train) or 3 (all).
if config.MODE in [0,3]:
    # Checkpoints and per-epoch logs are written below; make sure their folders exist.
    os.makedirs('models', exist_ok=True)
    os.makedirs('logs', exist_ok=True)

    # The loop targets rebind the config attributes that the datasets, the model
    # and run_epoch read, so every backbone is handled with its own settings.
    for config.MODEL_NAME, config.BATCH_SIZE, config.INPUT_FEATURE, config.D_MODEL in zip(config.MODEL_NAMES, config.BATCH_SIZES, config.INPUT_FEATURES, config.D_MODELS):
        # `fold` is also read as a global by TimeDatasetWIND when it loads embeddings.
        for fold in config.FOLD:

            X = pd.read_csv('new_split.csv')

            train = X[X.fold!=fold]
            val = X[X.fold==fold]

            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

            train_losses = []
            valid_losses = []
            valid_rmse = []

            # Stage 1: materialise every sequence once, so that the training
            # epochs below do not re-read the .npy files from disk.
            print('saving objects ...')
            final_images_ = []
            final_mask_ =[]
            final_labels_ =[]
            val_final_images_ = []
            val_final_mask_ =[]
            val_final_labels_ =[]
            # Training histories come from the training rows only; validation
            # histories may reach into any row of the full split.
            trainset = TimeDatasetWIND(train, train, D = config.INPUT_FEATURE)
            validset = TimeDatasetWIND(val, X, D = config.INPUT_FEATURE)
            train_loader = DataLoader(trainset, config.BATCH_SIZE, num_workers=config.WORKERS, shuffle=False, pin_memory=False, drop_last=False)
            valid_loader = DataLoader(validset, config.BATCH_SIZE, num_workers=config.WORKERS, shuffle=False, pin_memory=False, drop_last=False)
            tk1 = tqdm_notebook(train_loader, total=len(train_loader))
            for final_images, final_mask, final_labels in tk1:
                # Deep copies detach the samples from the worker-owned batch.
                final_images_.extend(copy.deepcopy(final_images))
                final_mask_.extend(copy.deepcopy(final_mask))
                final_labels_.extend(copy.deepcopy(final_labels))
                del final_images, final_mask, final_labels

            tk2 = tqdm_notebook(valid_loader, total=len(valid_loader))
            for final_images, final_mask, final_labels in tk2:
                val_final_images_.extend(copy.deepcopy(final_images))
                val_final_mask_.extend(copy.deepcopy(final_mask))
                val_final_labels_.extend(copy.deepcopy(final_labels))
                del final_images, final_mask, final_labels

            print('saved ...')

            # Stage 2: train from the in-memory copies.
            trainset = TimeDatasetWIND_FAST(final_images_, final_mask_, final_labels_)
            validset = TimeDatasetWIND_FAST(val_final_images_, val_final_mask_, val_final_labels_)
            train_loader = DataLoader(trainset, config.BATCH_SIZE, num_workers=config.WORKERS, shuffle=True, pin_memory=True, drop_last=True)
            # Keep the final partial batch: dropping it would leave part of the
            # fold out of the RMSE that drives checkpoint selection.
            valid_loader = DataLoader(validset, config.BATCH_SIZE, num_workers=config.WORKERS, shuffle=False, pin_memory=True, drop_last=False)
            dataloaders = {
                "train" : train_loader,
                "valid" : valid_loader,
            }

            model = TransformerModel(input_dim=config.INPUT_FEATURE, d_model=config.D_MODEL)
            model.to(device)
            criterion = nn.MSELoss()
            # The optimizer class is looked up by name among the notebook globals.
            optimizer = globals()[config.OPTIMIZER_NAME](model.parameters(), lr=config.BASE_LR)
            # No LR scheduler is used here; run_epoch accepts one through `scheduler=`.
            best_rmse = 10000
            for epoch in range(config.NUM_EPOCHS):
                print('Starting epoch [%d / %d]' % (epoch + 1, config.NUM_EPOCHS))
                train_loss = run_epoch(model, criterion, optimizer, "train")
                valid_loss, rmse = run_epoch(model, criterion, optimizer, "valid")

                if rmse<best_rmse:
                    print("**Saving model**")
                    best_rmse=rmse
                    # NOTE: "config_class" stores the class object itself, so
                    # loading the file needs a `config` class to be importable.
                    torch.save({
                        "epoch": epoch + 1,
                        "state_dict" : model.state_dict(),
                        "rmse" : best_rmse,
                        "optim_dict" : optimizer.state_dict(),
                        "config_class" : config
                    }, f"models/{config.MODEL_NAME}_{config.FILE_PREFIX}_FOLD{fold}.pth")

                train_losses.append(train_loss)
                valid_losses.append(valid_loss)
                valid_rmse.append(rmse)
                # Rewrite the full history every epoch. A dedicated name keeps
                # the global `df` (read by the config cell) intact.
                log_data = np.array([train_losses, valid_losses, valid_rmse]).T
                log_df = pd.DataFrame(log_data, columns = ['train_losses','valid_losses','valid_rmse1'])
                log_df.to_csv(f'logs/{config.MODEL_NAME}_{config.FILE_PREFIX}_FOLD{fold}.csv')
                if config.DEBUG: break
            # Release the cached tensors before the next fold is materialised.
            del dataloaders, valid_loader, train_loader, validset, trainset, val_final_images_, val_final_mask_, val_final_labels_, final_images_, final_mask_, final_labels_
            gc.collect()

## Validation Logits

In [None]:
# Create the output folders idempotently. The prediction cell saves into
# 'transformer_class_val_npys', so that folder is created alongside the
# originally requested 'transformer_regr_val_npys'.
for out_dir in ('transformer_regr_val_npys', 'transformer_class_val_npys'):
    os.makedirs(out_dir, exist_ok=True)

In [None]:
# Predict every validation fold with its own checkpoint. Runs only for MODE 1 or 3.
if config.MODE in [1, 3]:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # np.save does not create missing folders.
    os.makedirs('transformer_class_val_npys', exist_ok=True)
    for config.MODEL_NAME, config.BATCH_SIZE, config.INPUT_FEATURE, config.D_MODEL in zip(config.MODEL_NAMES, config.BATCH_SIZES, config.INPUT_FEATURES, config.D_MODELS):
        # `fold` is also read as a global by TimeDatasetWIND when it loads embeddings.
        for fold in config.FOLD:
            X = pd.read_csv('new_split.csv')
            train = X[X.fold!=fold]
            val = X[X.fold==fold]
            validset = TimeDatasetWIND(val, X, D = config.INPUT_FEATURE)
            valid_loader = DataLoader(validset, config.BATCH_SIZE, num_workers=config.WORKERS, shuffle=False, pin_memory=True)
            model = TransformerModel(input_dim=config.INPUT_FEATURE, d_model=config.D_MODEL)
            # map_location lets a checkpoint saved on GPU load on a CPU-only machine.
            checkpoint = torch.load(f"models/{config.MODEL_NAME}_{config.FILE_PREFIX}_FOLD{fold}.pth", map_location=device)
            model.load_state_dict(checkpoint["state_dict"])
            model.to(device)
            model.eval()
            y_logits = []
            tk1 = tqdm_notebook(valid_loader, total=len(valid_loader))
            with torch.no_grad():
                for x_var, x_mask, y_var in tk1:
                    x_var = x_var.to(device=device).float()
                    x_mask = x_mask.to(device=device).float()
                    # Last index at which each mask row takes its maximum,
                    # i.e. the final real step of the sequence.
                    idx = len(x_mask[0])-np.argmax(x_mask.cpu().numpy()[:,::-1], axis=1)-1
                    output = model(x_var, x_mask)
                    # Select one step per row directly rather than through a
                    # batch x batch intermediate and its diagonal.
                    rows = torch.arange(output.shape[0], device=output.device)
                    score = output[rows, torch.as_tensor(idx, device=output.device)]
                    y_logits.extend(score.detach().cpu().numpy())
            np.save(f'transformer_class_val_npys/{config.MODEL_NAME}_{config.FILE_PREFIX}_val_preds_fold_{fold}.npy', y_logits)

## Test Logits

In [None]:
# Create the output folders idempotently. The prediction cell saves into
# 'transformer_class_test_npys', so that folder is created alongside the
# originally requested 'transformer_regr_test_npys'.
for out_dir in ('transformer_regr_test_npys', 'transformer_class_test_npys'):
    os.makedirs(out_dir, exist_ok=True)

In [None]:
# Test rows get an image path and a placeholder wind speed of 0.
x_test = pd.read_csv('data/test_set_features.csv')
x_test = x_test.assign(
    file_name="data/test/test/" + x_test.image_id + ".jpg",
    wind_speed=0,
)
# Merge test and labelled rows and order them by image_id; the combined frame
# is what the test dataset searches for earlier frames.
labelled_rows = pd.read_csv('new_split.csv')
X = pd.concat([x_test, labelled_rows]).sort_values('image_id').reset_index(drop=True)
del labelled_rows

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Predict the test set with every fold's checkpoint. Runs only for MODE 2 or 3.
if config.MODE in [2, 3]:
    # np.save does not create missing folders.
    os.makedirs('transformer_class_test_npys', exist_ok=True)
    for config.MODEL_NAME, config.BATCH_SIZE, config.INPUT_FEATURE, config.D_MODEL in zip(config.MODEL_NAMES, config.BATCH_SIZES, config.INPUT_FEATURES, config.D_MODELS):
        testset = TimeDatasetWIND(x_test, X, D = config.INPUT_FEATURE)
        test_loader = DataLoader(testset, config.BATCH_SIZE, num_workers=config.WORKERS, shuffle=False, pin_memory=True)
        # `fold` is also read as a global by TimeDatasetWIND when it loads
        # embeddings, so each pass over test_loader uses that fold's files.
        for fold in config.FOLD:
            model = TransformerModel(input_dim=config.INPUT_FEATURE, d_model=config.D_MODEL)
            # map_location lets a checkpoint saved on GPU load on a CPU-only machine.
            checkpoint = torch.load(f"models/{config.MODEL_NAME}_{config.FILE_PREFIX}_FOLD{fold}.pth", map_location=device)
            model.load_state_dict(checkpoint["state_dict"])
            model.to(device)
            model.eval()
            y_logits = []
            tk1 = tqdm_notebook(test_loader, total=len(test_loader))
            with torch.no_grad():
                for x_var, x_mask, y_var in tk1:
                    x_var = x_var.to(device=device).float()
                    x_mask = x_mask.to(device=device).float()
                    # Last index at which each mask row takes its maximum,
                    # i.e. the final real step of the sequence.
                    idx = len(x_mask[0])-np.argmax(x_mask.cpu().numpy()[:,::-1], axis=1)-1
                    output = model(x_var, x_mask)
                    # Select one step per row directly rather than through a
                    # batch x batch intermediate and its diagonal.
                    rows = torch.arange(output.shape[0], device=output.device)
                    score = output[rows, torch.as_tensor(idx, device=output.device)]
                    y_logits.extend(score.detach().cpu().numpy())
            np.save(f'transformer_class_test_npys/{config.MODEL_NAME}_{config.FILE_PREFIX}_preds_fold_test_{fold}.npy', y_logits)