In [None]:
from timm.utils import AverageMeter

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo

from torch_optimizer import *
import ttach as tta

import cv2
import numpy as np
import matplotlib.pyplot as plt

import glob
import os
import copy
import random
import math
import pandas as pd
# import pandas_path
import ast

from tqdm import tqdm_notebook
from sklearn.metrics import mean_squared_error
from collections import OrderedDict

import warnings
warnings.filterwarnings("ignore")

In [None]:
class config:
    """Notebook-wide settings, read as class attributes (the class is never instantiated here)."""
    # Learning rate handed to the optimizer in the training cell.
    BASE_LR = 1e-3
    # Length of each per-timestep feature vector (entries in a `pred_list` row); also the LSTM input size.
    INPUT_FEATURES = 23
    # Number of training epochs per fold.
    NUM_EPOCHS = 10
    # Name of the optimizer class; the training cell resolves it via `globals()[...]`.
    OPTIMIZER_NAME = "Ranger"
    # Prefix for checkpoint, log and saved-prediction file names.
    FILE_PREFIX = "lstm_transformer_23_models"
    # Suffix of the `new_split_with_preds_*.csv` feature file that is read (and written in MODE 4).
    PREDS_FILE_PREFIX = 'transformer_class_regr_23models'
    # Seed passed to `seed_everything`.
    SEED = 43
    # Batch size for every DataLoader.
    BATCH_SIZE = 32
    # Maximum sequence length (number of timesteps) produced by the dataset.
    N = 64
    # DataLoader worker processes.
    WORKERS = 16
    # Fold ids iterated by the training / logits cells.
    FOLD = [0,1,2,3,4]
    # When True, loops stop after the first batch / epoch (smoke test).
    DEBUG = False
    # Which cells execute. 3 runs modes 0-2; mode 4 (feature-CSV preparation) is only run when selected explicitly.
    MODE = 2 #{0: Train; 1: Val Logits; 2: Test Logits; 3: ALL (0-2); 4: Prepare LSTM feature CSV}

In [None]:
def seed_everything(seed):
    """Seed every random number generator used by this notebook.

    Covers Python's ``random``, NumPy's legacy global RNG and PyTorch (CPU and
    all CUDA devices), and puts cuDNN into deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Only affects hashing in interpreters/subprocesses started after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Seed every visible GPU, not just the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run; pin it off
    # explicitly rather than relying on whatever state the kernel was left in.
    torch.backends.cudnn.benchmark = False
# Seed all RNGs once, at notebook start, using the configured seed.
seed_everything(config.SEED)

In [None]:
class TimeDatasetWIND(Dataset):
    """Sequence dataset that pairs each sample with its preceding timesteps.

    For row ``index`` of ``df`` the item is a window of up to ``N`` consecutive
    frames of the same storm ending at that row, oldest first, left-aligned and
    zero-padded to length ``N``.

    Args:
        df: Rows to produce samples for. Needs columns ``wind_speed``,
            ``pred_list`` (string form of a list of ``D`` numbers),
            ``image_id`` (ending in ``_<integer>``) and ``storm_id``.
        df_train: History frame searched for preceding timesteps. Must contain
            every ``image_id`` of ``df``, with each storm's rows stored in
            consecutive, increasing frame order.
        N: Maximum window length.
        D: Number of features per timestep.
        phase: Accepted for call-site compatibility; not used by this class.
    """

    def __init__(self, df, df_train, N = config.N, D = config.INPUT_FEATURES, phase='train'):
        self.data = df.reset_index(drop=True)
        self.data_train = df_train.reset_index(drop=True)
        self.label = df.wind_speed.values
        self.N = N
        self.D = D

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index): # N, D
        """Return ``(features / 10, mask, labels / 10)`` for sample ``index``.

        Shapes are ``(N, D)``, ``(N,)`` and ``(N, 1)``; ``mask`` is 1 on real
        timesteps and 0 on padding. If ``self.label`` were ``None`` only the
        first two elements would be returned.
        """
        final_images = torch.zeros(self.N, self.D)
        final_labels = np.zeros((self.N, 1))
        final_mask = torch.zeros((self.N))

        # Collected newest-first (the current sample is element 0), flipped below.
        images = [torch.Tensor(ast.literal_eval(self.data.pred_list.iloc[index])).unsqueeze(0)]
        labels = [torch.Tensor([self.label[index]]).unsqueeze(0)]
        mask = [1]

        image_id = self.data.image_id.iloc[index]
        embedding_num = int(image_id.split('_')[-1])
        embedding_ocean_id = self.data.storm_id.iloc[index]
        # Position of the current sample inside the history frame.
        index_train = self.data_train[self.data_train.image_id==image_id].index.values[0]
        for i in range(1, self.N):
            prev = index_train - i
            if prev < 0:
                # Ran off the start of the history. A negative position would make
                # .iloc wrap around and read rows from the END of the frame.
                break
            embedding_nump = int(self.data_train.image_id.iloc[prev].split('_')[-1])
            embedding_ocean_idp = self.data_train.storm_id.iloc[prev]
            # Keep walking back only while frames are gap-free and from the same storm.
            if embedding_num-embedding_nump==i and embedding_ocean_id==embedding_ocean_idp:
                images.append(torch.Tensor(ast.literal_eval(self.data_train.pred_list.iloc[prev])).unsqueeze(0))
                mask.append(1)
                if self.label is not None:
                    labels.append(torch.Tensor([self.data_train.wind_speed.iloc[prev]]).unsqueeze(0))
            else:
                break

        # Reverse to chronological order so the current sample is the last real step.
        images = torch.flip(torch.cat(images, dim=0), [0])
        labels = torch.flip(torch.cat(labels, axis=0), [0])
        mask = torch.flip(torch.tensor(mask), [0])

        l = len(images)
        final_images[:l] = images
        final_labels[:l] = labels
        final_mask[:l] = mask
        # Features and targets are both scaled down by 10.
        if self.label is not None:
            return final_images/10, final_mask, final_labels/10
        else:
            return final_images/10, final_mask

In [None]:
class NeuralNet(nn.Module):
    """Two stacked unidirectional LSTMs with residual dense heads, one output per timestep.

    Args:
        embed_size: Number of input features per timestep.
        LSTM_UNITS: Hidden size of both LSTMs and of the dense layers.
        DO: Dropout probability applied before the output layer. It is also
            passed to the single-layer LSTMs, where PyTorch applies no dropout
            (LSTM dropout only acts between stacked layers).
    """

    def __init__(self, embed_size=config.INPUT_FEATURES, LSTM_UNITS=128, DO=0.2):
        super(NeuralNet, self).__init__()
        # Construction order is kept as-is: it fixes both the state_dict keys and
        # the order in which initial weights are drawn from the RNG.
        self.lstm1 = nn.LSTM(embed_size, LSTM_UNITS, bidirectional=False, batch_first=True, dropout=DO)
        self.lstm2 = nn.LSTM(LSTM_UNITS, LSTM_UNITS, bidirectional=False, batch_first=True, dropout=DO)

        self.dropout = nn.Dropout(DO)
        self.linear1 = nn.Linear(LSTM_UNITS, LSTM_UNITS)
        self.linear2 = nn.Linear(LSTM_UNITS, LSTM_UNITS)

        self.linear_pe = nn.Linear(LSTM_UNITS, 1)

    def forward(self, x, mask=None):
        """Predict one value per timestep.

        Args:
            x: Input of shape ``(batch, seq, embed_size)`` (the LSTMs are batch-first).
            mask: Optional ``(batch, seq)`` tensor, 1 for real steps and 0 for
                padding. When given, padded inputs and padded outputs are zeroed.
                When omitted, no masking is applied.

        Returns:
            Tensor of shape ``(batch, seq, 1)``.
        """
        if mask is not None:
            mask = mask.unsqueeze(-1)
            x = x*mask
        self.lstm1.flatten_parameters()
        h_lstm1, _ = self.lstm1(x)
        self.lstm2.flatten_parameters()
        h_lstm2, _ = self.lstm2(h_lstm1)

        h_conc_linear1  = F.relu(self.linear1(h_lstm1))
        h_conc_linear2  = F.relu(self.linear2(h_lstm2))

        # Residual-style sum of both LSTM outputs and their dense projections.
        hidden = h_lstm1 + h_lstm2 + h_conc_linear1 + h_conc_linear2

        output = self.linear_pe(self.dropout(hidden))
        # Previously the mask was applied unconditionally here, so calling
        # forward(x) with the default mask=None raised AttributeError.
        if mask is not None:
            output = output*mask
        return output

In [None]:
def run_epoch(model, loss_fn, optimizer, phase, scheduler=None):
    """Run one pass over ``dataloaders[phase]``.

    Reads the notebook globals ``dataloaders``, ``device`` and ``config``.

    Args:
        model: Network called as ``model(x, mask)``.
        loss_fn: Loss called as ``loss_fn(scores, targets)``.
        optimizer: Optimizer stepped after every batch when ``phase == "train"``.
        phase: Key into ``dataloaders``. ``"train"`` trains; any other value
            evaluates without gradients.
        scheduler: Optional scheduler stepped after every training batch.

    Returns:
        ``phase == "train"``: the running average training loss.
        Otherwise: ``(average loss, rmse)``, where the RMSE is computed on the
        last unmasked timestep of each sequence only, after multiplying
        predictions and targets by 10 (the dataset divides both by 10).
    """
    running_loss = AverageMeter()
    tk1 = tqdm_notebook(dataloaders[phase])
    if phase == "train":
        model.train()
        for x_var, x_mask, y_var in tk1:

            x_var = x_var.to(device=device).float()
            x_mask = x_mask.to(device=device).float()
            y_var = y_var.to(device=device).float()

            optimizer.zero_grad()
            scores = model(x_var, x_mask)
            loss = loss_fn(scores, y_var)

            # Weight by the real number of timesteps in this batch
            # (equals BATCH_SIZE * N for full batches).
            running_loss.update(loss.item(), n=x_var.shape[0]*x_var.shape[1])
            tk1.set_postfix(loss=running_loss.avg)
            loss.backward()
            optimizer.step()
            if scheduler is not None:
                scheduler.step()

            if config.DEBUG: break
        return running_loss.avg
    else:
        model.eval()
        running_mse = AverageMeter()
        with torch.no_grad():
            for x_var, x_mask, y_var in tk1:

                bs = x_var.shape[0]
                x_var = x_var.to(device=device).float()
                x_mask = x_mask.to(device=device).float()
                # Drop only the trailing size-1 axis. A bare squeeze() would also
                # remove the batch axis for a batch of one and break the indexing
                # below. Assumes targets/outputs are (batch, seq, 1) - TODO confirm
                # for models other than NeuralNet.
                y_var = y_var.to(device=device).float().squeeze(-1)
                scores = model(x_var, x_mask).squeeze(-1)
                loss = loss_fn(scores, y_var)

                running_loss.update(loss.item(), n=bs)

                # Index of the last 1 in each mask row = position of the current sample.
                mask_np = x_mask.cpu().numpy()
                idx = mask_np.shape[1]-np.argmax(mask_np[:,::-1], axis=1)-1
                rows = np.arange(bs)
                y_last = y_var.cpu().numpy()[rows, idx]
                scores_last = scores.cpu().numpy()[rows, idx]

                mse = np.sum((scores_last*10-y_last*10)**2)/len(scores_last)
                running_mse.update(mse, n=len(scores_last))

                tk1.set_postfix(loss=running_loss.avg, rmse=math.sqrt(running_mse.avg))
                if config.DEBUG: break
        rmse = math.sqrt(running_mse.avg)
        return running_loss.avg, rmse

In [None]:
# Create the output folders in pure Python: safe to re-run (no error when they
# already exist) and independent of IPython automagic / a POSIX shell.
os.makedirs("lstm_models", exist_ok=True)
os.makedirs("lstm_logs", exist_ok=True)

In [None]:
# Training: one model per entry in config.FOLD (runs for MODE 0 or 3).
if config.MODE in [0,3]:
    for fold in config.FOLD:
        # NOTE: `fold` is only used to name the checkpoint and log files below;
        # the train/validation split does not depend on it.
        X = pd.read_csv(f'new_split_with_preds_{config.PREDS_FILE_PREFIX}.csv')
        # Rows with pct_of_storm <= 0.8 are used for training, the rest for validation.
        train = X[X['pct_of_storm']<=0.8].copy()
        val = X[X['pct_of_storm']>0.8].copy()

        # `device` is a notebook global that run_epoch reads.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        train_losses = []
        valid_losses = []
        valid_rmse = []

        # Training windows only look back into the training rows.
        trainset = TimeDatasetWIND(train, train)
        # Validation windows look back into train + val, ordered by image_id.
        X = pd.concat([train, val]).sort_values(['image_id']).reset_index(drop=True)
        validset = TimeDatasetWIND(val, X)
        
        train_loader = DataLoader(trainset, config.BATCH_SIZE, num_workers=config.WORKERS, shuffle=True, pin_memory=True, drop_last=True)
        # drop_last=True also on validation: the final partial batch is excluded from the reported loss/RMSE.
        valid_loader = DataLoader(validset, config.BATCH_SIZE, num_workers=config.WORKERS, shuffle=False, pin_memory=True, drop_last=True)

        # `dataloaders` is a notebook global that run_epoch indexes by phase name.
        dataloaders = {
            "train" : train_loader,
            "valid" : valid_loader,
        }

        model = NeuralNet()
        model.to(device)
        criterion = nn.MSELoss()
#         optimizer = torch.optim.AdamW(model.parameters(), lr=config.BASE_LR, weight_decay = 0.1)
        # The optimizer class is looked up by name in the notebook globals
        # (presumably supplied by `from torch_optimizer import *` - verify).
        optimizer = globals()[config.OPTIMIZER_NAME](model.parameters(), lr=config.BASE_LR, weight_decay = 0.001)
        # Sentinel: any real RMSE below this value triggers the first save.
        best_rmse = 10000
        for epoch in range(config.NUM_EPOCHS):
            print('Starting epoch [%d / %d]' % (epoch + 1, config.NUM_EPOCHS))
            train_loss = run_epoch(model, criterion, optimizer, "train")
            valid_loss, rmse = run_epoch(model, criterion, optimizer, "valid")
    #         scheduler.step()

            # Keep only the checkpoint with the lowest validation RMSE so far.
            if rmse<best_rmse:
                print("**Saving model**")
                best_rmse=rmse
                torch.save({
                    "epoch": epoch + 1,
                    "state_dict" : model.state_dict(),
                    "rmse" : best_rmse,
                    "optim_dict" : optimizer.state_dict(),
                    # NOTE(review): this stores the `config` class object itself in the checkpoint.
                    "config_class" : config
                }, f"lstm_models/{config.FILE_PREFIX}_FOLD{fold}.pth")

            train_losses.append(train_loss)
            valid_losses.append(valid_loss)
            valid_rmse.append(rmse)
            # Rewrite the full per-epoch history after every epoch.
            df_data=np.array([train_losses, valid_losses, valid_rmse]).T
            df = pd.DataFrame(df_data, columns = ['train_losses','valid_losses','valid_rmse'])
            df.to_csv(f'lstm_logs/{config.FILE_PREFIX}_FOLD{fold}.csv')
            if config.DEBUG: break

## Validation Logits

In [None]:
# Create the output folder in pure Python: safe to re-run (no error when it
# already exists) and independent of IPython automagic / a POSIX shell.
os.makedirs("val_lstm_npys", exist_ok=True)

In [None]:
# Out-of-fold predictions: for each fold, predict that fold's rows with the
# fold's saved model and store them as a .npy file (runs for MODE 1 or 3).
if config.MODE in [1,3]:
    for fold in config.FOLD:
        X = pd.read_csv(f'new_split_with_preds_{config.PREDS_FILE_PREFIX}.csv')
        # History rows from the other folds use the true wind speed, repeated
        # across all feature slots, instead of model predictions. The column is
        # built in one assignment on an explicit copy: the previous per-row
        # `train.pred_list.iloc[i] = ...` was chained assignment on a slice,
        # which is slow and does not write through under pandas copy-on-write.
        # `.tolist()` yields plain Python numbers so the string form stays
        # parseable by ast.literal_eval.
        train = X[X.fold!=fold].copy()
        train['pred_list'] = [str([speed]*config.INPUT_FEATURES) for speed in train.wind_speed.tolist()]
        val = X[X.fold==fold]
        X = pd.concat([train, val]).sort_values(['image_id']).reset_index(drop=True)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        validset = TimeDatasetWIND(val, X,phase='test')
        valid_loader = DataLoader(validset, config.BATCH_SIZE, num_workers=config.WORKERS, shuffle=False, pin_memory=True, drop_last=False)
        model = NeuralNet()
        model.to(device)
        model.load_state_dict(torch.load(f'lstm_models/{config.FILE_PREFIX}_FOLD{fold}.pth','cpu')['state_dict'])
        model.eval()
        y_pred = []
        tk1 = tqdm_notebook(valid_loader)
        with torch.no_grad():
            for x_var, x_mask, _ in tk1:
                x_var = x_var.to(device=device).float()
                x_mask = x_mask.to(device=device).float()
                # Index of the last 1 in each mask row = position of the sample being predicted.
                mask_np = x_mask.cpu().numpy()
                idx = mask_np.shape[1]-np.argmax(mask_np[:,::-1], axis=1)-1
                # squeeze(-1) keeps the batch axis; a bare squeeze() collapsed it
                # when the final (drop_last=False) batch held a single sample.
                scores = model(x_var, x_mask).squeeze(-1).cpu().numpy()
                y_pred.extend(scores[np.arange(len(idx)), idx])
        np.save(f'val_lstm_npys/lstm_val_preds_{config.FILE_PREFIX}_N={config.N}_FOLD{fold}.npy', y_pred)

## Test Logits

In [None]:
# Create the output folder in pure Python: safe to re-run (no error when it
# already exists) and independent of IPython automagic / a POSIX shell.
os.makedirs("test_lstm_npys", exist_ok=True)

In [None]:
def get_test_predictions(fold):
    """Load the saved test predictions of every base model for one fold.

    Files are discovered with two glob patterns (classification folders first,
    then regression folders, each sorted). Each matched path is rewritten
    ('val' -> 'test', '_test_preds_fold_' -> '_preds_fold_test_') before
    loading. An array whose maximum is below 20 is multiplied by 10.

    Args:
        fold: Fold id used in the file-name patterns.

    Returns:
        ``np.ndarray`` built from one flattened prediction vector per model.
    """
    class_paths = sorted(glob.glob(f'*transformer_class_test_npys/*{fold}.npy'))
    regr_paths = sorted(glob.glob(f'*transformer_regr_test_npys/*transformer*{fold}.npy'))

    stacked = []
    for matched in class_paths + regr_paths:
        renamed = matched.replace('val', 'test').replace('_test_preds_fold_', '_preds_fold_test_')
        # Drop the last five characters and re-append '<fold>.npy'.
        values = np.load(f'{renamed[:-5]}{fold}.npy')
        flat = values.reshape(-1)
        # Small-valued outputs are brought back to the x10 scale.
        stacked.append(flat*10 if values.max()<20 else flat)
    return np.array(stacked)

In [None]:
# Test-set predictions: for each fold, feed the base models' test predictions
# through that fold's saved LSTM and store the result (runs for MODE 2 or 3).
if config.MODE in [2,3]:
    for fold in config.FOLD:
        X = pd.read_csv('sub.csv')
        pred_list = get_test_predictions(fold)
        X['storm_id'] = X.image_id.str[:3]
        X['pred_list'] = pd.Series((pred_list.T).tolist()).values
        # Round-trip through CSV so `pred_list` becomes the string form that the
        # dataset parses with ast.literal_eval.
        X.to_csv('temp.csv',index=False)
        X = pd.read_csv('temp.csv')
        # Delete the scratch file directly instead of shelling out to `rm -rf`.
        os.remove('temp.csv')
        X_train = pd.read_csv(f'new_split_with_preds_{config.PREDS_FILE_PREFIX}.csv')

        # Training rows act as history and use the true wind speed, repeated
        # across all feature slots. Built in one column assignment: the previous
        # per-row `X_train.pred_list.iloc[i] = ...` was chained assignment,
        # which is slow and does not write through under pandas copy-on-write.
        # `.tolist()` yields plain Python numbers so the string form stays
        # parseable by ast.literal_eval.
        X_train['pred_list'] = [str([speed]*config.INPUT_FEATURES) for speed in X_train.wind_speed.tolist()]

        X_full = pd.concat([X_train, X]).sort_values(['image_id']).reset_index(drop=True)

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        validset = TimeDatasetWIND(X, X_full,phase='test')
        valid_loader = DataLoader(validset, config.BATCH_SIZE, num_workers=config.WORKERS, shuffle=False, pin_memory=True, drop_last=False)
        model = NeuralNet()
        model.to(device)
        model.load_state_dict(torch.load(f'lstm_models/{config.FILE_PREFIX}_FOLD{fold}.pth','cpu')['state_dict'])
        model.eval()
        y_pred = []
        tk1 = tqdm_notebook(valid_loader)
        with torch.no_grad():
            for x_var, x_mask, _ in tk1:
                x_var = x_var.to(device=device).float()
                x_mask = x_mask.to(device=device).float()
                # Index of the last 1 in each mask row = position of the sample being predicted.
                mask_np = x_mask.cpu().numpy()
                idx = mask_np.shape[1]-np.argmax(mask_np[:,::-1], axis=1)-1
                # squeeze(-1) keeps the batch axis; a bare squeeze() collapsed it
                # when the final (drop_last=False) batch held a single sample.
                scores = model(x_var, x_mask).squeeze(-1).cpu().numpy()
                y_pred.extend(scores[np.arange(len(idx)), idx])
        np.save(f'test_lstm_npys/lstm_test_preds_{config.FILE_PREFIX}_N={config.N}_FOLD{fold}.npy', y_pred)

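## Preparing LSTM Training Features (run with `config.MODE = 4`; writes the `new_split_with_preds_*.csv` file that the training and logits cells read)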

In [None]:
def get_val_predictions(fold):
    """Load the saved validation predictions of every base model for one fold.

    Files are discovered with two glob patterns (classification folders first,
    then regression folders, each sorted). An array whose maximum is below 20
    is multiplied by 10. The number of matched files is printed.

    Args:
        fold: Fold id used in the file-name patterns.

    Returns:
        ``(pred, pred_list)``: ``pred_list`` is an ``np.ndarray`` with one
        flattened prediction vector per model, and ``pred`` is their
        element-wise mean across models.

    Raises:
        IndexError: If no prediction file matches ``fold``.
    """
    models_list = sorted(glob.glob(f'*transformer_class_val_npys/*{fold}.npy')) + sorted(glob.glob(f'*transformer_regr_val_npys/*transformer*{fold}.npy'))
    print(len(models_list))
    if not models_list:
        # Same exception type the old `models_list[0]` lookup produced, with a usable message.
        raise IndexError(f'no validation prediction files found for fold {fold}')

    pred_list = []
    for path in models_list:
        # Drop the last five characters and re-append '<fold>.npy'.
        a = np.load(f'{path[:-5]}{fold}.npy')
        flat = a.reshape(-1)
        # Small-valued outputs are brought back to the x10 scale.
        pred_list.append(flat*10 if a.max()<20 else flat)

    pred_array = np.array(pred_list)
    # The old code divided an accumulator that was never added to, so `pred`
    # was always all zeros; return the real mean over models instead.
    pred = pred_array.mean(axis=0)
    return pred, pred_array

In [None]:
# Build the feature CSV consumed by the training / logits cells: attach, to every
# row of new_split.csv, the list of base-model predictions for that row's fold.
if config.MODE in [4]:
    Y = pd.read_csv('new_split.csv')
    Y['pred_list'] = None
    for fold in [0,1,2,3,4]:
        _, pred_list = get_val_predictions(fold)
        # pred_list is (n_models, n_rows_in_fold); transpose to one list per row.
        # Assigned positionally to the fold's rows through a single .loc call.
        # The previous `Y['pred_list'][mask] = ...` was chained assignment, which
        # does not write through under pandas copy-on-write. The mask is taken
        # from Y itself, so new_split.csv is no longer re-read for every fold.
        Y.loc[Y.fold==fold, 'pred_list'] = pd.Series((pred_list.T).tolist()).values
    Y.to_csv(f'new_split_with_preds_{config.PREDS_FILE_PREFIX}.csv',index = False)