# Libraries

In [None]:
import torch
# Output directory for this experiment. Paths below are built by plain string
# concatenation (folder + name), so the trailing slash is required.
folder = '../outputs/exp2/finetune_roberta_lovasz/'
# Device used for training; hard-coded to the CUDA device with index 2.
device = torch.device('cuda:2')

In [None]:
import numpy as np
import pandas as pd
import os
import warnings
import random
import torch 
from torch import nn
import torch.optim as optim
from sklearn.model_selection import StratifiedKFold
import tokenizers
from transformers import RobertaModel, RobertaConfig

warnings.filterwarnings('ignore')
from tqdm.notebook import tqdm
import pickle as pkl
from torch.autograd import Variable

# Seed

In [None]:
def seed_everything(seed_value):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and, when CUDA is
    available, every CUDA device), exports ``PYTHONHASHSEED`` and puts cuDNN
    into deterministic mode.

    Args:
        seed_value: Integer seed applied to all generators.
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    # Hash randomization is fixed at interpreter start-up, so this only takes
    # effect in processes spawned after this point.
    os.environ['PYTHONHASHSEED'] = str(seed_value)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        # The cuDNN autotuner may select different kernels from run to run,
        # so it has to be switched off for reproducible results.
        torch.backends.cudnn.benchmark = False

# Single seed shared by the RNG setup here and by the StratifiedKFold split
# further down (passed there as random_state).
seed = 42
seed_everything(seed)

# Data Loader

In [None]:
class TweetDataset(torch.utils.data.Dataset):
    """Dataset that turns tweet rows into model inputs and span targets.

    Each item is a dict with the token ids, attention mask, the whitespace
    normalised tweet, per-token character offsets and the normalised selected
    text. When the frame has a ``selected_text`` column the item also carries
    ``target_mask``, ``start_idx`` and ``end_idx`` describing which tokens
    overlap the selected text.

    Args:
        df: DataFrame with ``text`` and ``sentiment`` columns and, for
            labeled data, a ``selected_text`` column.
        max_len: Fixed sequence length every item is padded/truncated to.
    """

    def __init__(self, df, max_len=96):
        self.df = df
        self.max_len = max_len
        # Unlabeled frames (e.g. the test set) have no 'selected_text' column.
        self.labeled = 'selected_text' in df
        self.tokenizer = tokenizers.ByteLevelBPETokenizer(
            vocab_file='../input/roberta-base/vocab.json',
            merges_file='../input/roberta-base/merges.txt',
            lowercase=True,
            add_prefix_space=True)

    def __getitem__(self, index):
        """Build the input (and, if labeled, target) dict for row ``index``."""
        row = self.df.iloc[index]

        ids, masks, tweet, offsets, selected_text = self.get_input_data(row)
        data = {
            'ids': ids,
            'masks': masks,
            'tweet': tweet,
            'offsets': offsets,
            'selected_text': selected_text,
        }

        if self.labeled:
            # Locate the span once and derive both target forms from it.
            start_idx, end_idx = self._target_token_span(row, tweet, offsets)
            data['target_mask'] = self._span_to_mask(start_idx, end_idx)
            data['start_idx'] = start_idx
            data['end_idx'] = end_idx

        return data

    def __len__(self):
        return len(self.df)

    @staticmethod
    def _normalize(text):
        """Lower-case ``text``, collapse whitespace and prepend one space."""
        return " " + " ".join(text.lower().split())

    def get_input_data(self, row):
        """Tokenize one row into fixed-length tensors.

        Args:
            row: Object exposing ``text`` and ``sentiment`` (and
                ``selected_text`` when the dataset is labeled).

        Returns:
            Tuple ``(ids, masks, tweet, offsets, selected_text)`` where
            ``ids``/``masks``/``offsets`` are tensors of length ``max_len``
            and ``selected_text`` is ``""`` for unlabeled data.
        """
        tweet = self._normalize(row.text)
        # Unlabeled rows have no selected_text attribute; reading it
        # unconditionally made the test loader unusable.
        selected_text = self._normalize(row.selected_text) if self.labeled else ""

        encoding = self.tokenizer.encode(tweet)
        sentiment_id = self.tokenizer.encode(row.sentiment).ids
        # Layout: 0, sentiment tokens, 2, 2, tweet tokens, 2
        # (presumably RoBERTa's <s>/</s> ids -- confirm against the vocab).
        ids = [0] + sentiment_id + [2, 2] + encoding.ids + [2]
        # One dummy offset per non-tweet token so offsets stay aligned with ids
        # even if the sentiment encodes to more than one token.
        offsets = ([(0, 0)] * (3 + len(sentiment_id))
                   + list(encoding.offsets) + [(0, 0)])

        # Truncate over-long sequences (keeping the closing token) so every
        # item has exactly max_len entries and batches can be collated.
        if len(ids) > self.max_len:
            ids = ids[:self.max_len - 1] + [2]
            offsets = offsets[:self.max_len - 1] + [(0, 0)]

        pad_len = self.max_len - len(ids)
        if pad_len > 0:
            ids += [1] * pad_len
            offsets += [(0, 0)] * pad_len

        ids = torch.tensor(ids)
        # Id 1 is the padding id used above; everything else is attended to.
        masks = (ids != 1).long()
        offsets = torch.tensor(offsets)

        return ids, masks, tweet, offsets, selected_text

    def _target_token_span(self, row, tweet, offsets):
        """Return ``(first, last)`` token indices overlapping the selection.

        The normalised selected text is searched in ``tweet``; every token
        whose character range intersects the first match is part of the span.

        Raises:
            ValueError: If the selected text does not occur in ``tweet`` or no
                token overlaps it (previously an opaque ``IndexError``).
        """
        needle = " ".join(row.selected_text.lower().split())
        char_start = tweet.find(needle) if needle else -1
        if char_start < 0:
            raise ValueError(
                f'selected_text {needle!r} not found in tweet {tweet!r}')
        char_end = char_start + len(needle)  # exclusive

        # Tensors iterate as 0-d tensors; plain Python ints are much cheaper.
        if hasattr(offsets, 'tolist'):
            offsets = offsets.tolist()

        target_idx = [
            j for j, (tok_start, tok_end) in enumerate(offsets)
            if max(tok_start, char_start) < min(tok_end, char_end)
        ]
        if not target_idx:
            raise ValueError(
                f'no token overlaps selected_text {needle!r} in tweet {tweet!r}')

        return target_idx[0], target_idx[-1]

    def _span_to_mask(self, start_idx, end_idx):
        """Return a float array of length ``max_len`` with 1s on the span."""
        target_mask = np.zeros(self.max_len)
        target_mask[start_idx:end_idx + 1] = 1.
        return target_mask

    def get_target_idx(self, row, tweet, offsets):
        """Return the first and last token index covering the selected text."""
        return self._target_token_span(row, tweet, offsets)

    def get_target_mask(self, row, tweet, offsets):
        """Return the 0/1 token mask (length ``max_len``) of the selection."""
        start_idx, end_idx = self._target_token_span(row, tweet, offsets)
        return self._span_to_mask(start_idx, end_idx)
        
def get_train_val_loaders(df, train_idx, val_idx, batch_size=8):
    """Create the training and validation data loaders for one fold.

    Args:
        df: Full training DataFrame.
        train_idx: Positional row indices of the training split.
        val_idx: Positional row indices of the validation split.
        batch_size: Batch size used by both loaders.

    Returns:
        Dict with a shuffled ``"train"`` loader (incomplete last batch
        dropped) and an ordered ``"val"`` loader.
    """
    make_loader = torch.utils.data.DataLoader
    return {
        "train": make_loader(
            TweetDataset(df.iloc[train_idx]),
            batch_size=batch_size,
            shuffle=True,
            num_workers=2,
            drop_last=True),
        "val": make_loader(
            TweetDataset(df.iloc[val_idx]),
            batch_size=batch_size,
            shuffle=False,
            num_workers=2),
    }

def get_test_loader(df, batch_size=32):
    """Wrap ``df`` in a ``TweetDataset`` and return an ordered data loader.

    Args:
        df: DataFrame to iterate over.
        batch_size: Number of rows per batch.
    """
    dataset = TweetDataset(df)
    return torch.utils.data.DataLoader(
        dataset,
        num_workers=2,
        shuffle=False,
        batch_size=batch_size)

# Model

In [None]:
class TweetModel(nn.Module):
    """RoBERTa encoder with a per-token sigmoid head.

    ``forward`` returns, for every token, a value in (0, 1) produced by the
    single-output linear head ``fc2``. The two-output head ``fc`` is not used
    by ``forward``; it is kept so that the module's ``state_dict`` keys stay
    unchanged for existing checkpoints.
    """

    def __init__(self):
        super().__init__()

        config = RobertaConfig.from_pretrained(
            '../input/roberta-base/config.json', output_hidden_states=True)
        self.roberta = RobertaModel.from_pretrained(
            '../input/roberta-base/pytorch_model.bin', config=config)
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(config.hidden_size, 2)
        self.fc2 = nn.Linear(config.hidden_size, 1)
        self.sigmoid = nn.Sigmoid()

        for head in (self.fc, self.fc2):
            nn.init.normal_(head.weight, std=0.02)
            # normal_(bias, 0) only sets the mean and leaves std at 1, i.e. it
            # draws a random N(0, 1) bias; start from an exact zero instead.
            nn.init.zeros_(head.bias)

    def forward(self, input_ids, attention_mask):
        """Compute per-token mask probabilities.

        Args:
            input_ids: Token id tensor passed to the encoder.
            attention_mask: Attention mask passed to the encoder.

        Returns:
            Sigmoid of the ``fc2`` projection of the encoder's first output
            (same leading dimensions as that output, last dimension 1).
        """
        outputs = self.roberta(input_ids, attention_mask=attention_mask)
        # Index instead of unpacking a fixed 3-tuple: this works both when the
        # encoder returns a plain tuple and when it returns a ModelOutput.
        x = outputs[0]
        x = self.dropout(x)
        x = self.fc2(x)
        return self.sigmoid(x)

# Loss Function

In [None]:
# def loss_fn(start_logits, end_logits, start_positions, end_positions):
#     ce_loss = nn.CrossEntropyLoss()
#     start_loss = ce_loss(start_logits, start_positions)
#     end_loss = ce_loss(end_logits, end_positions)    
#     total_loss = start_loss + end_loss
#     return total_loss

In [None]:
# def loss_fn2(pred_masks, target_masks):
#     loss = nn.BCELoss()(pred_masks, target_masks)
#     return loss

In [None]:
# def jaccard_loss(pred, target, smooth=1e-10):
#     if len(pred.shape)>=3:
#         pred = pred.view(pred.shape[0], pred.shape[1])
#     I = (pred * target).sum()
#     P = pred.sum()
#     T = target.sum()
#     loss = 1 - ((I + smooth) / (P + T - I + smooth))
#     return loss

# def dice_loss(pred, target, smooth=1e-10):
#     if len(pred.shape)>=3:
#         pred = pred.view(pred.shape[0], pred.shape[1])
#     I = (pred * target).sum()
#     P = pred.sum()
#     T = target.sum()
#     loss = 1 - (2 * (I + smooth) / (P + T + smooth))
#     return loss

In [None]:
def lovasz_grad(gt_sorted):
    """
    Gradient of the Lovasz extension with respect to sorted errors
    (Alg. 1 in the paper).

    gt_sorted: 1-D tensor of ground-truth values, ordered by decreasing error.
    Returns a tensor of the same length holding the successive increments of
    the Jaccard loss.
    """
    n_items = len(gt_sorted)
    n_foreground = gt_sorted.sum()
    running_fg = gt_sorted.float().cumsum(0)
    running_bg = (1 - gt_sorted).float().cumsum(0)
    jaccard = 1. - (n_foreground - running_fg) / (n_foreground + running_bg)
    if n_items <= 1:
        # nothing to difference for the 0- or 1-element case
        return jaccard
    # first value kept as is, the rest replaced by consecutive differences
    return torch.cat((jaccard[:1], jaccard[1:] - jaccard[:-1]))

def lovasz_softmax_flat(probas, labels, classes='present'):
    """
    Multi-class Lovasz-Softmax loss
      probas: [P, C] tensor, class probabilities at each prediction (between 0 and 1)
      labels: [P] tensor, ground truth labels (between 0 and C - 1)
      classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average.

    Returns the mean of the per-class losses. Raises ValueError when probas
    has a single column (sigmoid output) and `classes` has more than one
    element.
    """
    if probas.numel() == 0:
        # only void pixels, the gradients should be 0
        return probas * 0.
    C = probas.size(1)
    losses = []
    class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes
    # Compare by value: `classes is 'present'` tested object identity, which
    # is not guaranteed for equal strings and is a SyntaxWarning on Python 3.8+.
    only_present = isinstance(classes, str) and classes == 'present'
    for c in class_to_sum:
        fg = (labels == c).float()  # foreground for class c
        if only_present and fg.sum() == 0:
            continue
        if C == 1:
            if len(classes) > 1:
                raise ValueError('Sigmoid output possible only with 1 class')
            class_pred = probas[:, 0]
        else:
            class_pred = probas[:, c]
        # Plain tensors take part in autograd directly; the deprecated
        # Variable wrappers and .data access are no longer needed.
        errors = (fg - class_pred).abs()
        errors_sorted, perm = torch.sort(errors, 0, descending=True)
        fg_sorted = fg[perm]
        losses.append(torch.dot(errors_sorted, lovasz_grad(fg_sorted)))
    return mean(losses)

def mean(l, ignore_nan=False, empty=0):
    """
    nanmean compatible with generators.

      l: iterable of numbers (or scalar tensors) to average
      ignore_nan: when True, NaN entries are skipped
      empty: value returned for an empty input, or 'raise' to raise ValueError

    A single remaining element is returned unchanged (no division).
    """
    values = iter(l)
    if ignore_nan:
        # NaN is the only value that is not equal to itself. The original
        # relied on the undefined names `ifilterfalse` / `isnan`.
        values = (v for v in values if v == v)
    try:
        acc = next(values)
    except StopIteration:
        if empty == 'raise':
            raise ValueError('Empty mean') from None
        return empty
    count = 1
    for v in values:
        # Out-of-place add: `acc += v` would modify the caller's first
        # element in place when it is a tensor.
        acc = acc + v
        count += 1
    if count == 1:
        return acc
    return acc / count

In [None]:
# p = torch.randn(32,96,1).view(-1, 1)
# t = torch.zeros(32, 96).view(-1)
# lovasz_softmax_flat(p, t, [0])

In [None]:
def loss_fn3(pred_masks, target_masks):
    """Lovasz loss between predicted mask probabilities and target masks.

    Both tensors are flattened (predictions to one column, targets to one
    dimension) and scored by ``lovasz_softmax_flat`` for class ``1`` only.
    """
    flat_probas = pred_masks.view(-1, 1)
    flat_labels = target_masks.view(-1)
    return lovasz_softmax_flat(flat_probas, flat_labels, [1])

# Evaluation Function

In [None]:
def get_selected_text(text, start_idx, end_idx, offsets):
    """Rebuild the text covered by tokens ``start_idx`` .. ``end_idx``.

    Each token contributes ``text[start:end]`` from its offset pair; a single
    space is appended whenever the following token starts after the current
    one ends. Returns ``""`` when ``start_idx > end_idx``.
    """
    n_tokens = len(offsets)
    pieces = []
    for tok in range(start_idx, end_idx + 1):
        tok_start, tok_end = offsets[tok][0], offsets[tok][1]
        pieces.append(text[tok_start:tok_end])
        following = tok + 1
        if following < n_tokens and tok_end < offsets[following][0]:
            pieces.append(" ")
    return "".join(pieces)

def get_selected_text2(text, idxs, offsets):
    """Rebuild the text covered by the token indices in ``idxs``.

    Works like ``get_selected_text`` but takes an explicit sequence of token
    indices instead of a contiguous range. A space is appended after a token
    whenever the token that follows it in ``offsets`` starts after it ends.
    """
    n_tokens = len(offsets)
    pieces = []
    for tok in idxs:
        tok_start, tok_end = offsets[tok][0], offsets[tok][1]
        pieces.append(text[tok_start:tok_end])
        following = tok + 1
        if following < n_tokens and tok_end < offsets[following][0]:
            pieces.append(" ")
    return "".join(pieces)

def jaccard(str1, str2):
    """Word-level Jaccard similarity of two strings (case-insensitive).

    Both strings are lower-cased and split on whitespace; the score is the
    size of the intersection of the two word sets divided by the size of
    their union.

    Returns:
        A float in [0, 1]. Two strings without any words are treated as
        identical and score 1.0 (this case used to raise ZeroDivisionError).
    """
    a = set(str1.lower().split())
    b = set(str2.lower().split())
    union_size = len(a | b)
    if union_size == 0:
        # Both sets empty: conventionally J(empty, empty) = 1.
        return 1.0
    return float(len(a & b)) / union_size

def compute_jaccard_score(text, start_idx, end_idx, start_logits, end_logits, offsets):
    """Jaccard score of an argmax start/end prediction against the true span.

    The predicted span runs from the argmax of ``start_logits`` to the argmax
    of ``end_logits``; when the start lies after the end, the whole ``text``
    is used as the prediction.
    """
    pred_start = np.argmax(start_logits)
    pred_end = np.argmax(end_logits)

    reference = get_selected_text(text, start_idx, end_idx, offsets)
    if pred_start <= pred_end:
        candidate = get_selected_text(text, pred_start, pred_end, offsets)
    else:
        candidate = text
    return jaccard(reference, candidate)

def compute_jaccard_score2(text, target_masks, pred_masks, offsets):
    """Jaccard score of a thresholded mask prediction against the target mask.

    Tokens whose predicted value exceeds 0.5 form the prediction and tokens
    whose target equals 1 form the reference. Returns 0 when no token exceeds
    the threshold.
    """
    # First axis of the hits = token positions.
    pred_idxs = np.where(pred_masks > 0.5)[0]
    if len(pred_idxs) == 0:
        return 0

    pred = get_selected_text2(text, pred_idxs, offsets)
    true_idxs = np.where(target_masks == 1)[0]
    true = get_selected_text2(text, true_idxs, offsets)
    return jaccard(true, pred)

In [None]:
# p = torch.randn(3,9,1)
# p = nn.Sigmoid()(p)
# i = (p>0.5).nonzero()
# i = torch.where(p>1)
# p[i[0]].shape
# p[i]
# i = torch.stack(i).T
# len(i)
# p[i[0]][i[1]], i, p

In [None]:
# torch.cuda.device_count()

In [None]:
# math.isfinite([1,2,3])

In [None]:
# a = torch.tensor([[1,2,3],[4,5,6]])
# a.type(torch.float32)
# # a

# Training Function

In [None]:
def train_model(model, dataloaders_dict, criterion, optimizer, num_epochs, filename, device):
    """Train and validate ``model``, logging loss and Jaccard per phase.

    For every epoch the ``'train'`` and ``'val'`` loaders are each iterated
    once. After each phase the accumulated metrics list is pickled to
    ``metrics.pkl`` in the directory of ``filename``; after each epoch the
    model's ``state_dict`` is saved to ``filename`` (overwriting the previous
    epoch's checkpoint).

    Args:
        model: Module called as ``model(ids, masks)``.
        dataloaders_dict: Dict with ``'train'`` and ``'val'`` loaders whose
            batches provide ``ids``, ``masks``, ``tweet``, ``offsets`` and
            ``target_mask``.
        criterion: Callable ``criterion(pred_masks, target_masks)`` returning
            a scalar loss tensor.
        optimizer: Optimizer stepping ``model``'s parameters.
        num_epochs: Number of epochs to run.
        filename: Path of the checkpoint file.
        device: Device the model and batches are moved to.
    """
    # .to() accepts CPU as well as CUDA devices, unlike .cuda().
    model.to(device)
    # Keep the metrics next to the checkpoint instead of relying on a
    # module-level `folder` global.
    metrics_path = os.path.join(os.path.dirname(filename), 'metrics.pkl')
    metrics = []
    print_every = 10

    for epoch in range(num_epochs):
        print(f'Epoch: {epoch}')
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            loader = dataloaders_dict[phase]
            total = len(loader)
            epoch_loss = 0.0
            epoch_jaccard = 0.0
            n_samples = 0

            t = tqdm(total=total)
            try:
                for j, data in enumerate(loader):
                    ids = data['ids'].to(device)
                    masks = data['masks'].to(device)
                    tweet = data['tweet']
                    offsets = data['offsets'].numpy()
                    target_masks = data['target_mask'].type(torch.float32).to(device)

                    optimizer.zero_grad()
                    with torch.set_grad_enabled(is_train):
                        pred_masks = model(ids, masks)
                        loss = criterion(pred_masks, target_masks)
                        if is_train:
                            loss.backward()
                            optimizer.step()

                    batch_len = len(ids)
                    n_samples += batch_len
                    epoch_loss += loss.item() * batch_len

                    target_np = target_masks.detach().cpu().numpy()
                    pred_np = pred_masks.detach().cpu().numpy()
                    for text_i, target_i, pred_i, offsets_i in zip(
                            tweet, target_np, pred_np, offsets):
                        epoch_jaccard += compute_jaccard_score2(
                            text_i, target_i, pred_i, offsets_i)

                    # Advance the bar in strides of print_every batches.
                    if j % print_every == 0:
                        t.update(print_every if j + print_every < total else total - j)
            finally:
                # Always release the progress bar, even if a batch fails.
                t.close()

            # Average over the samples actually processed: a loader created
            # with drop_last=True sees fewer samples than len(dataset).
            denom = max(n_samples, 1)
            epoch_loss = epoch_loss / denom
            epoch_jaccard = epoch_jaccard / denom

            print('Epoch {}/{} | {:^5} | Loss: {:.4f} | Jaccard: {:.4f}'.format(
                epoch + 1, num_epochs, phase, epoch_loss, epoch_jaccard))

            metrics.append([phase, epoch_loss, epoch_jaccard])
            with open(metrics_path, 'wb') as f:
                pkl.dump(metrics, f)

        torch.save(model.state_dict(), filename)

# Training

In [None]:
# Default run settings; num_epochs is reassigned in the training cell below.
num_epochs = 3
batch_size = 48

# %%time
# train_file = '../input/tweet-sentiment-extraction/train.csv'
train_file = '../tweet-sentiment-extraction/train.csv'
train_df = pd.read_csv(train_file)
# Force both text columns to str so missing values become the string 'nan'
# instead of float NaN (the dataset code calls .lower() on them).
train_df['text'] = train_df['text'].astype(str)
train_df['selected_text'] = train_df['selected_text'].astype(str)
# train_df = train_df[train_df['sentiment']!='neutral']

# NOTE(review): this instance is replaced by a fresh TweetModel() in the
# training cell below, so the pretrained weights are loaded twice.
model = TweetModel()
model.eval();

In [None]:
# for fold, (train_idx, val_idx) in enumerate(skf.split(train_df, train_df.sentiment), start=1): 
#     print(f'Fold: {fold}')

#     model = TweetModel()
#     optimizer = optim.AdamW(model.parameters(), lr=3e-5, betas=(0.9, 0.999))
#     criterion = loss_fn2    
#     dataloaders_dict = get_train_val_loaders(train_df, train_idx, val_idx, batch_size)
#     device = torch.device('cuda:2')
# #     train_model(
# #         model, 
# #         dataloaders_dict,
# #         criterion, 
# #         optimizer, 
# #         num_epochs,
# #         folder+f'roberta_fold{fold}.pth',
# #         device)
    
#     model.cuda(device)
    
#     for epoch in range(num_epochs):
#         print(f'Epoch: {epoch}')
#         for phase in ['train', 'val']:
#             if phase == 'train':
#                 model.train()
#             else:
#                 model.eval()

#             epoch_loss = 0.0
#             epoch_jaccard = 0.0
#             total = (len(dataloaders_dict[phase]))
#             t = tqdm(total=total)
#             print_every=10
                        
#             for j, data in enumerate(dataloaders_dict[phase]):
#                 ids = data['ids'].cuda(device)
#                 masks = data['masks'].cuda(device)
#                 tweet = data['tweet']
#                 offsets = data['offsets'].numpy()
# #                 start_idx = torch.tensor( data['start_idx'] ).cuda(device)
# #                 end_idx = data['end_idx'].cuda(device)
# #                 selected_text = data['selected_text']
#                 target_masks = (  data['target_mask'].type(torch.float32)  ).cuda(device)
#     #             start_logits, end_logits = model(ids, masks)

#     #             loss = criterion(start_logits, end_logits, start_idx, end_idx)
#                 optimizer.zero_grad()
                
#                 with torch.set_grad_enabled(phase == 'train'):
#                     pred_masks = model(ids, masks)

#                     loss = criterion(pred_masks, target_masks)
#                     if phase == 'train':
#                         loss.backward()
#                         optimizer.step()             
            
#                     epoch_loss += loss.item() * len(ids)
                    
#                     target_masks = target_masks.cpu().detach().numpy()
#                     pred_masks = pred_masks.cpu().detach().numpy()
    
#                     for i in range(len(ids)):
#                         text = tweet[i]
#                         target_masks_ = target_masks[i]
#                         pred_masks_ = pred_masks[i].reshape(pred_masks[i].shape[0])
#                         offsets_ = offsets[i]
                            
#                         idxs = np.array(np.where(pred_masks_>0.5))[0].T
#                         if len(idxs)==0: 
#                             js = 0
#                         else:
#                             pred = get_selected_text2(text, idxs, offsets_)
# #                             print(idxs)
#                             idxs_true = np.array(np.where(target_masks_==1))[0].T
#                             true = get_selected_text2(text, idxs_true, offsets_)
#                             js = jaccard(true,pred)
#                         jaccard_score = js
#                         epoch_jaccard += jaccard_score
# #                 print(i%print_every==0)
#                 break
#                 if j%print_every==0:
# #                     print('!')
#                     t.update(print_every if j+print_every<total else total-j)
                
#             t.close(); 
#             epoch_loss = epoch_loss / len(dataloaders_dict[phase].dataset)
#             epoch_jaccard = epoch_jaccard / len(dataloaders_dict[phase].dataset)
            
#             print('Epoch {}/{} | {:^5} | Loss: {:.4f} | Jaccard: {:.4f}'.format(
#                 epoch + 1, num_epochs, phase, epoch_loss, epoch_jaccard))
            
# #         torch.save(model.state_dict(), filename)
#         break
#     break

In [None]:
# 10-fold split stratified by sentiment; only the first fold (fold == 1) is used.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
fold, (train_idx, val_idx) = next(
    enumerate(skf.split(train_df, train_df.sentiment), start=1))

In [None]:
# Overrides the num_epochs value set in the data-loading cell above.
num_epochs = 20
# skf = StratifiedKFold(n_splits=2, shuffle=True, random_state=seed)
# for fold, (train_idx, val_idx) in enumerate(skf.split(train_df, train_df.sentiment), start=1): 
#     break
# for fold, (train_idx, val_idx) in enumerate((train_df, train_df.sentiment), start=1): 
#     print(f'Fold: {fold}')

# Fresh model, optimizer and loaders for the fold selected in the previous
# cell (train_idx / val_idx / fold come from there).
model = TweetModel()
optimizer = optim.AdamW(model.parameters(), lr=1e-4, betas=(0.9, 0.999))
# Lovasz loss on the per-token mask probabilities.
criterion = loss_fn3    
dataloaders_dict = get_train_val_loaders(train_df, train_idx, val_idx, batch_size)

# Checkpoint goes to <folder>/roberta_fold<fold>.pth.
train_model(
    model, 
    dataloaders_dict,
    criterion, 
    optimizer, 
    num_epochs,
    folder+f'roberta_fold{fold}.pth',
    device)

In [None]:
# Debug probe: smallest value in the parameter tensor at index 2 of the
# optimizer's first param group.
torch.min( (optimizer.param_groups[0])['params'][2] )

In [None]:
# for i in range(203):
# #     print(torch.min( (optimizer.param_groups[0])['params'][i] ))
# #     print(torch.max( (optimizer.param_groups[0])['params'][i] ))
# #     print((optimizer.param_groups[0])['params'][i].shape )
#     print(torch.sum(torch.isnan(optimizer.param_groups[0]['params'][i])).cpu().numpy())

In [None]:
# Debug probe: number of parameter tensors in the optimizer's first param group.
len((optimizer.param_groups[0])['params'])

In [None]:
# (pred_masks_.view(pred_masks_.shape[0], pred_masks_.shape[1]) * target_masks_)[0]

In [None]:
# Scratch check: element-wise product of two (3, 5) tensors keeps the shape.
# NOTE: torch.FloatTensor(3, 5) allocates uninitialized memory, so only the
# shape of r is meaningful here, not its values.
p = torch.FloatTensor(3,5)
q = torch.FloatTensor(3,5)
r = p*q
r.shape

In [None]:
# for fold, (train_idx, val_idx) in enumerate(skf.split(train_df, train_df.sentiment), start=1): 
#     print(train_idx, val_idx)
#     break

In [None]:
# val_idx.shape

In [None]:
# optimizer.state_dict().keys()

In [None]:
# print(pred_masks.cpu().shape)

In [None]:
# print(target_masks.cpu().shape)
# torch.unique(target_masks.cpu())

In [None]:
# t = torch.tensor(torch.FloatTensor(3,9))
# t = nn.Sigmoid()(t)
# t = t>0.5
# t = t.type(torch.float32)
# p = torch.tensor(torch.FloatTensor(3,9))
# p = nn.Sigmoid()(p)

In [None]:
# nn.BCELoss()(p, t)

In [None]:
# torch.max(pred_masks)
# torch.min(pred_masks)

In [None]:
# model.eval()

In [None]:
# lhs.shape

In [None]:
# config = RobertaConfig.from_pretrained(
#             '../input/roberta-base/config.json', output_hidden_states=True)    
# config.hidden_size

In [None]:
# lhs[0][:10][:10]

In [None]:
# x = torch.stack([hs[-1], hs[-2], hs[-3], hs[-4]])

In [None]:
# x = torch.mean(x, 0)
# x.shape

In [None]:
# nn.Sigmoid()(x)

# Inference

In [None]:
# %%time

# test_df = pd.read_csv('../input/tweet-sentiment-extraction/test.csv')
# test_df['text'] = test_df['text'].astype(str)
# test_loader = get_test_loader(test_df)
# predictions = []
# models = []
# for fold in range(skf.n_splits):
#     model = TweetModel()
#     model.cuda()
#     model.load_state_dict(torch.load(f'roberta_fold{fold+1}.pth'))
#     model.eval()
#     models.append(model)

# for data in test_loader:
#     ids = data['ids'].cuda()
#     masks = data['masks'].cuda()
#     tweet = data['tweet']
#     offsets = data['offsets'].numpy()

#     start_logits = []
#     end_logits = []
#     for model in models:
#         with torch.no_grad():
#             output = model(ids, masks)
#             start_logits.append(torch.softmax(output[0], dim=1).cpu().detach().numpy())
#             end_logits.append(torch.softmax(output[1], dim=1).cpu().detach().numpy())

#     start_logits = np.mean(start_logits, axis=0)
#     end_logits = np.mean(end_logits, axis=0)
#     for i in range(len(ids)):    
#         start_pred = np.argmax(start_logits[i])
#         end_pred = np.argmax(end_logits[i])
#         if start_pred > end_pred:
#             pred = tweet[i]
#         else:
#             pred = get_selected_text(tweet[i], start_pred, end_pred, offsets[i])
#         predictions.append(pred)

# Submission

In [None]:
# sub_df = pd.read_csv('../input/tweet-sentiment-extraction/sample_submission.csv')
# sub_df['selected_text'] = predictions
# sub_df['selected_text'] = sub_df['selected_text'].apply(lambda x: x.replace('!!!!', '!') if len(x.split())==1 else x)
# sub_df['selected_text'] = sub_df['selected_text'].apply(lambda x: x.replace('..', '.') if len(x.split())==1 else x)
# sub_df['selected_text'] = sub_df['selected_text'].apply(lambda x: x.replace('...', '.') if len(x.split())==1 else x)
# sub_df.to_csv('submission.csv', index=False)
# sub_df.head()

In [None]:
# from tqdm.notebook import tqdm_notebook
# import time
# t2 = tqdm_notebook(total=10)
# for j in range(10):
#     t = tqdm_notebook(total=100)
#     for i in range(100):
#         time.sleep(0.01)
#         if i%12==0: 
#             if 100-i>12: t.update(12)
#             else: t.update(100-i)
#     t.close(); del t
#     t2.update(1)
# t2.close()

In [None]:
# from tqdm.notebook import tqdm
# import time
# import sys

# t2 = tqdm(total=10, file=sys.stdout)
# for j in range(10):
#     t3 = tqdm(total=100, file=sys.stdout, disable=True)
#     for i in range(100):
#         time.sleep(0.01)
#         if i%12==0: 
#             if 100-i>12: t3.update(12)
#             else: t3.update(100-i)
#             t3.refresh()
#     t3.close(); del t3
#     t2.update(1)
# t2.close()

In [None]:
# %10

In [None]:
# batch_size=48
# dataloaders_dict = get_train_val_loaders(train_df, train_idx, val_idx, batch_size)
# tl, vl = dataloaders_dict.values()
# len(tl)

In [None]:
# len(train_df)

In [None]:
# import torch
# a = torch.FloatTensor(3,5)
# a.size(1)