# Training Pipeline

All dataloading and training in one place!

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.model_selection import KFold, GroupKFold, train_test_split
from datetime import datetime
from tqdm.notebook import tqdm
import albumentations as A
import matplotlib.pyplot as plt
import os
from typing import List, Tuple, Union
import pickle
from lightning.pytorch.loggers.tensorboard import TensorBoardLogger
from torcheval.metrics import MulticlassAccuracy
from torcheval.metrics.functional import multiclass_f1_score

#Local packages
from src import CNNDetector

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
"""
Configure

Change the paths of the data directory to the location of your HMS Dataset.
The sub directories of TRAIN/TEST_EEG, TRAIN/TEST_SPEC should remain the same

"""

#Kaggle
# DATA_ROOT = "/kaggle/input/hms-harmful-brain-activity-classification/"
# TRAIN_EEG = "train_eegs"
# TRAIN_SPEC = "train_spectrograms"
# TEST_EEG = "test_eegs"
# TEST_SPEC = "test_spectrograms"

#Local
DATA_ROOT = "/home/benluo/HBAC/data/hbac"
TRAIN_EEG = "train_eegs"
TRAIN_SPEC = "train_spectrograms"
TEST_EEG = "test_eegs"
TEST_SPEC = "test_spectrograms"

In [3]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

cuda


In [4]:
#Get reduced train list as the entire dataset is heavy on RAM, 

train_list = pd.read_csv(os.path.join(DATA_ROOT, "train.csv"))

reduced_len = train_list.shape[0]//2 #32 GB of RAM during training
# reduced_len = train_list.shape[0]//4 #Less RAM but less data

train_list_reduced = train_list.iloc[:reduced_len,:]

train_list_reduced.to_csv(os.path.join(DATA_ROOT, "train_reduced.csv"))

### Generate Data

Since test data does not come with labels, for local training and testing, we will only use the data from `train.csv`

The Dataset follows a train-val-test split.

The train and test datasets are split first by `test_size`.

The train and validation datasets are split by Group K-Folds into `val_folds` uniform groups. One group will be chosen randomly for validation while the rest will be used for training every epoch.

In [5]:
def generate_data(val_folds:int = 5,
                  test_size:float = 0.1,
                  saved_spec:str = "all_spec.pkl",
                  saved_eeg:str = "all_eeg.pkl"):

    #Use reduced csv file
    train_list = pd.read_csv(os.path.join(DATA_ROOT, "train_reduced.csv"))

    print("All data", train_list.shape)
    display(train_list.head())

    label_cols = train_list.columns[-6:]

    #Create new df to be formated for training and testing
    train_df = train_list[['spectrogram_id','eeg_id','patient_id','spectrogram_label_offset_seconds','eeg_label_offset_seconds']]

    #Normalise labels into probabilities
    aux = train_list[label_cols].copy()
    
    for label in label_cols:
        train_df[label] = aux[label].values
        
    y_data = train_df[label_cols].values
    y_data = y_data / y_data.sum(axis=1,keepdims=True)
    train_df[label_cols] = y_data

    #Target label/class
    aux = train_list['expert_consensus'].copy()
    train_df['target'] = aux
    train_df = train_df.reset_index()

    #Sort df by patient id so testing and training data will be less similar
    train_df = train_df.sort_values("patient_id")

    test_df = train_df.iloc[int((1-test_size)*train_df.shape[0]):]

    train_df = train_df.iloc[:int((1-test_size)*train_df.shape[0])]

    train_df = train_df.reset_index()
    test_df = test_df.reset_index()

    print("Training data")
    display(train_df.head())

    print("Testing data")
    display(test_df.head())


    gkf = GroupKFold(n_splits=val_folds)

    #KFold grouped by patient id
    for fold, (train_index, val_index) in enumerate(gkf.split(train_df, train_df.target, train_df.patient_id)):
        train_df.loc[val_index, "fold"] = int(fold)
    
    all_eeg = {}
    
    all_spec = {}

    #Try loading eeg and spec data if they have been generated previously
    #If not, read eeg and spec data from train_list and save the collection in data folder
    #Data is saved as float16 due to space constraints
    try:

        with open(os.path.join(DATA_ROOT,saved_spec), "rb") as handle:
            all_spec = pickle.load(handle)

        with open(os.path.join(DATA_ROOT,saved_eeg), "rb") as handle:
            all_eeg = pickle.load(handle)

    except:
    
        for idx, row in tqdm(train_list.iterrows()):

            spec_id = row["spectrogram_id"]
            eeg_id = row["eeg_id"]

            if spec_id not in all_spec:

                spec = pd.read_parquet(os.path.join(DATA_ROOT, TRAIN_SPEC, str(spec_id)+".parquet"))

                all_spec[spec_id] = spec.iloc[:,1:].values.astype(dtype=np.float32)

            if eeg_id not in all_eeg:

                eeg = pd.read_parquet(os.path.join(DATA_ROOT, TRAIN_EEG, str(eeg_id)+".parquet"))

                all_eeg[eeg_id] = eeg.iloc[:,1:].values.astype(dtype=np.float32)
        
        with open(os.path.join(DATA_ROOT, saved_eeg), "wb") as handle:
            pickle.dump(all_eeg, handle)
        
        with open(os.path.join(DATA_ROOT, saved_spec), "wb") as handle:
            pickle.dump(all_spec, handle)
    
    return train_df, test_df, all_eeg, all_spec, label_cols

In [6]:
train_df, test_df, all_eeg, all_spec, label_cols = generate_data()

All data (53400, 16)


Unnamed: 0.1,Unnamed: 0,eeg_id,eeg_sub_id,eeg_label_offset_seconds,spectrogram_id,spectrogram_sub_id,spectrogram_label_offset_seconds,label_id,patient_id,expert_consensus,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote
0,0,1628180742,0,0.0,353733,0,0.0,127492639,42516,Seizure,3,0,0,0,0,0
1,1,1628180742,1,6.0,353733,1,6.0,3887563113,42516,Seizure,3,0,0,0,0,0
2,2,1628180742,2,8.0,353733,2,8.0,1142670488,42516,Seizure,3,0,0,0,0,0
3,3,1628180742,3,18.0,353733,3,18.0,2718991173,42516,Seizure,3,0,0,0,0,0
4,4,1628180742,4,24.0,353733,4,24.0,3080632009,42516,Seizure,3,0,0,0,0,0


Training data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,level_0,index,spectrogram_id,eeg_id,patient_id,spectrogram_label_offset_seconds,eeg_label_offset_seconds,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote,target
0,41744,41744,802850878,1873660287,56,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,Other
1,49045,49045,957002006,165634434,56,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,Other
2,41746,41746,802850878,2057577408,56,290.0,46.0,0.0,0.0,0.0,0.0,0.0,1.0,Other
3,41745,41745,802850878,2057577408,56,244.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,Other
4,25278,25278,497667405,374550767,56,694.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,Other


Testing data


Unnamed: 0,level_0,index,spectrogram_id,eeg_id,patient_id,spectrogram_label_offset_seconds,eeg_label_offset_seconds,seizure_vote,lpd_vote,gpd_vote,lrda_vote,grda_vote,other_vote,target
0,5410,5410,91996359,3686473557,57272,122.0,122.0,0.0,0.0,0.0,1.0,0.0,0.0,LRDA
1,5389,5389,91996359,3686473557,57272,72.0,72.0,0.0,0.0,0.0,1.0,0.0,0.0,LRDA
2,5384,5384,91996359,3686473557,57272,60.0,60.0,0.0,0.0,0.0,1.0,0.0,0.0,LRDA
3,5394,5394,91996359,3686473557,57272,82.0,82.0,0.0,0.0,0.0,1.0,0.0,0.0,LRDA
4,5385,5385,91996359,3686473557,57272,62.0,62.0,0.0,0.0,0.0,1.0,0.0,0.0,LRDA


In [7]:
class HMSDataset(Dataset):

    def __init__(self,
                 df:pd.DataFrame = None,
                 aug: bool = False) -> None:
        

        super(HMSDataset, self).__init__()

        self.df = df

        self.eeg_sample_freq = 200 # 200 Hz
        self.spec_sample_freq = 0.5 # 0.5 Hz
    
        
        #data augmentation
        self.aug = aug
        self.transforms = A.Compose([
            A.HorizontalFlip(p=0.5)
        ])

    def format_data(self, eeg:np.array, spec:np.array) -> Tuple[torch.tensor]:

        #Epsilon for numerical stability during division (prevent division by zero)
        eps = 1e-6

        #Convert data from saved float16 to float32 during training and testing
        eeg = eeg.astype(dtype=np.float32)
        spec = spec.astype(dtype=np.float32)

        #Normalising and getting rid of Nans
        eeg_mean = np.nanmean(eeg.flatten())
        eeg_std = np.nanstd(eeg.flatten())
        eeg = (eeg-eeg_mean)/(eeg_std+eps)
        eeg = np.nan_to_num(eeg, nan=0.0)

        #Limiting range of spec data
        spec = np.clip(spec, np.exp(-4), np.exp(8))
        spec = np.log(spec)
        
        #Normalising and getting rid of Nans
        spec_mean = np.nanmean(spec.flatten())
        spec_std = np.nanstd(spec.flatten())
        spec = (spec-spec_mean)/(spec_std+eps)
        spec = np.nan_to_num(spec, nan=0.0)
        
        #If data augmentation
        if self.aug:

            eeg = self.transforms(image=eeg)["image"]
            spec = self.transforms(image=spec)["image"]

        #Convert to tensors
        eeg = torch.tensor(eeg.copy())
        spec = torch.tensor(spec.copy())

        return eeg, spec


    def __getitem__(self, index) -> dict:
    
        row = self.df.iloc[index]

        eeg_id = row["eeg_id"]
        spec_id = row["spectrogram_id"]

        #EEG Sub-sampling
        start = int(row["eeg_label_offset_seconds"]*self.eeg_sample_freq)
        end = int((row["eeg_label_offset_seconds"]+50)*self.eeg_sample_freq)
        eeg = all_eeg[eeg_id][start:end]
        
        #Spectrogram Sub-sampling
        start = int(row["spectrogram_label_offset_seconds"]*self.spec_sample_freq)
        end = int((row["spectrogram_label_offset_seconds"]+600)*self.spec_sample_freq)
        spec = all_spec[spec_id][start:end].T

        #Normalizing and getting rid of Nans
        eeg, spec = self.format_data(eeg, spec)

        #Convert label to tensor
        label = torch.tensor(row[label_cols], dtype=torch.float32)

        return eeg, spec, label


    def __len__(self):

        return self.df.shape[0]

    @staticmethod
    def collate_fn(batch):

        eeg, spec, label = zip(*batch)
        
        if eeg is not None:
            eeg = torch.stack(eeg, dim=0).float().unsqueeze(1)
        
        if spec is not None:
            spec = torch.stack(spec, dim=0).float().unsqueeze(1).expand(-1,3,-1,-1)

        if label is not None:            
            label = torch.stack(label, dim=0)
        
        return {
            "eeg": eeg,
            "spec": spec,
            "label": label
        }
    

### Model

Load your model here, the current models are supported:

1. CNN
    - ConvNext
    - EfficientNet b0
    - EfficientNet v2s
2. Vision Transformers
    - ViT tiny
    - Hiera tiny
3. Custom Architecture
    - dual stream

In [8]:
model_config = {
    "model_name": "simple",
    "num_classes": 6,
}

model = CNNDetector(model_config).to(device)

NaiveCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (batch_norm1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (batch_norm2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (avg_pool): AvgPool2d(kernel_size=5, stride=5, padding=0)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=8512, out_features=512, bias=True)
  (drop1): Dropout(p=0.1, inplace=False)
  (fc2): Linear(in_features=512, out_features=6, bias=True)
)


### Training and Testing

Validation is done every `test_step` to check if model is overfitting on training data.

Testing is done at the end of every epoch with the model produced by that epoch.

Training and testing metrics can be viewed in tensorboard as mentioned in the README.

In [9]:
@torch.no_grad()
def validate(model, valid_dataloader):

    model.eval()

    loss_fn = nn.KLDivLoss(reduction="batchmean")

    loss = torch.tensor([0.]).to(device)

    for batch in tqdm(valid_dataloader):

        for key in batch:

            batch[key] = batch[key].to(device)

        predictions = model(batch)
        
        predictions = F.log_softmax(predictions,dim=1)

        loss += loss_fn(predictions,batch["label"])

    loss = loss / len(valid_dataloader)

    model.train()

    print("Validation loss", loss.item())

    return loss.item()

@torch.no_grad()
def test(model, test_dataloader):

    model.eval()

    loss_fn = nn.KLDivLoss(reduction="batchmean")

    num_classes = 6

    acc = MulticlassAccuracy(average="macro", num_classes=num_classes)
    f1_score = torch.tensor([0.]).to(device)
    loss = torch.tensor([0.]).to(device)
     
    for batch in tqdm(test_dataloader):

        for key in batch:
            batch[key] = batch[key].to(device)

        predictions = model(batch)

        loss += loss_fn(F.log_softmax(predictions, dim=1), batch["label"])

        predictions = F.softmax(predictions, dim=1)

        predictions = torch.argmax(predictions, dim=1)

        classes = torch.argmax(batch["label"], dim=1)

        f1_score += multiclass_f1_score(predictions, classes, num_classes=num_classes)

        acc.update(predictions, classes)

    model.train()

    f1_score /= len(test_dataloader)
    loss /= len(test_dataloader)

    return acc.compute(), f1_score, loss

#training

def train_epoch(model=None,
          train_dataloader=None,
          valid_dataloader=None,
          test_dataloader=None,
          optimiser = None,
          train_config=None,
          valid_config=None,
          lr_scheduler=None,
          min_valid_loss=100.,
          min_test_loss=100.,
          model_name="model",
          epoch=0,
          logger=None):
    
    """
    Training Function
    """

    assert(train_dataloader is not None)
    
    assert(model is not None)

    #Training Params

    save_model = train_config.get("save_model", True)
    to_model_keys = ["spec", "eeg", "label"]
    valid_step = valid_config.get("valid_step", 1000)
    verbose_step = train_config.get("verbose_step", 10)
    
    
    loss_fn = nn.KLDivLoss(reduction="batchmean")
    
    model.train()
    
    for itr, batch in tqdm(enumerate(train_dataloader)):

#       print(batch["eeg"].shape)

        #Train One Iteration

        #Load data to GPU

        for key in to_model_keys:

            batch[key] = batch[key].to(device)

        predictions = model(batch)
        
        predictions = F.log_softmax(predictions,dim=1)

        loss = loss_fn(predictions,batch["label"])
        
        optimiser.zero_grad()

        loss.backward()

        optimiser.step()
        
        if lr_scheduler is not None:
            lr_scheduler.step()
        
        if itr%verbose_step==0:
            print(f"Training itr {itr}/{len(train_dataloader)}")
            for param_group in optimiser.param_groups:
                lr = param_group['lr']
                break
            print("Training Loss: ", loss.item(), "Learning Rate:", lr)

            logger.log_metrics({"train/loss":loss, "train/lr": lr}, itr+epoch*len(train_dataloader))
        
        #Validation
        
        if itr%valid_step==0 and itr>0 and valid_dataloader is not None:
            print("Validation")
            valid_loss = validate(model, valid_dataloader)
            
            if valid_loss < min_valid_loss and save_model:
                min_valid_loss = valid_loss
                save_path = os.path.join(DATA_ROOT,"models")
                os.makedirs(save_path, exist_ok=True)
                torch.save(model.state_dict(), os.path.join(save_path, f"{model_name}_best.pt"))
            logger.log_metrics({"valid/loss":valid_loss}, itr+epoch*len(train_dataloader))
        
    #Test
    if test_dataloader is not None:
        acc, f1_score, loss = test(model, test_dataloader)
        if loss < min_test_loss:
            min_test_loss = loss
        logger.log_metrics({"test/acc":acc, "test/f1_score":f1_score, "test/loss":loss}, itr+epoch*len(train_dataloader))
    return min_valid_loss, min_test_loss

In [10]:
def train(model=None,
          optimiser=None,
          lr_scheduler=None,
          train_config=None,
          valid_config=None,
          test_dataloader=None,
          model_name="model",
          logger=None):
    
    train_batch_size = train_config.get("batch_size", 32)
    num_epochs = train_config.get("num_epochs", 10)
    valid_folds = train_config.get("valid_folds", 5)
    valid_batch_size = valid_config.get("batch_size", 32)
    train_workers = train_config.get("workers", 1)
    valid_workers = valid_config.get("workers", 1)

    min_valid_loss = 100.
    min_test_loss = 100.

    for epoch in range(num_epochs):

        print("Epoch", epoch)

        #KGFolds
        fold = np.random.randint(valid_folds)
        train_df_fold = train_df[train_df["fold"]!=fold]
        valid_df = train_df[train_df["fold"]==fold]
        train_dataset = HMSDataset(train_df_fold, aug=True)
        valid_dataset = HMSDataset(valid_df, aug=False)

        train_dataloader = DataLoader(
            dataset=train_dataset,
            batch_size = train_batch_size,
            shuffle=True,
            num_workers=train_workers,
            collate_fn=train_dataset.collate_fn
        )

        valid_dataloader = DataLoader(
            dataset=valid_dataset,
            batch_size = valid_batch_size,
            shuffle=True,
            num_workers=valid_workers,
            collate_fn=valid_dataset.collate_fn
        )

        min_valid_loss, min_test_loss = train_epoch(model=model,
                    train_dataloader=train_dataloader,
                    valid_dataloader=valid_dataloader,
                    test_dataloader=test_dataloader,
                    optimiser=optimiser,
                    train_config=train_config,
                    valid_config=valid_config,
                    lr_scheduler=lr_scheduler,
                    min_valid_loss=min_valid_loss,
                    min_test_loss=min_test_loss,
                    model_name=model_name,
                    epoch=epoch,
                    logger=logger)
        print("Min Valid Loss", min_valid_loss, "Min Test Loss", min_test_loss)

#### Hyperparameters

Configuration of hyperparamters for training and validation/testing is shown below.

In [11]:
train_config = {
    "batch_size": 32,
    "num_epochs": 5,
    "lr": 1e-3,
    "valid_folds":5,
    "save_model": True,
    "workers": 1,
    "verbose_step": 100,
    "weight_decay": 1e-4,
    "valid_folds": 5,
}

valid_config = {
    "batch_size": 32,
    "workers": 1,
    "valid_step": 500
}

lr = train_config.get("lr", 1e-3)
weight_decay = train_config.get("weight_decay", 0.)
model_name = model_config.get("model_name", "efficientnet_b0")
num_epochs = train_config.get("num_epochs", 1)
folds = train_config.get("valid_folds", 5)
train_batch_size = train_config.get("batch_size", 32)
test_batch_size = valid_config.get("batch_size", 32)


logger = TensorBoardLogger(f"logs/{model_name}", name=model_name)

optimiser = torch.optim.Adam(
model.parameters(),
lr=lr,
weight_decay=weight_decay
)

lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimiser,
    max_lr=lr,
    epochs=num_epochs,
    steps_per_epoch=int((folds-1)/folds*train_df.shape[0]/train_batch_size)+10
)

test_dataset = HMSDataset(df=test_df)

test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=test_batch_size,
    collate_fn=test_dataset.collate_fn
)

torch.cuda.empty_cache()

train(model=model,
      train_config=train_config,
      valid_config=valid_config,
      test_dataloader=test_dataloader,
      model_name=model_name,
      optimiser=optimiser,
      lr_scheduler=lr_scheduler,
      logger=logger)

    

Epoch 0


0it [00:00, ?it/s]

Training itr 0/1202
Training Loss:  1.4071985483169556 Learning Rate: 4.0000718651559374e-05
Training itr 100/1202
Training Loss:  0.8758535981178284 Learning Rate: 4.731232458808946e-05
Training itr 200/1202
Training Loss:  0.9276068210601807 Learning Rate: 6.874272265430651e-05
Training itr 300/1202
Training Loss:  1.0700860023498535 Learning Rate: 0.00010365180448417455
Training itr 400/1202
Training Loss:  1.002517819404602 Learning Rate: 0.00015099686451240733
Training itr 500/1202
Training Loss:  0.7169563174247742 Learning Rate: 0.0002093637447321907
Validation


  0%|          | 0/301 [00:00<?, ?it/s]

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  np.subtract(arr, avg, out=arr, casting='unsafe')


Validation loss 1.1432820558547974
Training itr 600/1202
Training Loss:  0.668945848941803 Learning Rate: 0.00027700907443185254
Training itr 700/1202
Training Loss:  0.7894634008407593 Learning Rate: 0.0003519123432442852
Training itr 800/1202
Training Loss:  0.8922998905181885 Learning Rate: 0.00043183625212995895
Training itr 900/1202
Training Loss:  0.7074231505393982 Learning Rate: 0.0005143935396592931
Training itr 1000/1202
Training Loss:  0.5392115116119385 Learning Rate: 0.0005971182875482926
Validation


  0%|          | 0/301 [00:00<?, ?it/s]

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  np.subtract(arr, avg, out=arr, casting='unsafe')


Validation loss 0.9791955947875977
Training itr 1100/1202
Training Loss:  0.8486787676811218 Learning Rate: 0.0006775395756097369
Training itr 1200/1202
Training Loss:  0.6995716691017151 Learning Rate: 0.0007532552861071061


  0%|          | 0/167 [00:00<?, ?it/s]

Min Valid Loss 0.9791955947875977 Min Test Loss tensor([1.0677], device='cuda:0')
Epoch 1


0it [00:00, ?it/s]

Training itr 0/1202
Training Loss:  0.9405093789100647 Learning Rate: 0.0007547057640276813


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  np.subtract(arr, avg, out=arr, casting='unsafe')


Training itr 100/1202
Training Loss:  0.6319909691810608 Learning Rate: 0.0008232932413689126
Training itr 200/1202
Training Loss:  0.5581968426704407 Learning Rate: 0.0008828215990165849
Training itr 300/1202
Training Loss:  0.5922495126724243 Learning Rate: 0.0009315127738834456
Training itr 400/1202
Training Loss:  0.64671790599823 Learning Rate: 0.0009679124006194855
Training itr 500/1202
Training Loss:  0.556000828742981 Learning Rate: 0.0009909332523088843
Validation


  0%|          | 0/301 [00:00<?, ?it/s]

Validation loss 0.7835639119148254
Training itr 600/1202
Training Loss:  0.6133100986480713 Learning Rate: 0.000999887715043859
Training itr 700/1202
Training Loss:  0.5110162496566772 Learning Rate: 0.0009989488189705994
Training itr 800/1202
Training Loss:  0.6439278721809387 Learning Rate: 0.000995179222721968
Training itr 900/1202
Training Loss:  0.42979896068573 Learning Rate: 0.0009886904512612289
Training itr 1000/1202
Training Loss:  0.5818831324577332 Learning Rate: 0.0009795181364908678
Validation


  0%|          | 0/301 [00:00<?, ?it/s]

Validation loss 0.8320747017860413
Training itr 1100/1202
Training Loss:  0.48330605030059814 Learning Rate: 0.0009677126465009682
Training itr 1200/1202
Training Loss:  0.6333762407302856 Learning Rate: 0.0009533388089820503


  0%|          | 0/167 [00:00<?, ?it/s]

Min Valid Loss 0.7835639119148254 Min Test Loss tensor([0.9174], device='cuda:0')
Epoch 2


0it [00:00, ?it/s]

Training itr 0/1202
Training Loss:  0.4596954584121704 Learning Rate: 0.0009530256538704397
Training itr 100/1202
Training Loss:  0.49300381541252136 Learning Rate: 0.0009361135115920281
Training itr 200/1202
Training Loss:  0.4745562672615051 Learning Rate: 0.0009168065426583897
Training itr 300/1202
Training Loss:  0.4941520094871521 Learning Rate: 0.0008952107677605748
Training itr 400/1202
Training Loss:  0.355582594871521 Learning Rate: 0.000871444776149247
Training itr 500/1202
Training Loss:  0.27171337604522705 Learning Rate: 0.000845639074423489
Validation


  0%|          | 0/301 [00:00<?, ?it/s]

  np.subtract(arr, avg, out=arr, casting='unsafe')
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Validation loss 0.6541392207145691
Training itr 600/1202
Training Loss:  0.3697245121002197 Learning Rate: 0.0008179353698776673
Training itr 700/1202
Training Loss:  0.31951791048049927 Learning Rate: 0.0007884857923417252
Training itr 800/1202
Training Loss:  0.524151086807251 Learning Rate: 0.0007574520587880278
Training itr 900/1202
Training Loss:  0.39750880002975464 Learning Rate: 0.0007250045852921661
Training itr 1000/1202
Training Loss:  0.433972030878067 Learning Rate: 0.0006913215512242259
Validation


  0%|          | 0/301 [00:00<?, ?it/s]

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  np.subtract(arr, avg, out=arr, casting='unsafe')


Validation loss 0.70098876953125
Training itr 1100/1202
Training Loss:  0.4093671441078186 Learning Rate: 0.0006565879208093442
Training itr 1200/1202
Training Loss:  0.29779481887817383 Learning Rate: 0.000620994427430473


  0%|          | 0/167 [00:00<?, ?it/s]

Min Valid Loss 0.6541392207145691 Min Test Loss tensor([0.9174], device='cuda:0')
Epoch 3


0it [00:00, ?it/s]

Training itr 0/1202
Training Loss:  0.32776200771331787 Learning Rate: 0.0006202751224848331
Training itr 100/1202
Training Loss:  0.25582805275917053 Learning Rate: 0.0005840059536779711
Training itr 200/1202
Training Loss:  0.21456320583820343 Learning Rate: 0.0005472754925133859
Training itr 300/1202
Training Loss:  0.3157636225223541 Learning Rate: 0.0005102854376051195
Training itr 400/1202
Training Loss:  0.4657280147075653 Learning Rate: 0.00047323891307880924
Training itr 500/1202
Training Loss:  0.18931114673614502 Learning Rate: 0.00043633935315267897
Validation


  0%|          | 0/301 [00:00<?, ?it/s]

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  np.subtract(arr, avg, out=arr, casting='unsafe')


Validation loss 0.7153773903846741
Training itr 600/1202
Training Loss:  0.2605585753917694 Learning Rate: 0.00039978938501571213
Training itr 700/1202
Training Loss:  0.414374977350235 Learning Rate: 0.00036378971613747907
Training itr 800/1202
Training Loss:  0.43289846181869507 Learning Rate: 0.0003285380321197522
Training itr 900/1202
Training Loss:  0.3011932671070099 Learning Rate: 0.00029422791114215596
Training itr 1000/1202
Training Loss:  0.25896281003952026 Learning Rate: 0.00026104776096297507
Validation


  0%|          | 0/301 [00:00<?, ?it/s]

  np.subtract(arr, avg, out=arr, casting='unsafe')
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Validation loss 0.735427975654602
Training itr 1100/1202
Training Loss:  0.4116417169570923 Learning Rate: 0.00022917978431238623
Training itr 1200/1202
Training Loss:  0.28191816806793213 Learning Rate: 0.00019879897835946734


  0%|          | 0/167 [00:00<?, ?it/s]

Min Valid Loss 0.6541392207145691 Min Test Loss tensor([0.9174], device='cuda:0')
Epoch 4


0it [00:00, ?it/s]

Training itr 0/1202
Training Loss:  0.36982303857803345 Learning Rate: 0.00019820769186297346


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  np.subtract(arr, avg, out=arr, casting='unsafe')


Training itr 100/1202
Training Loss:  0.3076062798500061 Learning Rate: 0.00016951560806295973
Training itr 200/1202
Training Loss:  0.36105018854141235 Learning Rate: 0.0001426383298793154
Training itr 300/1202
Training Loss:  0.35199475288391113 Learning Rate: 0.00011772344897266662
Training itr 400/1202
Training Loss:  0.41299915313720703 Learning Rate: 9.490778085767242e-05
Training itr 500/1202
Training Loss:  0.41763240098953247 Learning Rate: 7.431661360563149e-05
Validation


  0%|          | 0/301 [00:00<?, ?it/s]

Validation loss 0.24429160356521606
Training itr 600/1202
Training Loss:  0.28014373779296875 Learning Rate: 5.606301984793902e-05
Training itr 700/1202
Training Loss:  0.2789078950881958 Learning Rate: 4.0247235858397865e-05
Training itr 800/1202
Training Loss:  0.3973802626132965 Learning Rate: 2.695611112404248e-05
Training itr 900/1202
Training Loss:  0.3737504482269287 Learning Rate: 1.62626314270539e-05
Training itr 1000/1202
Training Loss:  0.22196350991725922 Learning Rate: 8.225518056677635e-06
Validation


  0%|          | 0/301 [00:00<?, ?it/s]

Validation loss 0.24075496196746826
Training itr 1100/1202
Training Loss:  0.3446682095527649 Learning Rate: 2.888905351996413e-06
Training itr 1200/1202
Training Loss:  0.25690239667892456 Learning Rate: 2.820983462736311e-07


  0%|          | 0/167 [00:00<?, ?it/s]

Min Valid Loss 0.24075496196746826 Min Test Loss tensor([0.9174], device='cuda:0')
