In [None]:
import os
os.chdir("../")

import pytorch_lightning as pl
import torch
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.utilities.model_summary import summarize

from Data.Preprocessing import prepare_data
from Model.EmoClassifier import TextClassifierModule

# Setup hot realoading of modules when files in working dir change
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# CSV file handed to prepare_data() in the next cell. The path is relative, so it
# depends on the working directory set by os.chdir("../") in the first cell.
combined_data_path = "./datasets/combined_data.csv"

In [None]:
# Build the data module that is later passed to trainer.fit(), together with
# `classes`, whose length sizes the classifier when the model is created.
data_module, classes = prepare_data(
    data_path=combined_data_path,
    label_name="broadEmo",
    dataset_names=["Empathetic Dialogues"],
    split_train_val_test=[0.8, 0.1, 0.1],
    batch_size=32,
    RANDOM_SEED=5,
)

Downloading:   0%|          | 0.00/558 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/824k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.03M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Training data shape:  (19671, 4) 
Validation data shape:  (2459, 4) 
Test data shape:  (2459, 4)


In [None]:
# Defining network hyperparameters here. Hyperparameters can include anything and will be saved in model checkpoints.
# These hyperparameters will be available in the neural network module.
# They are forwarded to TextClassifierModule as keyword arguments, so the key
# spellings (e.g. 'featuer_extactor_lr') are part of that call's interface and
# are kept as-is; rename them only together with the module.

hparams = {
  'freeze_feature_extractor': True,
  'freeze_until_layer': 8,
  'optimizer': 'Adam',
  'featuer_extactor_lr': 3e-5,
  'learning_rate': 3e-5,
  'weight_decay': 1e-5,
  # Needed in the model for logging. Must equal the batch_size given to
  # prepare_data() (32); it was previously 2, which disagreed with the loaders.
  'batch_size': 32,
}

In [None]:
# Create the classifier from the number of classes and the hyperparameters,
# then apply the .float() cast as a separate, explicit step.
model = TextClassifierModule(len(classes), **hparams)
model = model.float()

Downloading:   0%|          | 0.00/517M [00:00<?, ?B/s]

Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing RobertaModel: ['lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# Obtain the full summary of the model: max_depth=-1 expands every nested submodule.
# `summarize` is imported from pytorch_lightning.utilities.model_summary in the
# first cell (it was previously undefined on a fresh kernel).
summarize(model, max_depth=-1)

# TensorBoard Logging

TensorBoard provides a good user interface for tracking the training process.
It can be used to track metric curves during training and to compare several versions of a model at the same time. It also allows viewing model architectures with the input and output at each layer.

In [None]:
# Load the TensorBoard notebook extension to view visualisations in real time during training.
# --logdir points at 'current_lightning_logs', the same directory that the
# TensorBoardLogger in the training cell uses as its save_dir.
%load_ext tensorboard
%tensorboard --logdir current_lightning_logs

# Training

The PyTorch Lightning Trainer takes in the data module and the Lightning module
and performs the training loop automatically. It provides a number of useful
callbacks to manage the training session (e.g. early stopping, gradient tracking, weight averaging, etc.). The Trainer can automatically track validation metrics and save the best model checkpoints. It removes the overhead of manually tracking every bit of the training loop and eliminates boilerplate.

In [None]:
# PyTorch Lightning provides a Trainer class which handles the training loop.
# The trainer automatically performs validation and training steps while also
# logging key metrics, which enables effective training.
#
# The trainer takes in data modules and a model and automatically sets up the training loop.
#
# The trainer automatically handles GPU or TPU training without the need of
# manually casting tensors to device.
#
# The trainer saves model checkpoints so that the best models can later be recovered.
#
# The trainer supports a wide variety of options which make model training more efficient
# and fast (16 bit precision, debugging, early stopping, etc.)
#
# Full manual can be found here: https://pytorch-lightning.readthedocs.io/en/latest/common/trainer.html

# Lightning warns about an ambiguous batch size because the batches are
# dictionaries. The warning is nothing serious; the training loop works properly.
# Warning: https://github.com/PyTorchLightning/pytorch-lightning/issues/10349
# Only that specific warning is silenced, so that unrelated warnings
# (deprecations, numerical issues, ...) stay visible.
import warnings
warnings.filterwarnings(
  'ignore',
  message=r".*infer the `batch_size` from an ambiguous collection",
)

# log_graph=True asks the logger to record the model graph. Per the Lightning
# docs this needs the module to define `example_input_array`.
# NOTE(review): confirm TextClassifierModule sets it, otherwise no graph is logged.
classifier_logger = TensorBoardLogger(save_dir='current_lightning_logs', log_graph = True)

# Keep only the single best checkpoint (lowest val_loss) under models/.
checkpoint_callback = ModelCheckpoint(
  monitor="val_loss",
  dirpath="models/",
  filename="emotion-recognizer",
  save_top_k=1,
  mode="min",
)

# `patience` counts validation checks, not epochs. With val_check_interval=0.1
# below, 10 checks without improvement correspond to roughly one epoch.
early_stopping = EarlyStopping(monitor="val_loss", patience=10)

trainer = pl.Trainer(
  # overfit_batches=1, # debug option, overfits the given proportion of the whole data
  track_grad_norm=2, # debug option, tracks gradient norms in tensorboard
  default_root_dir=os.getcwd(), # The directory to save and log training results
  max_epochs=30,
  gpus=1 if torch.cuda.is_available() else None, # use one GPU when available, otherwise fall back to CPU
  val_check_interval=0.1, # validate 10 times per epoch, frequent validation is helpful
  logger=classifier_logger, # Logger options to track training
  callbacks=[early_stopping, checkpoint_callback],
  # checkpoint_callback=False, # toggles checkpointing, might be good to avoid when debugging
  # precision=16, # enable 16 bit precision. Allows multiple times faster training. Works only on GPU so far
)

trainer.fit(model, data_module)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name              | Type         | Params | In sizes | Out sizes
--------------------------------------------------------------------------
0 | feature_extractor | RobertaModel | 134 M  | ?        | ?        
1 | classifier        | ModuleList   | 49.9 K | ?        | ?        
--------------------------------------------------------------------------
29.0 M    Trainable params
105 M     Non-trainable params
134 M     Total params
539.799   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Avg-Loss=2.113677501678467


Training: 0it [00:00, ?it/s]