# Text Classification using TinyBert
* Dataset: <https://www.kaggle.com/columbine/imdb-dataset-sentiment-analysis-in-csv-format>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import os 

import torch
import torch.nn as nn

import transformers
import torchflare.callbacks as cbs
import torchflare.metrics as metrics
import torchflare.criterion as crit
from torchflare.experiments import Experiment,ModelConfig
from torchflare.datasets import TextDataloader


In [2]:
# Read the training CSV from the working directory; later cells rely on its
# `text` and `label` columns.
df = pd.read_csv(filepath_or_buffer="Train.csv")

In [3]:
# Hold out 10% of the rows for validation. The split is stratified on the
# label column and seeded so it is repeatable across runs.
train_df, valid_df = train_test_split(
    df,
    test_size=0.1,
    random_state=42,
    stratify=df["label"],
)

In [4]:
# The tokenizer is loaded from the same checkpoint name the model cell uses.
tokenizer = transformers.AutoTokenizer.from_pretrained("prajjwal1/bert-tiny")


def _make_loader(frame, **loader_kwargs):
    """Build a loader over the `text` / `label` columns of `frame`.

    Every loader shares the tokenizer above, max_len=128 and batch_size=16.
    Anything in `loader_kwargs` (e.g. ``shuffle=True``) is forwarded to
    ``get_loader`` unchanged.
    """
    text_data = TextDataloader.from_df(
        df=frame,
        input_col="text",
        label_cols="label",
        tokenizer=tokenizer,
        max_len=128,
    )
    return text_data.get_loader(batch_size=16, **loader_kwargs)


# Only the training loader asks for shuffling explicitly; the validation
# loader is left on get_loader's default.
train_dl = _make_loader(train_df, shuffle=True)
valid_dl = _make_loader(valid_df)

In [5]:
class Model(torch.nn.Module):
    """BERT encoder followed by dropout and a single linear head.

    Args:
        dropout: Dropout probability applied to the encoder output that feeds
            the linear head.
        out_features: Number of outputs produced by the linear head.
        model_name: Hugging Face checkpoint the encoder is loaded from.
            Defaults to the checkpoint this notebook was written for. When it
            is changed, the tokenizer producing the inputs must be changed to
            the same checkpoint.
    """

    def __init__(self, dropout, out_features, model_name="prajjwal1/bert-tiny"):
        super().__init__()
        # return_dict=False makes the encoder return a plain tuple, which
        # forward() unpacks positionally.
        self.bert = transformers.BertModel.from_pretrained(
            model_name, return_dict=False
        )
        self.bert_drop = nn.Dropout(dropout)
        # Size the head from the encoder's own config instead of hard-coding
        # 128, so the layer always matches the loaded checkpoint. This width is
        # the encoder hidden size, not the tokenizer's max sequence length.
        self.out = nn.Linear(self.bert.config.hidden_size, out_features)

    def forward(self, x):
        """Run the encoder and the head on one batch.

        Args:
            x: Mapping of keyword arguments for the encoder; it is unpacked
                with ``**x``.

        Returns:
            The linear head's output. No activation is applied here.
        """
        # The second tuple element is used as the sentence representation
        # (the pooled output, per the Hugging Face BertModel docs).
        _, pooled = self.bert(**x)
        return self.out(self.bert_drop(pooled))


In [6]:
# Accuracy is the only metric tracked during training and validation.
metric_list = [metrics.Accuracy(num_classes=2, multilabel=False)]

# Early stopping and checkpointing both watch validation accuracy, where a
# larger value is better (mode="max").
early_stopping = cbs.EarlyStopping(monitor="val_accuracy", patience=2, mode="max")
checkpoint = cbs.ModelCheckpoint(
    monitor="val_accuracy",
    mode="max",
    save_dir="./",
    file_name="model.bin",
)
# No monitor is passed here; which quantity the scheduler watches is decided
# by torchflare -- TODO confirm it is the experiment's main metric.
lr_on_plateau = cbs.ReduceLROnPlateau(mode="max", patience=2)

callbacks = [early_stopping, checkpoint, lr_on_plateau]

In [7]:
# Constructor arguments for Model: a single output per example, paired with
# the BCE-with-logits criterion below.
model_kwargs = {"dropout": 0.3, "out_features": 1}
optimizer_kwargs = {"lr": 3e-4}

config = ModelConfig(
    nn_module=Model,
    module_params=model_kwargs,
    optimizer="AdamW",
    optimizer_params=optimizer_kwargs,
    criterion=crit.BCEWithLogitsFlat,
)

In [8]:
# Use the GPU when one is visible and fall back to the CPU otherwise, so the
# notebook still runs end-to-end on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

exp = Experiment(
    num_epochs=3,
    # Mixed precision is only requested together with the CUDA device.
    fp16=(device == "cuda"),
    device=device,
    seed=42,
)

# Compiling the experiment
exp.compile_experiment(
    model_config=config,
    callbacks=callbacks,
    metrics=metric_list,
    main_metric="accuracy",
)

# Training the models.
exp.fit_loader(train_dl=train_dl, valid_dl=valid_dl)

Some weights of the model checkpoint at prajjwal1/bert-tiny were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).



Epoch: 1/3

Epoch: 2/3

Epoch: 3/3


In [9]:
# Show the per-epoch train/validation loss and accuracy recorded by the experiment.
training_logs = exp.get_logs()
training_logs

Unnamed: 0,Epoch,train_loss,train_accuracy,val_loss,val_accuracy
0,1,0.442108,0.792111,0.376175,0.7961
1,2,0.288617,0.835605,0.383742,0.835988
2,3,0.181499,0.865793,0.441848,0.864558
