# Loading Libraries


In [None]:
!pip install datasets -q
!pip install transformers --upgrade
!pip install accelerate>=0.20.1 -q
!pip install nlpaug --quiet



In [None]:
import torch
import datasets
from datasets import load_dataset,Dataset
import transformers
from transformers import TrainingArguments
import warnings
import pandas as pd
warnings.filterwarnings("ignore")

In [None]:
import nlpaug.augmenter.word as naw
import nlpaug.flow as nafc
from nlpaug.util import Action

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# Use the first CUDA GPU when one is visible to torch; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cpu')

# Loading Dataset


In [None]:
# Load the "carblacac/twitter-sentiment-analysis" dataset into `ds`.
ds = load_dataset(path="carblacac/twitter-sentiment-analysis")

In [None]:
# Rename the 'feeling' column to 'label'.
# The guard keeps the cell re-runnable: after the first run 'feeling' no longer
# exists, and an unconditional rename would raise.
if 'feeling' in ds['train'].column_names:
    ds=ds.rename_column('feeling','label')
ds

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 119988
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 29997
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 61998
    })
})

# Data Augmentation


In [None]:
from nlpaug.util.file.download import DownloadUtil
import os

# File the augmenter reads its GloVe vectors from.
GLOVE_PATH = 'glove.6B.300d.txt'

# Fetch the GloVe archive only when the vectors are not already on disk, so the
# notebook runs from a fresh environment without repeating the download.
# NOTE(review): assumes download_glove unpacks 'glove.6B.300d.txt' into
# dest_dir - confirm against the installed nlpaug version.
if not os.path.exists(GLOVE_PATH):
    DownloadUtil.download_glove(dest_dir = '.', model_name = 'glove.6B')

# Word-level augmenter that substitutes words using the GloVe embeddings.
aug = naw.WordEmbsAug(
  # You can choose from "word2vec", "glove", or "fasttext"
  model_type = 'glove',
  model_path = GLOVE_PATH,
  action = "substitute")

In [None]:
from tqdm import tqdm
def augment_data(aug_strategy,n,train_df):
    """Return ``train_df`` with augmented copies of its tweets appended.

    Uses the module-level ``aug`` augmenter to generate the variants.

    Args:
        aug_strategy: Name of the augmentation strategy. Only ``'glove'`` is
            implemented; any other value adds no rows.
        n: Number of variants requested from ``aug.augment`` for each tweet.
        train_df: DataFrame with a ``'text'`` and a ``'label'`` column.

    Returns:
        A new DataFrame holding the original rows followed by the augmented
        rows (columns ``'text'`` and ``'label'``). Index labels are kept as
        produced by ``pd.concat`` and are therefore not unique.
    """
    augmented_tweets = []
    augmented_tweets_labels = []
    text = train_df['text'].values
    label = train_df['label'].values

    if aug_strategy == 'glove':
        # Walk texts and labels positionally. Indexing these arrays with
        # train_df.index labels is only correct for a default 0..N-1 index and
        # fails (or picks the wrong row) for filtered or shuffled frames.
        for tweet, tweet_label in tqdm(zip(text, label), total=len(text), desc="completed"):
            variants = aug.augment(tweet, n = n)
            # Guard against an augmenter that hands back a bare string instead
            # of a list; iterating it would append single characters.
            if isinstance(variants, str):
                variants = [variants]
            for variant in variants:
                augmented_tweets.append(variant)
                augmented_tweets_labels.append(tweet_label)

    df_augmented_data = pd.DataFrame(
        list(zip(augmented_tweets, augmented_tweets_labels)),
        columns = ['text', 'label'])

    # Original rows first, augmented rows after them.
    train_df_augmented = pd.concat([train_df, df_augmented_data], axis = 0)

    return train_df_augmented

In [None]:
# Take the first 500 training rows as a pandas DataFrame.
# Selecting rows and converting that selection avoids `set_format`, which
# would switch the shared `ds['train']` split to pandas output for every later
# cell as well.
train_split = ds['train']
train_df = train_split.select(range(min(500, len(train_split)))).to_pandas()
print(train_df.head())

                                                text  label
0  @fa6ami86 so happy that salman won.  btw the 1...      0
1  @phantompoptart .......oops.... I guess I'm ki...      0
2  @bradleyjp decidedly undecided. Depends on the...      1
3  @Mountgrace lol i know! its so frustrating isn...      1
4  @kathystover Didn't go much of any where - Lif...      1


In [None]:
# Request two GloVe-substituted variants per tweet, then show the row counts
# before and after augmentation.
train_df_augmented = augment_data('glove', 2, train_df)
(len(train_df), len(train_df_augmented))

completed: 100%|██████████| 500/500 [07:30<00:00,  1.11it/s]


(500, 1500)

In [None]:
# Wrap the augmented DataFrame in a `datasets.Dataset` for tokenization.
train_augmented = Dataset.from_pandas(df=train_df_augmented)

# Tokenizing Dataset


In [None]:
from transformers import AutoTokenizer

In [None]:
# Checkpoint identifier used for the student tokenizer and model.
student_name = 'huawei-noah/TinyBERT_General_4L_312D'

In [None]:
# Tokenizer matching the student checkpoint.
student_tokenizer = AutoTokenizer.from_pretrained(student_name)

In [None]:
# Display the input field names this tokenizer reports for its model.
student_tokenizer.model_input_names

['input_ids', 'token_type_ids', 'attention_mask']

In [None]:
def tokenize_text(batch, max_length=512):
    """Tokenize the ``'text'`` field of a batch with ``student_tokenizer``.

    Args:
        batch: Mapping with a ``'text'`` entry, as passed by ``Dataset.map``.
        max_length: Upper bound on tokens per example. ``truncation=True``
            without a maximum length does nothing for this tokenizer (it
            warns and disables truncation), so the bound is passed explicitly.
            NOTE(review): 512 assumes the usual BERT-style position limit;
            confirm against the student model's config.

    Returns:
        Whatever ``student_tokenizer`` returns for the batch.
    """
    return student_tokenizer(batch['text'],truncation=True,max_length=max_length)

In [None]:
# Tokenize the augmented training set and the validation split in batches.
train_tokenized=train_augmented.map(tokenize_text,batched=True)
valid_tokenized=ds['validation'].map(tokenize_text,batched=True)
# A fixed seed makes the 200-example evaluation subset identical on every run;
# an unseeded shuffle would evaluate on a different sample each time.
small_valid= valid_tokenized.shuffle(seed=42).select(range(200))

Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [None]:
# Drop the "__index_level_0__" helper column (presumably the pandas index
# carried over by Dataset.from_pandas - confirm).
# The membership check keeps the cell idempotent: an unconditional removal
# raises once the column is gone, e.g. when the cell is run twice.
index_column = "__index_level_0__"
if index_column in train_tokenized.column_names:
    train_tokenized=train_tokenized.remove_columns([index_column])
train_tokenized

Dataset({
    features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 1500
})

# Initializing Student Model


In [None]:
import torch
from transformers import AutoConfig
from transformers import AutoModelForSequenceClassification

# Student checkpoint and the number of output classes for its classifier head.
student_name = 'huawei-noah/TinyBERT_General_4L_312D'
num_labels = 2

# Configuration for the student, loaded from the checkpoint with the label
# count set above.
student_config = AutoConfig.from_pretrained(student_name, num_labels=num_labels)

In [None]:
def student_init():
  """Build a student classifier from ``student_name`` / ``student_config`` and move it to ``device``."""
  student = AutoModelForSequenceClassification.from_pretrained(
      student_name, config=student_config)
  return student.to(device)

# Initializing Teacher Model


In [None]:
# Directory the teacher checkpoint is loaded from.
teacher_name = 'save_bert/'

In [None]:
# Load the teacher checkpoint with a two-class head, then move it to `device`.
teacher_model = AutoModelForSequenceClassification.from_pretrained(
    teacher_name, num_labels=2)
teacher_model = teacher_model.to(device)

# Defining Loss Function


In [None]:
import torch.nn as nn
import torch.nn.functional as F
from transformers import Trainer

In [None]:
class KnowledgeDistillationTrainer(Trainer):
  """Trainer whose loss blends the student's own loss with a distillation term.

  The loss is ``alpha * loss_ce + (1 - alpha) * loss_kd``. ``loss_ce`` is the
  loss reported by the student model. ``loss_kd`` is the KL divergence between
  the temperature-softened student and teacher distributions, scaled by
  ``temperature ** 2``. ``alpha`` and ``temperature`` are read from
  ``self.args``.
  """

  def __init__(self, *args, teacher_model=None, **kwargs):
    """Forward all standard arguments to ``Trainer`` and keep the teacher.

    Args:
      teacher_model: Model whose logits provide the soft targets.
    """
    super().__init__(*args, **kwargs)
    self.teacher_model = teacher_model
    if self.teacher_model is not None:
      # The teacher only supplies targets; keep it in inference mode so
      # layers such as dropout behave deterministically.
      self.teacher_model.eval()

  def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
    """Compute the weighted sum of the student loss and the distillation loss.

    Args:
      model: The student model being trained.
      inputs: Keyword inputs passed unchanged to both student and teacher.
      return_outputs: When true, return ``(loss, student_outputs)``.
      num_items_in_batch: Accepted for compatibility with Trainer versions
        that pass this keyword to ``compute_loss``; not used here.

    Returns:
      The loss, or ``(loss, student_outputs)`` when ``return_outputs`` is set.
    """
    outputs_student = model(**inputs)
    loss_ce = outputs_student.loss
    logits_student = outputs_student.logits

    # The teacher is never updated, so skip building an autograd graph for
    # its forward pass.
    with torch.no_grad():
      outputs_teacher = self.teacher_model(**inputs)
    logits_teacher = outputs_teacher.logits

    loss_fct = nn.KLDivLoss(reduction="batchmean")
    # KLDivLoss expects log-probabilities as input and probabilities as target.
    loss_kd = self.args.temperature ** 2 * loss_fct(
                F.log_softmax(logits_student / self.args.temperature, dim=-1),
                F.softmax(logits_teacher / self.args.temperature, dim=-1))

    # Return weighted student loss
    loss = self.args.alpha * loss_ce + (1. - self.args.alpha) * loss_kd
    return (loss, outputs_student) if return_outputs else loss

# Training Model


In [None]:
import numpy as np
from datasets import load_metric
accuracy_score = load_metric("accuracy",trust_remote_code=True)

def compute_metrics(pred):
  """Return classification accuracy for an evaluation prediction pair.

  Accuracy is computed directly with NumPy, so the function does not depend
  on an externally loaded metric object.

  Args:
    pred: Pair ``(predictions, labels)``; ``predictions`` holds per-class
      scores with the class axis at position 1.

  Returns:
    ``{"accuracy": <fraction of rows whose argmax equals the label>}``.
  """
  predictions, labels = pred
  predicted_classes = np.argmax(predictions, axis=1)
  accuracy = float(np.mean(predicted_classes == np.asarray(labels)))
  return {"accuracy": accuracy}

In [None]:
class KnowledgeDistillationTrainingArguments(TrainingArguments):
  """``TrainingArguments`` carrying two extra distillation hyper-parameters.

  Attributes:
    alpha: Stored as given (default 0.5).
    temperature: Stored as given (default 2.0).
  """

  def __init__(self, *args, alpha=0.5, temperature=2.0, **kwargs):
    # Everything except the two extra keywords goes to the parent unchanged.
    super().__init__(*args, **kwargs)
    self.temperature, self.alpha = temperature, alpha

In [None]:
batch_size=32
finetuned_student_ckpt="tinybert-finetuned-sentiment/"

# Log about once per epoch. Clamp to at least 1 so a training set smaller than
# one batch does not produce logging_steps=0.
steps_per_epoch = max(1, len(train_tokenized)//batch_size)

student_training_args = KnowledgeDistillationTrainingArguments(output_dir=finetuned_student_ckpt,
                                                               evaluation_strategy = "epoch",
                                                               num_train_epochs=3,
                                                               learning_rate=2e-5,
                                                               per_device_train_batch_size=batch_size,
                                                               per_device_eval_batch_size=batch_size,
                                                               # NOTE(review): alpha=1 gives the distillation term a weight of
                                                               # (1 - alpha) = 0 in compute_loss, so the teacher has no effect
                                                               # on training - confirm this is intended.
                                                               alpha=1,
                                                               weight_decay=0.01,
                                                               disable_tqdm=False,
                                                               logging_steps=steps_per_epoch,
                                                               log_level='error',

                                                               )

In [None]:
# Assemble the distillation trainer: a student factory, the teacher, the
# training arguments, the datasets, the metric callback and the tokenizer.
tinybert_trainer = KnowledgeDistillationTrainer(
    model_init=student_init,
    args=student_training_args,
    teacher_model=teacher_model,
    train_dataset=train_tokenized,
    eval_dataset=small_valid,
    tokenizer=student_tokenizer,
    compute_metrics=compute_metrics,
)

In [None]:
# Run training; the returned value is displayed as the cell output.
tinybert_trainer.train()

Step,Training Loss
46,0.676
92,0.6074
138,0.5666


TrainOutput(global_step=141, training_loss=0.6151009451412985, metrics={'train_runtime': 2880.9949, 'train_samples_per_second': 1.562, 'train_steps_per_second': 0.049, 'total_flos': 6298068021168.0, 'train_loss': 0.6151009451412985, 'epoch': 3.0})

# Save Model


In [None]:
# Save to the relative directory that the later cells load the student from
# ('save_tinybert/'). The previous absolute path '/save_tinybert/' pointed at
# the filesystem root, a different location unless the working directory is '/'.
tinybert_trainer.save_model('save_tinybert/')

# Comparison in Number of Parameters


In [None]:
from transformers import AutoConfig, AutoModelForSequenceClassification
import os

def compute_parameters(model_path):
  """Load the sequence-classification checkpoint at ``model_path`` and return its ``num_parameters()`` value."""
  return AutoModelForSequenceClassification.from_pretrained(model_path).num_parameters()

In [None]:
# Count the parameters of the teacher checkpoint and report the total.
teacher_model_parameters = compute_parameters(teacher_name)
print("Teacher Model: ", teacher_model_parameters)

Teacher Model:  109483778


In [None]:
# Count the parameters of the saved student checkpoint and report the total.
student_model_parameters = compute_parameters('save_tinybert/')
print("Student Model: ", student_model_parameters)

Student Model:  14350874


In [None]:
# Parameter reduction expressed as a fraction of the teacher's parameter count.
parameter_gap = teacher_model_parameters - student_model_parameters
decrease = parameter_gap / teacher_model_parameters
print(f"Student Model is {round(decrease*100,2)} % smaller than Teacher Model.")

Student Model is 86.89 % smaller than Teacher Model.


# Comparison in Inference Time


In [None]:
from transformers import pipeline
import time

# Teacher latency: build the pipeline, warm it up, then time 100 single-text calls.
pipe = pipeline("text-classification", model=teacher_name, tokenizer='bert-base-uncased')

sample_input = ds['train']['text'][101]

# Warm-up calls, excluded from the measurement.
for _ in range(10):
  _ = pipe(sample_input)

# perf_counter is the monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock and can jump if it is adjusted.
start = time.perf_counter()
for _ in range(100):
  _ = pipe(sample_input)
total_time_teacher_model = time.perf_counter()-start
print("Total time to process 100 requests for Teacher Model: ",total_time_teacher_model)

Total time to process 100 requests for Teacher Model:  29.14437484741211


In [None]:
# Student latency: build the pipeline, warm it up, then time 100 single-text calls.
pipe = pipeline("text-classification", model="save_tinybert/", tokenizer=student_name)

sample_input = ds['train']['text'][101]

# Warm-up calls, excluded from the measurement.
for _ in range(10):
  _ = pipe(sample_input)

# perf_counter is the monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock and can jump if it is adjusted.
start = time.perf_counter()
for _ in range(100):
  _ = pipe(sample_input)
total_time_student_model = time.perf_counter()-start

print("Total time to process 100 requests for Student Model: ",total_time_student_model)

Total time to process 100 requests for Student Model:  1.494683027267456


In [None]:
# Fraction of the teacher's measured time that the student saves.
decrease_in_time = (total_time_teacher_model-total_time_student_model)/total_time_teacher_model
# This value is a reduction in elapsed time, not a speed-up factor, so the
# message reports it as such.
print(f"Student Model takes {round(decrease_in_time*100,2)} % less inference time than Teacher Model.")

Student Model is 94.87 % faster than Teacher Model.
