In [1]:
# ! pip install transformers datasets
# ! pip install transformers datasets evaluate
# ! pip install transformers[torch]
# ! pip install wandb

In [None]:
from huggingface_hub import notebook_login
import time
import torch
import math
from tqdm import tqdm
import wandb

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
from datasets import load_dataset

dataset = load_dataset("gpt3mix/sst2")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading data:   0%|          | 0.00/523k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/67.7k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/138k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/6920 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

In [None]:
dataset["validation"][0]

{'text': "It 's a lovely film with lovely performances by Buy and Accorsi .",
 'label': 0}

In [None]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [None]:
def preprocess_function(examples):
    return tokenizer(examples["text"], truncation=True)

In [None]:
tokenized_review = dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/6920 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

In [None]:
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [None]:
import evaluate

accuracy = evaluate.load("accuracy")

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

In [None]:
import numpy as np


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)

In [None]:
id2label = {0: "POSITIVE", 1: "NEGATIVE"}
label2id = {"POSITIVE": 0, "NEGATIVE": 1}

In [None]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased", num_labels=2, id2label=id2label, label2id=label2id
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'pre_classifier.weight', 'classifier.bias', 'pre_classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
class CustomTrainer(Trainer):
    def _inner_training_loop(
        self, batch_size=None, args=None, resume_from_checkpoint=None, trial=None, ignore_keys_for_eval=None
    ):
        wandb.init(
        # set the wandb project where this run will be logged
        project="my-awesome-project",

        # track hyperparameters and run metadata
        config={
        "learning_rate": args.learning_rate,
        "architecture": "NN",
        "dataset": "SST2",
        "epochs": args.num_train_epochs,
        }
        )
        number_of_epochs = args.num_train_epochs
        start = time.time()
        train_loss =[]
        train_acc =[]
        eval_acc =[]
        criterion = torch.nn.BCEWithLogitsLoss ().to(device) # to make all the computations happen through cuda
        self.optimizer = torch.optim.Adam(model.parameters(),lr=args.learning_rate )
        self.scheduler = torch.optim.lr_scheduler.StepLR (self.optimizer,1,gamma=0.9)
        train_dataloader = self.get_train_dataloader()
        eval_dataloader = self.get_eval_dataloader()
        max_steps = math . ceil(args.num_train_epochs * len ( train_dataloader ))
        for epoch in range(number_of_epochs):
          train_loss_per_epoch = 0
          train_acc_per_epoch = 0
          with tqdm (train_dataloader,unit=" batch ") as training_epoch :
            training_epoch.set_description(f"Training Epoch { epoch }")
            for step,inputs in enumerate(training_epoch):
              inputs = inputs.to (device)
              #return(inputs)
              labels = inputs['labels'].float()
              # forward pass
              self.optimizer.zero_grad ()
              output = model(**inputs)# TODO Implement by yourself
              # get the loss
              loss = criterion(output[1][:,1],labels) # TODO Implement by yourself
              curr_acc=(output ['logits'].argmax(1)==labels).sum().item()
              # print(f'\tTrain Loss per loop: {loss:.3f} | Train Acc per loop: {curr_acc*100:.2f}%') ---- CHECK THIS
              train_loss_per_epoch += loss.item ()
              # calculate gradients
              loss.backward ()
              # update weights
              self.optimizer.step()
              train_acc_per_epoch += (output ['logits'].argmax(1)==labels).sum().item()
          # adjust the learning rate
          self.scheduler.step ()
          train_loss_per_epoch /= len ( train_dataloader )
          train_acc_per_epoch /= (len ( train_dataloader )* batch_size )
          eval_loss_per_epoch = 0
          eval_acc_per_epoch = 0
          with tqdm ( eval_dataloader , unit =" batch ") as eval_epoch :
            eval_epoch . set_description (f" Evaluation Epoch { epoch }")
            for step,inputs in enumerate(eval_epoch):
              inputs = inputs.to (device)
              labels = inputs['labels'].float()
              # forward pass
              output = model(**inputs)
              loss = criterion(output[1][:,1],labels)
              eval_loss_per_epoch += loss.item ()
              eval_acc_per_epoch += (output['logits'].argmax(1)==labels).sum().item()
            # ... TODO Implement by yourself
          eval_loss_per_epoch /= (len ( eval_dataloader ))
          eval_acc_per_epoch /= ( len ( eval_dataloader )* batch_size )
          print (f'\n\t Train Loss:{train_loss_per_epoch:.3f} | Train Acc : {train_acc_per_epoch * 100 :.2f}% ') # These give the accuracy after each epoch ( epoch is traning through entire training set)
          print (f'\t Eval Loss:{eval_loss_per_epoch:.3f} | Eval Acc : {eval_acc_per_epoch * 100 :.2f}%')
          wandb.log({"val_acc": eval_acc_per_epoch, "loss": train_loss_per_epoch,"train_acc":train_acc_per_epoch})
        print (f'Time :{(time.time()-start)/60:.3f} minutes ')
        wandb.finish()


In [None]:
training_args = TrainingArguments(
    output_dir="homework_1",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=20,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    push_to_hub=True,
)

cust_trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_review["train"],
    eval_dataset=tokenized_review["validation"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)
cust_trainer.train()

VBox(children=(Label(value='0.019 MB of 0.019 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▁
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/global_step,▁

0,1
eval/accuracy,0.9028
eval/loss,0.55772
eval/runtime,3.2856
eval/samples_per_second,554.242
eval/steps_per_second,34.697
train/global_step,0.0


Training Epoch 0: 100%|██████████| 433/433 [00:36<00:00, 11.98 batch /s]
 Evaluation Epoch 0: 100%|██████████| 55/55 [00:01<00:00, 41.11 batch /s]



	 Train Loss:0.016 | Train Acc : 99.48% 
	 Eval Loss:0.703 | Eval Acc : 87.50%


Training Epoch 1: 100%|██████████| 433/433 [00:35<00:00, 12.17 batch /s]
 Evaluation Epoch 1: 100%|██████████| 55/55 [00:01<00:00, 42.14 batch /s]



	 Train Loss:0.002 | Train Acc : 99.84% 
	 Eval Loss:0.634 | Eval Acc : 88.41%


Training Epoch 2: 100%|██████████| 433/433 [00:35<00:00, 12.29 batch /s]
 Evaluation Epoch 2: 100%|██████████| 55/55 [00:01<00:00, 42.44 batch /s]



	 Train Loss:0.001 | Train Acc : 99.87% 
	 Eval Loss:0.721 | Eval Acc : 87.61%


Training Epoch 3: 100%|██████████| 433/433 [00:35<00:00, 12.23 batch /s]
 Evaluation Epoch 3: 100%|██████████| 55/55 [00:01<00:00, 41.65 batch /s]



	 Train Loss:0.000 | Train Acc : 99.88% 
	 Eval Loss:0.732 | Eval Acc : 87.39%


Training Epoch 4: 100%|██████████| 433/433 [00:35<00:00, 12.24 batch /s]
 Evaluation Epoch 4: 100%|██████████| 55/55 [00:01<00:00, 42.32 batch /s]



	 Train Loss:0.000 | Train Acc : 99.88% 
	 Eval Loss:0.794 | Eval Acc : 88.52%


Training Epoch 5: 100%|██████████| 433/433 [00:35<00:00, 12.24 batch /s]
 Evaluation Epoch 5: 100%|██████████| 55/55 [00:01<00:00, 41.93 batch /s]



	 Train Loss:0.000 | Train Acc : 99.88% 
	 Eval Loss:0.814 | Eval Acc : 88.64%


Training Epoch 6: 100%|██████████| 433/433 [00:36<00:00, 12.03 batch /s]
 Evaluation Epoch 6: 100%|██████████| 55/55 [00:01<00:00, 42.11 batch /s]



	 Train Loss:0.000 | Train Acc : 99.88% 
	 Eval Loss:0.831 | Eval Acc : 88.64%


Training Epoch 7: 100%|██████████| 433/433 [00:35<00:00, 12.18 batch /s]
 Evaluation Epoch 7: 100%|██████████| 55/55 [00:01<00:00, 42.01 batch /s]



	 Train Loss:0.000 | Train Acc : 99.88% 
	 Eval Loss:0.848 | Eval Acc : 88.64%


Training Epoch 8: 100%|██████████| 433/433 [00:35<00:00, 12.25 batch /s]
 Evaluation Epoch 8: 100%|██████████| 55/55 [00:01<00:00, 41.85 batch /s]



	 Train Loss:0.000 | Train Acc : 99.88% 
	 Eval Loss:0.863 | Eval Acc : 88.64%


Training Epoch 9: 100%|██████████| 433/433 [00:36<00:00, 11.80 batch /s]
 Evaluation Epoch 9: 100%|██████████| 55/55 [00:01<00:00, 30.86 batch /s]



	 Train Loss:0.000 | Train Acc : 99.88% 
	 Eval Loss:0.878 | Eval Acc : 88.52%


Training Epoch 10: 100%|██████████| 433/433 [00:37<00:00, 11.67 batch /s]
 Evaluation Epoch 10: 100%|██████████| 55/55 [00:01<00:00, 41.91 batch /s]



	 Train Loss:0.000 | Train Acc : 99.88% 
	 Eval Loss:0.892 | Eval Acc : 88.52%


Training Epoch 11: 100%|██████████| 433/433 [00:37<00:00, 11.53 batch /s]
 Evaluation Epoch 11: 100%|██████████| 55/55 [00:01<00:00, 40.48 batch /s]



	 Train Loss:0.000 | Train Acc : 99.88% 
	 Eval Loss:0.907 | Eval Acc : 88.52%


Training Epoch 12: 100%|██████████| 433/433 [00:36<00:00, 11.77 batch /s]
 Evaluation Epoch 12: 100%|██████████| 55/55 [00:01<00:00, 42.04 batch /s]



	 Train Loss:0.000 | Train Acc : 99.88% 
	 Eval Loss:0.921 | Eval Acc : 88.64%


Training Epoch 13: 100%|██████████| 433/433 [00:36<00:00, 11.90 batch /s]
 Evaluation Epoch 13: 100%|██████████| 55/55 [00:01<00:00, 41.72 batch /s]



	 Train Loss:0.000 | Train Acc : 99.88% 
	 Eval Loss:0.935 | Eval Acc : 88.64%


Training Epoch 14: 100%|██████████| 433/433 [00:35<00:00, 12.19 batch /s]
 Evaluation Epoch 14: 100%|██████████| 55/55 [00:01<00:00, 42.36 batch /s]



	 Train Loss:0.000 | Train Acc : 99.88% 
	 Eval Loss:0.948 | Eval Acc : 88.64%


Training Epoch 15: 100%|██████████| 433/433 [00:35<00:00, 12.18 batch /s]
 Evaluation Epoch 15: 100%|██████████| 55/55 [00:01<00:00, 41.41 batch /s]



	 Train Loss:0.000 | Train Acc : 99.88% 
	 Eval Loss:0.961 | Eval Acc : 88.64%


Training Epoch 16: 100%|██████████| 433/433 [00:35<00:00, 12.22 batch /s]
 Evaluation Epoch 16: 100%|██████████| 55/55 [00:01<00:00, 41.39 batch /s]



	 Train Loss:0.000 | Train Acc : 99.88% 
	 Eval Loss:0.973 | Eval Acc : 88.64%


Training Epoch 17: 100%|██████████| 433/433 [00:35<00:00, 12.11 batch /s]
 Evaluation Epoch 17: 100%|██████████| 55/55 [00:01<00:00, 41.20 batch /s]



	 Train Loss:0.000 | Train Acc : 99.88% 
	 Eval Loss:0.985 | Eval Acc : 88.64%


Training Epoch 18: 100%|██████████| 433/433 [00:35<00:00, 12.14 batch /s]
 Evaluation Epoch 18: 100%|██████████| 55/55 [00:01<00:00, 40.99 batch /s]



	 Train Loss:0.000 | Train Acc : 99.88% 
	 Eval Loss:0.997 | Eval Acc : 88.64%


Training Epoch 19: 100%|██████████| 433/433 [00:35<00:00, 12.13 batch /s]
 Evaluation Epoch 19: 100%|██████████| 55/55 [00:01<00:00, 40.97 batch /s]



	 Train Loss:0.000 | Train Acc : 99.88% 
	 Eval Loss:1.008 | Eval Acc : 88.75%
Time :12.425 minutes 


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_acc,▁▇██████████████████
val_acc,▂▆▂▁▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█

0,1
loss,0.0
train_acc,0.99885
val_acc,0.8875


In [None]:
cust_trainer.push_to_hub()

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/4.60k [00:00<?, ?B/s]

events.out.tfevents.1706994496.3f281c5a8cfa.566.0:   0%|          | 0.00/346 [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Stonekraken/homework_1/commit/bc8eeecad8ed5a740df01fe5ef84fae926f83e00', commit_message='End of training', commit_description='', oid='bc8eeecad8ed5a740df01fe5ef84fae926f83e00', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
evaluation_results = cust_trainer.evaluate(tokenized_review["test"])
evaluation_results

{'eval_loss': 0.8206174373626709,
 'eval_accuracy': 0.9055464030752334,
 'eval_runtime': 2.7385,
 'eval_samples_per_second': 664.971,
 'eval_steps_per_second': 41.629}

In [None]:
evaluation_results

{'eval_loss': 0.5577191710472107,
 'eval_accuracy': 0.9028006589785832,
 'eval_runtime': 3.2856,
 'eval_samples_per_second': 554.242,
 'eval_steps_per_second': 34.697}

In [None]:
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained("Stonekraken/homework_1")
evaluation_results_trainer = cust_trainer.evaluate(tokenized_review["test"])
evaluation_results_trainer

config.json:   0%|          | 0.00/735 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

{'eval_loss': 0.8206174373626709,
 'eval_accuracy': 0.9055464030752334,
 'eval_runtime': 3.562,
 'eval_samples_per_second': 511.228,
 'eval_steps_per_second': 32.004}

In [None]:
evaluation_results_trainer

{'eval_loss': 0.8206174373626709,
 'eval_accuracy': 0.9055464030752334,
 'eval_runtime': 3.9706,
 'eval_samples_per_second': 458.623,
 'eval_steps_per_second': 28.711}

In [None]:
model.config.hidden_size

768

In [None]:
model.config.dropout

0.1

In [None]:
model.config

DistilBertConfig {
  "_name_or_path": "Stonekraken/homework_1",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "POSITIVE",
    "1": "NEGATIVE"
  },
  "initializer_range": 0.02,
  "label2id": {
    "NEGATIVE": 1,
    "POSITIVE": 0
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "problem_type": "single_label_classification",
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "torch_dtype": "float32",
  "transformers_version": "4.35.2",
  "vocab_size": 30522
}

Analysis of model:

1. The problem statement is to identify the sentiment behind the movie review provided by the audience,Sentiment behind the review can either be positive or negative.In order to solve this problem I am using the pre trained instance of distilbert-base-uncased and fine tuning the model to analyze movie review data.

2. In order to run this model I have used the T4 GPU in google colab to effictively utilize the parallel processing capabilites of the GPU and reduce the computation time.

3. We can observe a downward trend in the loss fucntion as expected, We can also observe an increase in validation and training accuray as well indicating that the model is performing correctly.

4. In this experiment I have used accuracy as the key metric to determine the performance of my model, this is primarly because accuracy is used as the model bench mark in the learderboard provided, hence a comparison can be established between the current board and my model

5. Based on the test set we can observea an accuracy of 90.55%. The orginal paper reported an accuracy score of 85.4% using Recursive Neural Tensor Network. The increase in accuracy of this model can be expalined due to the fact that distilbert-base-uncased has been selected and the inital pretrained model.

6. Training time 12.425 minutes , for the test data we can observe an inference time of 3.9706s.

7. Hyperparameters: Epochs:20, Learning rate:2e-5, Hidden size of model: 768 ,dropout rate:0.1, model type: distilbert, activation funciton:gelu
8. Based on the ten examples with incorrect precition we can observe that the incorrectly predicted some of statement are abmigous without any clear positve or negative sentiment associated with it. Human annotators also seem to have split decision on these statements. Some of the incorrectly predicted statement have double negation which might cause our model to interpret it incorrectly.
9. Combining predictions from multiple sentiment analysis models can help mitigate errors and improve overall performance. Conducting thorough error analysis to understand the types of errors made by the model and iteratively refining the model based on insights gained from the analysis can lead to continuous improvement in sentiment analysis performance.

In [None]:
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification

In [None]:

count=0
i=0
model_name='Stonekraken/homework_1'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
while count!=10:
  text=test['text'][i]
  inputs = tokenizer(text,return_tensors="pt")
  with torch.no_grad():
    logits = model(**inputs).logits
  predicted_class_id = logits.argmax().item()
  if predicted_class_id!=test['label'][i]:
    print(text)
    print('Confidence score:',torch.nn.functional.softmax(logits,dim=1))
    print('Predict:',model.config.id2label[predicted_class_id],"->Actual:",model.config.id2label[test['label'][i]])
    count+=1
  i+=1

Steers turns in a snappy screenplay that curls at the edges ; it 's so clever you want to hate it .
Confidence score: tensor([[0.1399, 0.8601]])
Predict: NEGATIVE ->Actual: POSITIVE
If there 's a way to effectively teach kids about the dangers of drugs , I think it 's in projects like the -LRB- unfortunately R-rated -RRB- Paid .
Confidence score: tensor([[7.9363e-07, 1.0000e+00]])
Predict: NEGATIVE ->Actual: POSITIVE
Ms. Fulford-Wierzbicki is almost spooky in her sulky , calculating Lolita turn .
Confidence score: tensor([[1.7179e-06, 1.0000e+00]])
Predict: NEGATIVE ->Actual: POSITIVE
If this movie were a book , it would be a page-turner , you ca n't wait to see what happens next .
Confidence score: tensor([[4.9354e-05, 9.9995e-01]])
Predict: NEGATIVE ->Actual: POSITIVE
Absorbing and disturbing -- perhaps more disturbing than originally intended -- but a little clarity would have gone a long way .
Confidence score: tensor([[1.0000e+00, 3.5859e-06]])
Predict: POSITIVE ->Actual: NEGATIVE