# Project Overview

Built a sentiment analysis model using TinyBERT and Hugging Face Transformers.
Fine-tuned the model on a real-world dataset of customer reviews to classify sentiments as Positive or Negative.
Achieved high accuracy, and deployed the model for real-time inference.

## Install everything

In [None]:
pip install transformers datasets torch scikit-learn

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.1.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


## Load a real dataset (Amazon reviews)

In [11]:
from datasets import load_dataset

dataset = load_dataset('amazon_polarity')
train_data = dataset['train']
test_data = dataset['test']

print(f"Number of training samples: {len(train_data)}")
print(f"Number of testing samples: {len(test_data)}")
print()
print(f"First training sample: {train_data[0]}")
print(f"First training sample: {train_data[1]}")


README.md: 0.00B [00:00, ?B/s]

amazon_polarity/train-00000-of-00004.par(…):   0%|          | 0.00/260M [00:00<?, ?B/s]

amazon_polarity/train-00001-of-00004.par(…):   0%|          | 0.00/258M [00:00<?, ?B/s]

amazon_polarity/train-00002-of-00004.par(…):   0%|          | 0.00/255M [00:00<?, ?B/s]

amazon_polarity/train-00003-of-00004.par(…):   0%|          | 0.00/254M [00:00<?, ?B/s]

amazon_polarity/test-00000-of-00001.parq(…):   0%|          | 0.00/117M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3600000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/400000 [00:00<?, ? examples/s]

Number of training samples: 3600000
Number of testing samples: 400000

First training sample: {'label': 1, 'title': 'Stuning even for the non-gamer', 'content': 'This sound track was beautiful! It paints the senery in your mind so well I would recomend it even to people who hate vid. game music! I have played the game Chrono Cross but out of all of the games I have ever played it has the best music! It backs away from crude keyboarding and takes a fresher step with grate guitars and soulful orchestras. It would impress anyone who cares to listen! ^_^'}
First training sample: {'label': 1, 'title': 'The best soundtrack ever to anything.', 'content': "I'm reading a lot of reviews saying that this is the best 'game soundtrack' and I figured that I'd write a review to disagree a bit. This in my opinino is Yasunori Mitsuda's ultimate masterpiece. The music is timeless and I'm been listening to it for years now and its beauty simply refuses to fade.The price tag on this is pretty staggering I

## Preprocess the text

In [12]:
def preprocess(batch):
    return {
        'text' : [t+". "+c for t,c in zip(batch['title'] ,batch['content'])],
        'label' : batch['label']
    }

train = train_data.map(preprocess ,batched=True)
test = test_data.map(preprocess ,batched=True)


Map:   0%|          | 0/3600000 [00:00<?, ? examples/s]

Map:   0%|          | 0/400000 [00:00<?, ? examples/s]

## Load TinyBERT for fine-tuning

In [13]:
from transformers import AutoTokenizer ,AutoModelForSequenceClassification

model_name = "prajjwal1/bert-tiny"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name ,num_labels = 2)

config.json:   0%|          | 0.00/285 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

## Tokenize the data

In [14]:
def tokenize(batch):
    return tokenizer(batch["text"] ,padding="max_length" ,truncation=True ,max_length= 128)

train_encoded = train.map(tokenize ,batched=True)
test_encoded = test.map(tokenize ,batched=True)


Map:   0%|          | 0/3600000 [00:00<?, ? examples/s]

Map:   0%|          | 0/400000 [00:00<?, ? examples/s]

## Fine-tune the model

In [None]:
import torch
torch.cuda.is_available()

True

In [None]:

from transformers import TrainingArguments, Trainer
from sklearn.metrics import accuracy_score, f1_score
import numpy as np



def compute_metrics(pred):
    labels = pred.label_ids
    preds = np.argmax(pred.predictions, axis=1)
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds)
    }

training_args = TrainingArguments(
    output_dir="./tinybert-sentiment",
    eval_strategy="epoch",
    logging_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=3,
    weight_decay=0.01 ,
    report_to="none"

)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_encoded,
    eval_dataset=test_encoded,
    compute_metrics=compute_metrics
)

trainer.train()



Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2051,0.166208,0.938577,0.938078
2,0.1652,0.151603,0.943858,0.943941
3,0.1553,0.151864,0.945168,0.945093


TrainOutput(global_step=337500, training_loss=0.17517074652777778, metrics={'train_runtime': 6210.1114, 'train_samples_per_second': 1739.099, 'train_steps_per_second': 54.347, 'total_flos': 3430315008000000.0, 'train_loss': 0.17517074652777778, 'epoch': 3.0})

In [None]:
# After training, SAVE the model
trainer.save_model("/content/my_model")
tokenizer.save_pretrained("/content/my_model")


('/content/my_model/tokenizer_config.json',
 '/content/my_model/special_tokens_map.json',
 '/content/my_model/vocab.txt',
 '/content/my_model/added_tokens.json',
 '/content/my_model/tokenizer.json')

In [None]:
# Save to Google Drive
from google.colab import drive
drive.mount('/content/drive')
!cp -r /content/my_model /content/drive/MyDrive/


Mounted at /content/drive


#**Evaluate** model performance

In [5]:
# mount drive to be seen by colab to import data
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
# Verification
!ls /content/drive


MyDrive


In [7]:
# Loading the the model without retraining
from transformers import AutoModelForSequenceClassification ,AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("/content/drive/MyDrive/my_model")
model = AutoModelForSequenceClassification.from_pretrained("/content/drive/MyDrive/my_model")

###Evaluate Model Performence

In [15]:
# to disable dropout and makes predictions stable.
model.eval()

# testing on testset
import torch
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer


trainer = Trainer(
    model=model,
    tokenizer=tokenizer
)

predictions = trainer.predict(test_encoded)
preds = np.argmax(predictions.predictions, axis=1)

y_true = predictions.label_ids
y_pred = np.argmax(predictions.predictions, axis=1)

accuracy = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)

print(f"Accuracy: {accuracy}")
print(f"F1 Score: {f1}")




  trainer = Trainer(


  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice:

 3


[34m[1mwandb[0m: You chose "Don't visualize my results"


Accuracy: 0.9451675
F1 Score: 0.9450926887884743


In [20]:
# defining a function to predict the sentiment
def predict_sentiment(text):
  inputs = tokenizer(
      text,
      return_tensors="pt",
      truncation = True,
      padding= True
  )
  # Move input tensors to the same device as the model
  inputs = {key: val.to(model.device) for key, val in inputs.items()}
  with torch.no_grad():
    outputs = model(**inputs)
  pred = torch.argmax(outputs.logits ,dim=-1).item()
  return "Positive" if pred==1 else "Negative"

In [23]:
# test the function
predict_sentiment("I really enjoyed this book")
predict_sentiment("Worst purchase ever")


'Negative'