# Assignment 2

## Sentiment Analysis of IMDb Reviews Using BERT

In [26]:
#import packages
import pandas as pd
from sklearn.model_selection import train_test_split


# Load the reviews; the code below reads the 'review' and 'sentiment' columns.
df = pd.read_csv('imdb_small.csv')

# Encode the string labels as integers: positive -> 1, negative -> 0.
sentiment_codes = df['sentiment'].map({'positive': 1, 'negative': 0})

# Series.map turns every value missing from the dict into NaN, which would
# silently convert the whole label column to float. Fail here, where the cause
# is obvious, rather than somewhere downstream.
unmapped_rows = sentiment_codes.isna()
if unmapped_rows.any():
    bad_values = df.loc[unmapped_rows, 'sentiment'].unique().tolist()
    raise ValueError(f"Unexpected sentiment labels in 'imdb_small.csv': {bad_values}")

df['sentiment'] = sentiment_codes.astype('int64')

# First hold out 20% of the data for testing, then 20% of the remainder for
# validation (64% / 16% / 20% overall). Fixed random_state keeps splits stable.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['review'], df['sentiment'], test_size=0.2, random_state=42
)
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts, train_labels, test_size=0.2, random_state=42
)

print(f"Train size: {len(train_texts)}")
print(f"Validation size: {len(val_texts)}")
print(f"Test size: {len(test_texts)}")

Train size: 640
Validation size: 160
Test size: 200


### Tokenization

In [27]:
from transformers import BertTokenizer

# Load the tokenizer that belongs to the TinyBERT checkpoint
# (the same checkpoint name is used when the model is loaded).
tokenizer = BertTokenizer.from_pretrained('huawei-noah/TinyBERT_General_4L_312D')


def tokenize_function(texts, max_length=200):
    """Tokenize a list of review strings with the module-level ``tokenizer``.

    Args:
        texts: List of raw review strings.
        max_length: Maximum number of tokens kept per review; longer reviews
            are truncated. Defaults to 200.

    Returns:
        The tokenizer output for ``texts``; sequences are padded
        (``padding=True``) and truncated to ``max_length``.
    """
    return tokenizer(texts, padding=True, truncation=True, max_length=max_length)


# Tokenize each split separately.
train_encodings = tokenize_function(train_texts.tolist())
val_encodings = tokenize_function(val_texts.tolist())
test_encodings = tokenize_function(test_texts.tolist())


vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

In [28]:
val_encodings.keys()

dict_keys(['input_ids', 'token_type_ids', 'attention_mask'])

### Dataset

In [29]:
# Dataset wrappers for the training, validation, and test splits
import torch
from torch.utils.data import Dataset

class IMDbDataset(Dataset):
    """Map-style dataset that pairs tokenizer encodings with labels.

    Args:
        encodings: Mapping from field name (e.g. ``'input_ids'``) to a
            sequence holding one entry per example.
        labels: Sequence of labels, one per example, in the same order as
            the entries in ``encodings``.

    Raises:
        ValueError: If any field in ``encodings`` does not contain exactly
            ``len(labels)`` entries.
    """

    def __init__(self, encodings, labels):
        # __len__ is driven by labels while __getitem__ indexes both inputs,
        # so a length mismatch would either drop examples silently or raise
        # an IndexError deep inside training. Reject it up front instead.
        expected = len(labels)
        for key, val in encodings.items():
            if len(val) != expected:
                raise ValueError(
                    f"encodings[{key!r}] has {len(val)} entries but there are {expected} labels"
                )
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors, with its label under ``'labels'``."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Return the number of examples (the number of labels)."""
        return len(self.labels)

# Wrap each split's encodings and labels (converted to a plain list) in an IMDbDataset.
train_dataset, val_dataset, test_dataset = (
    IMDbDataset(split_encodings, split_labels.tolist())
    for split_encodings, split_labels in (
        (train_encodings, train_labels),
        (val_encodings, val_labels),
        (test_encodings, test_labels),
    )
)


### TinyBERT model training

In [30]:

from transformers import BertForSequenceClassification, Trainer, TrainingArguments, set_seed

# Same value as the random_state used for the data splits.
SEED = 42

# The classifier head is not part of the checkpoint and is randomly initialized
# when the model is loaded (see the warning this cell prints). Seed the RNGs
# *before* loading so the head starts from the same weights on every run.
set_seed(SEED)

# Load the pre-trained TinyBERT model with a 2-class classification head
model = BertForSequenceClassification.from_pretrained('huawei-noah/TinyBERT_General_4L_312D', num_labels=2)


training_args = TrainingArguments(
    output_dir='./results',
    eval_strategy="epoch",           # evaluate on the validation set after every epoch
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    num_train_epochs=5,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    report_to="none",                # do not send logs to external trackers
    seed=SEED,                       # keep the Trainer's own seeding explicit
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

trainer.train()


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at huawei-noah/TinyBERT_General_4L_312D and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,0.6958,0.692542
2,0.6567,0.651861
3,0.515,0.567689


Epoch,Training Loss,Validation Loss
1,0.6958,0.692542
2,0.6567,0.651861
3,0.515,0.567689
4,0.4203,0.537924
5,0.4179,0.534197


TrainOutput(global_step=200, training_loss=0.5589746499061584, metrics={'train_runtime': 598.7853, 'train_samples_per_second': 5.344, 'train_steps_per_second': 0.334, 'total_flos': 17923745280000.0, 'train_loss': 0.5589746499061584, 'epoch': 5.0})

### Evaluation

In [31]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Run the trained model on the test split; the first two fields of the result
# are the per-class scores and the reference labels.
predictions, true_labels = trainer.predict(test_dataset)[:2]

# The predicted class is the index of the highest score in each row.
predicted_labels = predictions.argmax(axis=1)

accuracy = accuracy_score(true_labels, predicted_labels)
precision, recall, f1, _ = precision_recall_fscore_support(
    true_labels,
    predicted_labels,
    average='binary',
)

# Report every metric with four decimal places, one per line.
metric_report = (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1 Score", f1),
)
print("\n".join(f"{metric_name}: {metric_value:.4f}" for metric_name, metric_value in metric_report))


Accuracy: 0.7650
Precision: 0.7579
Recall: 0.7500
F1 Score: 0.7539
