In [11]:
pip install torch transformers pandas scikit-learn




In [12]:
# Loading dependencies
import warnings
warnings.filterwarnings("ignore")
import torch
from torch.utils.data import DataLoader, TensorDataset, random_split
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd


In [30]:
# Loading IMDB Movie Reviews dataset
# Forward slashes are accepted on every OS. The previous 'data\IMDB_dataset.csv'
# only resolved on Windows and depended on '\I' being an unrecognised escape sequence.
df = pd.read_csv('data/IMDB_dataset.csv')

# Encode the sentiment in a new integer column: "positive" -> 0, anything else ("negative") -> 1.
# Vectorised comparison instead of a per-row lambda; int64 is requested explicitly so the
# label dtype does not depend on the platform's default integer size.
df['label'] = (df['sentiment'] != 'positive').astype('int64')
df

Unnamed: 0,review,sentiment,label
0,One of the other reviewers has mentioned that ...,positive,0
1,A wonderful little production. <br /><br />The...,positive,0
2,I thought this was a wonderful way to spend ti...,positive,0
3,Basically there's a family where a little boy ...,negative,1
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive,0
...,...,...,...
5995,This was the first movie I ever saw Ashley Jud...,positive,0
5996,I just recently saw this movie in hopes of see...,positive,0
5997,I remember watching this movie when I was youn...,negative,1
5998,Annie's wig does not look good. she is not cut...,negative,1


In [24]:
# Tokenizer setup and conversion of the reviews into one padded tensor of token ids
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)


def _encode_review(text):
    """Return the token ids for one review, special tokens included, truncated to 512 ids."""
    return tokenizer.encode(text, add_special_tokens=True, truncation=True, max_length=512)


# One list of token ids per review
tokenized_reviews = df['review'].apply(_encode_review)

# Right-pad every id sequence with 0 up to the longest one so they stack into a single 2-D tensor
id_tensors = list(map(torch.tensor, tokenized_reviews))
input_ids = torch.nn.utils.rnn.pad_sequence(id_tensors, batch_first=True, padding_value=0)

# Class labels as a tensor, in the same row order as input_ids
labels = torch.tensor(df['label'].values)


In [25]:
# Hold out 20% of the examples as the test set; the fixed random_state makes the split repeatable
(
    train_inputs,
    test_inputs,
    train_labels,
    test_labels,
) = train_test_split(
    input_ids,
    labels,
    test_size=0.2,
    random_state=42,
)

In [26]:
# Wrap each split in a TensorDataset and serve it in mini-batches through a DataLoader
batch_size = 4

train_dataset = TensorDataset(train_inputs, train_labels)
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,  # training examples are drawn in shuffled order
)

test_dataset = TensorDataset(test_inputs, test_labels)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,  # evaluation keeps the dataset order
)


In [27]:
# Model initialization and fine-tuning for 3 epochs

# Seed torch so the newly initialised classifier head and the DataLoader shuffling
# order are repeatable after a kernel restart.
torch.manual_seed(42)

model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# torch.optim.AdamW is used instead of transformers.AdamW, which is deprecated and no longer
# shipped by recent transformers releases. eps and weight_decay are pinned to the values the
# transformers implementation used by default so the optimisation settings stay the same.
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5, eps=1e-6, weight_decay=0.0)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# NOTE(review): the model is called without an attention_mask, so padded positions are
# presumably attended to like real tokens. The evaluation cell calls the model the same way;
# if a mask is introduced it has to be added in both places at once.
epochs = 3
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    progress = tqdm(train_dataloader, desc=f"Epoch {epoch + 1}")
    # Batch-local names: reusing `labels` here would overwrite the full label tensor
    # built in the tokenization cell and break a later re-run of the split cell.
    for step, (batch_inputs, batch_labels) in enumerate(progress, start=1):
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        outputs = model(batch_inputs, labels=batch_labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

        # Show the mean training loss of the epoch so far in the progress bar
        running_loss += loss.item()
        progress.set_postfix(loss=f"{running_loss / step:.4f}")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1: 100%|██████████| 1200/1200 [08:16<00:00,  2.41it/s]
Epoch 2: 100%|██████████| 1200/1200 [08:32<00:00,  2.34it/s]
Epoch 3: 100%|██████████| 1200/1200 [08:31<00:00,  2.35it/s]


In [28]:
# Model evaluation on the held-out test set
model.eval()
predictions = []   # predicted class index for every test example
true_labels = []   # reference class index, in the same order as `predictions`

with torch.no_grad():  # no gradients are needed for inference
    # Batch-local names: reusing `labels` here would overwrite the full label tensor
    # built in the tokenization cell.
    for batch_inputs, batch_labels in tqdm(test_dataloader, desc="Evaluating"):
        batch_inputs = batch_inputs.to(device)

        logits = model(batch_inputs).logits
        # argmax over the class dimension gives the predicted label; store plain Python ints
        predictions.extend(logits.argmax(dim=1).cpu().tolist())
        # The reference labels are only collected, so they never need to be moved to the device
        true_labels.extend(batch_labels.tolist())


Evaluating: 100%|██████████| 300/300 [00:44<00:00,  6.76it/s]


In [29]:
# Score the predictions: a per-class precision/recall/F1 table and the overall accuracy
report = classification_report(true_labels, predictions)
accuracy = accuracy_score(true_labels, predictions)

print(f"Accuracy: {accuracy}")
print("Classification Report:\n", report)


Accuracy: 0.8891666666666667
Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.80      0.87       575
           1       0.84      0.97      0.90       625

    accuracy                           0.89      1200
   macro avg       0.90      0.89      0.89      1200
weighted avg       0.90      0.89      0.89      1200

