In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader, TensorDataset
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from sklearn.metrics import classification_report, accuracy_score

# One seed shared by the data split and the torch RNG so a fresh
# "Restart & Run All" reproduces the same split, classifier-head
# initialisation and batch order.
SEED = 42

# Load the dataset
df = pd.read_csv('dataset.csv')
df = df.dropna()  # Drop rows with missing values (in any column)
df['Rating'] = df['Rating'].astype(int)  # Ensure the Rating column is of integer type

# Fail fast on ratings the 5-class model cannot represent. Later in this cell
# the labels are built as `Rating - 1`, so anything outside 1..5 would become
# an out-of-range class index and only surface as an opaque error in training.
invalid_ratings = df.loc[~df['Rating'].between(1, 5), 'Rating']
if not invalid_ratings.empty:
    raise ValueError(
        f'Rating must be in 1..5, found: {sorted(invalid_ratings.unique().tolist())}'
    )

# Split the dataset into training and testing sets (80/20, fixed seed)
train_df, test_df = train_test_split(df, test_size=0.2, random_state=SEED)

# Seed torch before the model is created: the classification head is newly
# initialised (not loaded from the checkpoint) and the training DataLoader
# shuffles, both of which draw from torch's global RNG.
# NOTE(review): some GPU kernels may still be non-deterministic -- confirm if
# bit-exact reproducibility is required.
torch.manual_seed(SEED)

# Load BERT tokenizer and model (5 output classes, one per rating value)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=5)

# Tokenize and preprocess the data
def tokenize_reviews(reviews, max_length=128):
    """Tokenize review texts into fixed-length BERT input tensors.

    Uses the module-level ``tokenizer``. All reviews are encoded in a single
    batched tokenizer call instead of one ``encode_plus`` call per review
    followed by ``torch.cat``.

    Args:
        reviews: Iterable of review strings (e.g. a list or a 1-D array).
        max_length: Every sequence is truncated or padded to exactly this
            many tokens, special tokens included.

    Returns:
        Tuple ``(input_ids, attention_masks)``; each is a tensor of shape
        ``(len(reviews), max_length)``. For empty input both tensors have
        shape ``(0, max_length)`` and dtype ``torch.long``.
    """
    # Materialise once: the tokenizer expects a plain list of strings, and
    # this also lets us detect empty input.
    texts = list(reviews)

    if not texts:
        # Nothing to encode; return correctly shaped empty tensors rather than
        # failing on an empty batch.
        empty_ids = torch.empty((0, max_length), dtype=torch.long)
        return empty_ids, torch.empty((0, max_length), dtype=torch.long)

    encoded = tokenizer(
        texts,
        add_special_tokens=True,
        max_length=max_length,
        padding='max_length',  # pad every row to max_length so rows stack
        truncation=True,       # cut reviews longer than max_length
        return_tensors='pt',
    )

    return encoded['input_ids'], encoded['attention_mask']

# Tokenize training data
train_input_ids, train_attention_masks = tokenize_reviews(train_df['Review'].values)
# Shift ratings 1..5 to class indices 0..4. The dtype is forced to int64
# because class-index targets for cross-entropy must be `torch.long`, and the
# NumPy integer produced by `astype(int)` can be 32-bit on some platforms.
train_labels = torch.tensor(train_df['Rating'].values - 1, dtype=torch.long)

# Tokenize testing data
test_input_ids, test_attention_masks = tokenize_reviews(test_df['Review'].values)
test_labels = torch.tensor(test_df['Rating'].values - 1, dtype=torch.long)  # same 0..4 shift as above

# Create DataLoader for training and testing
batch_size = 16
train_dataset = TensorDataset(train_input_ids, train_attention_masks, train_labels)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)  # reshuffled every epoch

test_dataset = TensorDataset(test_input_ids, test_attention_masks, test_labels)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)  # fixed order for evaluation

# Set up the optimizer and loss function
# NOTE(review): `AdamW` here is the one imported from `transformers`; that
# implementation is reportedly deprecated in favour of `torch.optim.AdamW`
# -- confirm against the installed transformers version before upgrading.
optimizer = AdamW(model.parameters(), lr=2e-5)
# The training/evaluation loop in this cell reads the loss from `outputs.loss`
# and does not reference `criterion`; it is kept so the name stays defined.
criterion = torch.nn.CrossEntropyLoss()

# Training loop
num_epochs = 1
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

for epoch in range(num_epochs):
    # ---- Training pass: one sweep over train_loader with weight updates ----
    model.train()
    total_loss = 0.0
    correct_predictions_train = 0

    for batch in tqdm(train_loader, desc=f'Epoch {epoch + 1}/{num_epochs} - Training'):
        # Each batch is (input_ids, attention_masks, labels); move all three
        # to the device the model lives on.
        input_ids, attention_masks, labels = (tensor.to(device) for tensor in batch)

        # Labels are passed to the forward call; the loss is read from the output.
        outputs = model(input_ids, attention_mask=attention_masks, labels=labels)
        loss = outputs.loss

        # Optimisation step: clear stale gradients, backpropagate, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Running statistics for this epoch.
        total_loss += loss.item()
        predictions = outputs.logits.argmax(dim=1)
        correct_predictions_train += (predictions == labels).sum().item()

    # Loss is the mean of per-batch losses; accuracy is per-sample.
    average_loss_train = total_loss / len(train_loader)
    accuracy_train = correct_predictions_train / len(train_dataset)

    print(f'Epoch {epoch + 1}/{num_epochs}, Training Loss: {average_loss_train:.4f}, Training Accuracy: {accuracy_train:.4f}')

    # ---- Evaluation pass on the held-out test split ----
    # (reported as "Validation" in the printed metrics)
    model.eval()
    total_loss_val = 0.0
    correct_predictions_val = 0
    all_predictions = []  # rebuilt every epoch; holds the last epoch's results after the loop
    all_labels = []

    with torch.no_grad():  # no gradients needed while scoring
        for batch in tqdm(test_loader, desc=f'Epoch {epoch + 1}/{num_epochs} - Evaluating'):
            input_ids, attention_masks, labels = (tensor.to(device) for tensor in batch)

            outputs = model(input_ids, attention_mask=attention_masks, labels=labels)
            loss_val = outputs.loss
            total_loss_val += loss_val.item()

            predictions_val = outputs.logits.argmax(dim=1)
            correct_predictions_val += (predictions_val == labels).sum().item()

            # Collect per-sample results on the CPU for the sklearn reports.
            all_predictions.extend(predictions_val.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    average_loss_val = total_loss_val / len(test_loader)
    accuracy_val = correct_predictions_val / len(test_dataset)

# Print the final metrics after all epochs
print(f'Final Training Loss: {average_loss_train:.4f}, Final Training Accuracy: {accuracy_train:.4f}')
print(f'Final Validation Loss: {average_loss_val:.4f}, Final Validation Accuracy: {accuracy_val:.4f}')

# Pin the class set explicitly. Without `labels=...` scikit-learn infers the
# classes from the values present in the data, so a class that never appears
# in either list would shrink the matrix and shift what each row/column means.
# Index i corresponds to rating i + 1 (labels were built as Rating - 1).
class_ids = list(range(model.config.num_labels))

# Calculate and print confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(all_labels, all_predictions, labels=class_ids)
print('Confusion Matrix:\n', conf_matrix)

# Calculate and print classification report (per-class precision/recall/F1)
classification_report_str = classification_report(all_labels, all_predictions, labels=class_ids)
print('Final Classification Report:\n', classification_report_str)

# Save the trained model and its tokenizer side by side in one directory.
# The tokenizer call is intentionally last: its return value (the saved file
# paths) is what the cell displays as output.
model.save_pretrained('bert_sentiment_model')
tokenizer.save_pretrained('bert_sentiment_model')


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/1 - Training: 100%|██████████| 1991/1991 [10:54<00:00,  3.04it/s]


Epoch 1/1, Training Loss: 0.6746, Training Accuracy: 0.7318


Epoch 1/1 - Evaluating: 100%|██████████| 498/498 [00:57<00:00,  8.72it/s]


Final Training Loss: 0.6746, Final Training Accuracy: 0.7318
Final Validation Loss: 0.3875, Final Validation Accuracy: 0.8670
Confusion Matrix:
 [[1305   91    7    4    8]
 [  78  458   47    7    2]
 [   6   97  705  120   33]
 [   1    9   71 1542  327]
 [   5    3    2  141 2895]]
Final Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.92      0.93      1415
           1       0.70      0.77      0.73       592
           2       0.85      0.73      0.79       961
           3       0.85      0.79      0.82      1950
           4       0.89      0.95      0.92      3046

    accuracy                           0.87      7964
   macro avg       0.84      0.83      0.84      7964
weighted avg       0.87      0.87      0.87      7964



('bert_sentiment_model/tokenizer_config.json',
 'bert_sentiment_model/special_tokens_map.json',
 'bert_sentiment_model/vocab.txt',
 'bert_sentiment_model/added_tokens.json')