In [3]:
import pandas as pd
import torch
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Read the CSV, keep only the text and label columns, and discard any row
# where either of them is missing.
df = (
    pd.read_csv('dataset.csv')
    .loc[:, ['Comment', 'Sentiment']]
    .dropna()
)

# Number of samples per sentiment class (reveals class imbalance)
print(df.Sentiment.value_counts())

Sentiment
2    3110
1    2012
0    1840
Name: count, dtype: int64


In [5]:
# Load the BERT tokenizer for the 'bert-base-uncased' checkpoint. The model
# cell further down loads the same checkpoint name, so the two stay paired.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Custom Dataset class for tokenized inputs
class CommentDataset(Dataset):
    """Map-style dataset pairing pre-tokenized comments with integer class labels.

    Every comment is tokenized once, up front, in ``__init__``; ``__getitem__``
    only wraps the already-computed per-sample lists in tensors.

    Args:
        comments: Iterable of comment strings (list, pandas Series, ...).
        labels: Iterable of class ids, one per comment, in the same order.
        tokenizer: Callable invoked as
            ``tokenizer(list_of_str, truncation=True, padding=True, max_length=max_len)``
            and returning a mapping of field name -> per-sample sequences.
        max_len: Maximum sequence length passed to the tokenizer.

    Raises:
        ValueError: If ``comments`` and ``labels`` differ in length.
    """

    def __init__(self, comments, labels, tokenizer, max_len=128):
        # Materialize both inputs as plain lists so that later positional
        # indexing is not hijacked by a pandas index (a Series that went through
        # a shuffled split would otherwise be looked up by label, not position).
        comments = list(comments)
        labels = list(labels)
        if len(comments) != len(labels):
            raise ValueError(
                f"comments and labels must have the same length, "
                f"got {len(comments)} and {len(labels)}"
            )
        self.encodings = tokenizer(comments, truncation=True, padding=True, max_length=max_len)
        self.labels = labels

    def __getitem__(self, idx):
        """Return a dict of tensors for sample ``idx``, including a 'labels' entry."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        # Force an integer dtype: float labels (e.g. 2.0 after NaN handling in
        # pandas) would otherwise produce a float tensor instead of class ids.
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.labels)


In [6]:
# Hold out 20% of the rows for testing. The split is stratified on the
# sentiment label so both parts keep the same class proportions, and seeded
# so it is reproducible.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['Comment'].to_list(),
    df['Sentiment'].to_list(),
    stratify=df['Sentiment'],
    random_state=42,
    test_size=0.2,
)

# Tokenize each split once and wrap it as a torch Dataset (train first, then test)
train_dataset, test_dataset = (
    CommentDataset(texts, labels, tokenizer)
    for texts, labels in ((train_texts, train_labels), (test_texts, test_labels))
)


In [7]:
# Instantiate BERT with a sequence-classification head sized for the three
# sentiment classes. The head is newly initialized and must be trained.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=3,
)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
# Hyper-parameters and bookkeeping locations for the Trainer
training_args = TrainingArguments(
    # where checkpoints and logs are written
    output_dir='./results',
    logging_dir='./logs',
    logging_steps=10,
    # optimisation schedule
    num_train_epochs=3,
    warmup_steps=100,
    weight_decay=0.01,
    # batch sizes (evaluation needs no gradients, so it can use a larger batch)
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
)


In [9]:
# Metric callback handed to the Trainer
def compute_metrics(pred):
    """Compute accuracy and weighted precision/recall/F1 for one evaluation pass.

    Args:
        pred: Object exposing ``label_ids`` (true class ids) and
            ``predictions`` (per-class scores); the predicted class is the
            argmax over the last axis of ``predictions``.

    Returns:
        dict with the keys 'accuracy', 'f1', 'precision' and 'recall'.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    # zero_division=0 yields the same numbers as the default but without
    # emitting a warning when some class is never predicted.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='weighted', zero_division=0
    )
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

In [10]:
# Wire the model, data splits, hyper-parameters and metric callback together
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    eval_dataset=test_dataset,
    train_dataset=train_dataset,
)

# Fine-tune; the returned TrainOutput is displayed as the cell result
trainer.train()

Step,Training Loss
10,1.078
20,1.1111
30,1.0668
40,1.0206
50,0.979
60,0.9276
70,0.9099
80,0.7788
90,0.6776
100,0.9599


TrainOutput(global_step=1047, training_loss=0.4042539799224795, metrics={'train_runtime': 394.47, 'train_samples_per_second': 42.353, 'train_steps_per_second': 2.654, 'total_flos': 1098958967495424.0, 'train_loss': 0.4042539799224795, 'epoch': 3.0})

In [12]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score


In [13]:
# Score the fine-tuned model on the held-out split and show the metric dict
eval_result = trainer.evaluate()
print("Evaluation results:", eval_result, sep="\n")


Evaluation results:
{'eval_loss': 0.8226653337478638, 'eval_accuracy': 0.801148600143575, 'eval_f1': 0.7994659879691384, 'eval_precision': 0.800078890264438, 'eval_recall': 0.801148600143575, 'eval_runtime': 11.2776, 'eval_samples_per_second': 123.519, 'eval_steps_per_second': 1.951, 'epoch': 3.0}


In [14]:
# Run inference on the held-out split; the highest-scoring class is the prediction
predictions_output = trainer.predict(test_dataset)
preds = predictions_output.predictions.argmax(axis=-1)

# Per-class precision / recall / F1 against the true test labels
from sklearn.metrics import classification_report

print("Classification Report:")
print(
    classification_report(
        test_labels,
        preds,
        target_names=['Negative', 'Neutral', 'Positive'],
    )
)


Classification Report:
              precision    recall  f1-score   support

    Negative       0.73      0.76      0.75       368
     Neutral       0.78      0.69      0.74       403
    Positive       0.85      0.90      0.87       622

    accuracy                           0.80      1393
   macro avg       0.79      0.78      0.78      1393
weighted avg       0.80      0.80      0.80      1393



In [15]:
# Save the model and tokenizer
# Both are written to the same directory. The tokenizer call is the cell's
# last expression, so its return value (a tuple of written file paths) is what
# the notebook displays.
model.save_pretrained("./final_model")
tokenizer.save_pretrained("./final_model")


('./final_model\\tokenizer_config.json',
 './final_model\\special_tokens_map.json',
 './final_model\\vocab.txt',
 './final_model\\added_tokens.json')