In [20]:
import pandas as pd
from datasets import Dataset
from sklearn.model_selection import train_test_split
from transformers import BertForSequenceClassification, Trainer, TrainingArguments, BertTokenizer
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [21]:
# Read the IMDB reviews CSV into a DataFrame; later cells use its
# 'review' (text) and 'sentiment' (label string) columns.
df = pd.read_csv(
    "archive/IMDB Dataset.csv"
)

In [22]:
# Encode labels for binary sentiment classification: 1 = positive, 0 = negative.
# An explicit mapping is used instead of "anything else -> 0" so that an unexpected
# value (missing, misspelled, differently cased) is reported rather than being
# silently trained on as a negative review.
label_map = {'positive': 1, 'negative': 0}
encoded_labels = df['sentiment'].map(label_map)  # values not in label_map become NaN
if encoded_labels.isna().any():
    unexpected_values = df.loc[encoded_labels.isna(), 'sentiment'].unique().tolist()
    raise ValueError(f"Unexpected sentiment values (expected 'positive'/'negative'): {unexpected_values}")
df['label'] = encoded_labels.astype('int64')  # same integer dtype the original apply() produced

In [23]:
# Split the data into training (80%) and validation (20%) sets.
# random_state pins the shuffle so a Restart & Run All yields the same split
# (and therefore comparable metrics); stratify keeps the positive/negative
# ratio the same in both parts.
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['label'],
)

In [24]:
# Wrap each pandas split in a Hugging Face Dataset (training first, then validation)
train_dataset, val_dataset = (
    Dataset.from_pandas(frame) for frame in (train_df, val_df)
)

In [25]:
# Instantiate the BERT tokenizer from the 'bert-base-uncased' checkpoint
# (the same checkpoint name the model is loaded from further down).
tokenizer = BertTokenizer.from_pretrained(
    'bert-base-uncased'
)

In [26]:
# Batch tokenizer used with Dataset.map
def tokenize_function(examples):
    """Tokenize the 'review' texts of a batch.

    Args:
        examples: mapping with a 'review' entry holding the texts to encode
            (a batch of rows when called through ``Dataset.map(..., batched=True)``).

    Returns:
        The result of calling the module-level ``tokenizer`` on those texts with
        ``padding="max_length"`` and ``truncation=True``.
    """
    reviews = examples['review']
    encoding_options = {"padding": "max_length", "truncation": True}
    return tokenizer(reviews, **encoding_options)

In [27]:
# Run the tokenizer over both splits in batches (training first, then validation)
train_dataset, val_dataset = (
    split.map(tokenize_function, batched=True)
    for split in (train_dataset, val_dataset)
)

Map:   0%|          | 0/40000 [00:00<?, ? examples/s]

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

In [28]:
# Make both splits return PyTorch tensors, restricted to the model inputs and the label
for _split in (train_dataset, val_dataset):
    _split.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

In [29]:
# Load pretrained BERT with a sequence-classification head for two labels.
# The classifier head is not part of the checkpoint and is newly initialized
# (see the warning printed below), so the model has to be fine-tuned before use.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=2,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [30]:
# Configure the fine-tuning run
training_args = TrainingArguments(
    # where results and logs are written
    output_dir='./results',
    logging_dir='./logs',
    # schedule: 3 epochs, with an evaluation pass at the end of each one
    num_train_epochs=3,
    evaluation_strategy="epoch",
    # 8 examples per device per batch, for both training and evaluation
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # optimization: 500 warmup steps for the learning-rate scheduler, weight decay 0.01
    warmup_steps=500,
    weight_decay=0.01,
    # write a log entry every 10 steps
    logging_steps=10,
)



In [31]:
# Metrics callback handed to the Trainer
def compute_metrics(p):
    """Compute accuracy, precision, recall and F1 for one evaluation pass.

    Args:
        p: object exposing ``predictions`` (per-class scores; the argmax over the
            last axis is taken as the predicted label) and ``label_ids`` (true labels).

    Returns:
        dict with the keys 'accuracy', 'precision', 'recall' and 'f1'. Precision,
        recall and F1 are computed with ``average='binary'``.
    """
    y_true = p.label_ids
    y_pred = p.predictions.argmax(-1)

    scores = {'accuracy': accuracy_score(y_true, y_pred)}
    # The fourth element returned by sklearn (support) is not reported.
    prf_support = precision_recall_fscore_support(y_true, y_pred, average='binary')
    scores.update(zip(('precision', 'recall', 'f1'), prf_support))
    return scores

In [32]:
# Wire the model, run configuration, data splits and metrics callback into a Trainer
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,  # called on every evaluation pass
    eval_dataset=val_dataset,
    train_dataset=train_dataset,
)

In [33]:
# Train the model
# Runs the fine-tuning loop configured above (3 epochs, evaluation after each epoch).
# Left as the cell's last expression so the returned TrainOutput is displayed.
trainer.train() # Slow: the recorded run took 15000 steps and about 15952 s (train_runtime)

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.3352,0.255632,0.9271,0.939437,0.914269,0.926682
2,0.1704,0.244981,0.9376,0.940355,0.935503,0.937923
3,0.0644,0.320947,0.9382,0.940251,0.936892,0.938569


TrainOutput(global_step=15000, training_loss=0.2002308165420778, metrics={'train_runtime': 15952.456, 'train_samples_per_second': 7.522, 'train_steps_per_second': 0.94, 'total_flos': 3.15733266432e+16, 'train_loss': 0.2002308165420778, 'epoch': 3.0})

In [34]:
# Evaluate the model
# Scores the trained model on the eval_dataset given to the Trainer (val_dataset) and
# returns a dict of metrics; the next cell reads its 'eval_loss', 'eval_accuracy',
# 'eval_precision', 'eval_recall' and 'eval_f1' entries.
eval_results = trainer.evaluate() # Evaluate the model on the validation set

In [35]:
# Report the validation metrics, one per line
for _title, _key in (
    ("Loss", "eval_loss"),
    ("Accuracy", "eval_accuracy"),
    ("Precision", "eval_precision"),
    ("Recall", "eval_recall"),
    ("F1 Score", "eval_f1"),
):
    print(f"Validation {_title}: {eval_results[_key]}")

Validation Loss: 0.3209468126296997
Validation Accuracy: 0.9382
Validation Precision: 0.9402509460266879
Validation Recall: 0.9368922405239135
Validation F1 Score: 0.9385685884691849


In [36]:
# Save the fine-tuned model together with its tokenizer so the output directory is
# self-contained: both can later be reloaded with from_pretrained(model_path)
# without depending on the original 'bert-base-uncased' download.
model_path = 'bert_sentiment_analysis_model'
model.save_pretrained(model_path)
tokenizer.save_pretrained(model_path)