In [None]:
import inspect

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch
from datasets import Dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, classification_report
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments

# Load the dataset
data_path = "IMDB Dataset.csv"
df = pd.read_csv(data_path)

# Debugging: Check the available columns
print("Columns in dataset:", df.columns)

# Verify required columns up front so a malformed CSV fails with a clear message
required_columns = ["review", "sentiment"]
for col in required_columns:
    if col not in df.columns:
        raise ValueError(f"Dataset must contain '{col}' column.")

# Keep only the columns used below; the review body is exposed as "text",
# which is the key tokenize_function reads.
df = df[required_columns].rename(columns={"review": "text"})

# Map sentiment labels to numerical values
label_map = {"negative": 0, "positive": 1}
df['label'] = df['sentiment'].map(label_map)

# Series.map yields NaN for every value that is not a key of label_map
# (different casing, stray whitespace, missing cells, ...). Fail here, naming
# the offending values, instead of handing NaN/float labels to the model.
unmapped_rows = df['label'].isna()
if unmapped_rows.any():
    unexpected_values = df.loc[unmapped_rows, 'sentiment'].unique().tolist()
    raise ValueError(
        f"Found {int(unmapped_rows.sum())} row(s) with unsupported sentiment values "
        f"{unexpected_values}; expected one of {list(label_map)}."
    )
# No NaN is left at this point, so the integer cast is lossless.
df['label'] = df['label'].astype("int64")

print("Data sample:\n", df.head())

# Convert the DataFrame to Hugging Face Dataset
dataset = Dataset.from_pandas(df)
# 80/20 split with a fixed seed so the partition is reproducible between runs
train_test_split = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_test_split['train']
validation_dataset = train_test_split['test']

# Pretrained checkpoint name; the same name is used further down when the
# classification model is loaded.
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)

# Tokenize the dataset
def tokenize_function(examples, max_length=128):
    """Tokenize the "text" entry of ``examples`` with the module-level tokenizer.

    Args:
        examples: Mapping with a "text" entry. Presumably a batch of raw
            review strings, since this function is passed to
            ``Dataset.map(..., batched=True)`` in this script.
        max_length: Token length every sequence is truncated and padded to.
            Defaults to 128, the value that used to be hard-coded, so existing
            ``map(tokenize_function, ...)`` calls behave exactly as before.

    Returns:
        Whatever ``tokenizer(...)`` returns for the given texts.
    """
    return tokenizer(examples['text'], truncation=True, padding="max_length", max_length=max_length)

# Run the tokenizer over both splits, batch by batch (training split first)
tokenized_train, tokenized_validation = (
    split.map(tokenize_function, batched=True)
    for split in (train_dataset, validation_dataset)
)

# Expose only the model inputs and the label, formatted as PyTorch tensors
for tokenized_split in (tokenized_train, tokenized_validation):
    tokenized_split.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

# Load the pre-trained BERT checkpoint configured for two output labels
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Training arguments
# transformers 4.41 renamed `evaluation_strategy` to `eval_strategy`, and newer
# releases no longer accept the old spelling. Pick whichever keyword the
# installed TrainingArguments exposes so the script runs on both sides of the
# rename.
_training_arg_names = inspect.signature(TrainingArguments.__init__).parameters
_eval_strategy_key = (
    "eval_strategy" if "eval_strategy" in _training_arg_names else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    # Save on the same schedule as evaluation (set via _eval_strategy_key
    # below); load_best_model_at_end relies on the two strategies matching.
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    save_total_limit=2,
    load_best_model_at_end=True,
    # "accuracy" is one of the keys returned by compute_metrics
    metric_for_best_model="accuracy",
    logging_strategy="steps",
    logging_steps=10,
    **{_eval_strategy_key: "epoch"},
)

# Compute metrics
def compute_metrics(pred):
    """Score one evaluation pass.

    Args:
        pred: Object exposing ``label_ids`` (reference labels) and
            ``predictions`` (an array of scores; the arg-max over its last
            axis is taken as the predicted class).

    Returns:
        Dict with the keys "accuracy", "f1", "precision" and "recall".
        Precision, recall and F1 are computed with ``average='binary'``.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)

    prf_scores = precision_recall_fscore_support(y_true, y_pred, average='binary')
    precision, recall, f1 = prf_scores[:3]  # the fourth element (support) is not reported

    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "f1": f1,
        "precision": precision,
        "recall": recall,
    }

# Initialize Trainer
# Newer transformers releases take the tokenizer through `processing_class`
# and deprecate the `tokenizer` keyword. Use whichever name the installed
# Trainer accepts so the script neither warns nor breaks across versions.
_trainer_arg_names = inspect.signature(Trainer.__init__).parameters
_tokenizer_key = "processing_class" if "processing_class" in _trainer_arg_names else "tokenizer"

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_validation,
    compute_metrics=compute_metrics,
    **{_tokenizer_key: tokenizer},
)

# Train the model
print("Training the model...")
trainer.train()

# Evaluate on validation set (the eval_dataset given to the Trainer above)
print("Evaluating the model on validation data...")
eval_results = trainer.evaluate()
print("Validation Results:", eval_results)

# Evaluate on a separate test dataset
print("Evaluating on test data...")
test_data_path = "IMDB_Test_Dataset.csv"  # Replace with your test dataset path
test_df = pd.read_csv(test_data_path)

# Apply the same column check the training CSV gets, so a malformed test file
# fails with a clear message instead of a bare KeyError.
for col in ("review", "sentiment"):
    if col not in test_df.columns:
        raise ValueError(f"Test dataset must contain '{col}' column.")

# Ensure test dataset follows the same structure
test_df = test_df[['review', 'sentiment']].rename(columns={"review": "text"})
test_df['label'] = test_df['sentiment'].map(label_map)

# Series.map yields NaN for every sentiment value that is not a key of
# label_map; such rows would corrupt the metrics, so reject them explicitly.
unmapped_test_rows = test_df['label'].isna()
if unmapped_test_rows.any():
    unexpected_test_values = test_df.loc[unmapped_test_rows, 'sentiment'].unique().tolist()
    raise ValueError(
        f"Found {int(unmapped_test_rows.sum())} test row(s) with unsupported sentiment values "
        f"{unexpected_test_values}; expected one of {list(label_map)}."
    )
# No NaN is left at this point, so the integer cast is lossless.
test_df['label'] = test_df['label'].astype("int64")

# Convert test dataset
test_dataset = Dataset.from_pandas(test_df)
tokenized_test = test_dataset.map(tokenize_function, batched=True)
tokenized_test.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

# Make predictions
predictions = trainer.predict(tokenized_test)
pred_labels = predictions.predictions.argmax(-1)
true_labels = predictions.label_ids

# Fix the class order explicitly. Without `labels=...` scikit-learn infers the
# classes from the data, so a test set or prediction vector containing a single
# class would give a 1x1 confusion matrix and make classification_report fail
# on the two target names.
class_names = sorted(label_map, key=label_map.get)  # names ordered by label id
class_ids = [label_map[name] for name in class_names]

# Generate metrics
# beta=2 weights recall more heavily than precision, hence "F2" rather than F1
precision, recall, f2, _ = precision_recall_fscore_support(true_labels, pred_labels, average='binary', beta=2)
accuracy = accuracy_score(true_labels, pred_labels)
conf_matrix = confusion_matrix(true_labels, pred_labels, labels=class_ids)

print(f"Test Accuracy: {accuracy}")
print(f"Test Precision: {precision}")
print(f"Test Recall: {recall}")
print(f"Test F2 Score: {f2}")

# Print classification report
print(
    "\nClassification Report:\n",
    classification_report(true_labels, pred_labels, labels=class_ids, target_names=class_names),
)

# Draw the confusion matrix as an annotated heatmap on an explicit Axes
class_tick_labels = ["negative", "positive"]
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',  # cell values are formatted as integers
    cmap="Blues",
    xticklabels=class_tick_labels,
    yticklabels=class_tick_labels,
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix")
plt.show()
