In [None]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertForSequenceClassification
from torch.optim import AdamW
import torch.nn.functional as F
from sklearn.metrics import confusion_matrix, classification_report, r2_score
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the labelled comment CSV into a DataFrame
file_path = "/content/ufc_comments.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Ensure required columns exist
required_columns = {'Comment', 'Sentiment'}
missing_columns = required_columns.difference(df.columns)
if missing_columns:
    # Raise instead of print + exit(): exit() targets the interpreter/kernel
    # as a whole, whereas an exception stops this cell (and a "Run All")
    # with a traceback that names exactly what is missing.
    raise ValueError(
        f"Error: Required columns not found in dataset: {sorted(missing_columns)}"
    )

In [None]:
# Keep only the text and label columns, discarding rows with missing values
df = df.loc[:, ['Comment', 'Sentiment']].dropna()

# Encode each sentiment name as an integer class id
sentiment_mapping = {'Positive': 1, 'Negative': 0, 'Neutral': 2}
df = df.assign(Sentiment_Label=df['Sentiment'].map(sentiment_mapping))

In [None]:
# Check for invalid labels
invalid_label_mask = df['Sentiment_Label'].isnull()
if invalid_label_mask.any():
    # Values of 'Sentiment' that sentiment_mapping did not cover end up as NaN
    # in 'Sentiment_Label'; list them so the data can be fixed.
    unexpected_values = sorted(
        df.loc[invalid_label_mask, 'Sentiment'].astype(str).unique()
    )
    # Raise instead of print + exit(): an exception halts this cell (and any
    # "Run All") without asking the interpreter/kernel itself to exit.
    raise ValueError(
        f"Error: Dataset contains invalid sentiment labels: {unexpected_values}"
    )

In [None]:
# Tokenization
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
comment_texts = df["Comment"].tolist()
# Pad to the longest comment, truncate at 128 tokens, return PyTorch tensors
tokens = tokenizer(
    comment_texts,
    padding=True,
    truncation=True,
    max_length=128,
    return_tensors="pt",
)


In [None]:
# Train-Test Split (20% held out, fixed seed for a repeatable partition)
X_train, X_test, y_train, y_test = train_test_split(
    tokens['input_ids'],
    df['Sentiment_Label'].values,
    test_size=0.2,
    random_state=42,
)
# Pair every token-id row with its label so a Dataset can index them together
train_data = [(input_ids, label) for input_ids, label in zip(X_train, y_train)]
test_data = [(input_ids, label) for input_ids, label in zip(X_test, y_test)]

In [None]:
# Custom Dataset Class
class SentimentDataset(Dataset):
    """Map-style dataset over an indexable collection of (input_ids, label) items."""

    def __init__(self, data):
        # Store a reference to the caller's collection; nothing is copied.
        self.data = data

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the features of item ``idx`` and its label as a long tensor."""
        sample = self.data[idx]
        label = torch.tensor(sample[1], dtype=torch.long)
        return sample[0], label

train_dataset = SentimentDataset(train_data)
test_dataset = SentimentDataset(test_data)
# Only the training batches are shuffled; the test loader keeps its order
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16)


In [None]:
# Load BERT Model
# Pick the GPU when one is visible to PyTorch, otherwise fall back to the CPU
use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu")
# Pretrained encoder with a 3-way classification head
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=3)
model.to(device)

In [None]:
from torch.optim import AdamW
from torch.cuda.amp import GradScaler, autocast
from transformers import get_scheduler

# Training hyperparameters; these mirror train_model's default arguments
# (epochs=5, accumulation_steps=2) and must be kept in sync with them.
NUM_EPOCHS = 5
ACCUMULATION_STEPS = 2

# Optimizer
optimizer = AdamW(model.parameters(), lr=2e-5)

# The scheduler is stepped once per optimizer update, not once per batch.
# With gradient accumulation there are ceil(batches / ACCUMULATION_STEPS)
# updates per epoch (the last, possibly partial, group also triggers one),
# so the schedule length is counted in updates. Counting batches instead
# would stop the linear decay roughly halfway.
updates_per_epoch = -(-len(train_loader) // ACCUMULATION_STEPS)  # ceil division
num_training_steps = updates_per_epoch * NUM_EPOCHS
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps
)

# Mixed Precision: only enable loss scaling when CUDA is actually available;
# otherwise the scaler is a pass-through.
scaler = GradScaler(enabled=torch.cuda.is_available())

# Improved Training Loop for 5 epochs
def train_model(model, train_loader, epochs=5, accumulation_steps=2):
    """Fine-tune ``model`` on ``train_loader`` with gradient accumulation and AMP.

    Uses the module-level ``device``, ``optimizer``, ``lr_scheduler`` and
    ``scaler`` objects.

    Args:
        model: Classification model called as
            ``model(input_ids, attention_mask=..., labels=...)``; its output
            must expose ``.loss`` and ``.logits``.
        train_loader: Sized iterable yielding ``(input_ids, labels)`` batches.
        epochs: Number of passes over ``train_loader``.
        accumulation_steps: Number of batches whose gradients are accumulated
            before each optimizer update.

    Returns:
        None. Prints the average loss and accuracy after every epoch.
    """
    model.train()
    # Clear gradients left over from an earlier (possibly interrupted) run so
    # they are not folded into the first optimizer update.
    model.zero_grad()

    # The batches only carry input ids, so the attention mask is rebuilt from
    # the model's padding id. Without a mask the padded positions are attended
    # to like real tokens. NOTE: evaluation/inference should mask padding the
    # same way so the model sees consistent inputs.
    pad_token_id = getattr(getattr(model, "config", None), "pad_token_id", None)
    num_batches = len(train_loader)

    for epoch in range(epochs):
        total_loss = 0
        correct, total = 0, 0  # Track accuracy

        for step, batch in enumerate(train_loader):
            inputs, labels = batch
            inputs, labels = inputs.to(device), labels.to(device)

            attention_mask = None
            if pad_token_id is not None:
                attention_mask = (inputs != pad_token_id).long()

            # Autocast follows the scaler: both are active or both are off.
            with autocast(enabled=scaler.is_enabled()):
                outputs = model(inputs, attention_mask=attention_mask, labels=labels)
                # Scale down so the accumulated gradient matches one large batch
                loss = outputs.loss / accumulation_steps

            scaler.scale(loss).backward()

            # Update on every accumulation_steps-th batch, and on the final
            # batch so a trailing partial group is not dropped.
            if (step + 1) % accumulation_steps == 0 or (step + 1) == num_batches:
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()
                lr_scheduler.step()

            # Undo the accumulation scaling for reporting
            total_loss += loss.item() * accumulation_steps

            predictions = torch.argmax(outputs.logits, dim=1)
            correct += (predictions == labels).sum().item()
            total += labels.size(0)

        avg_loss = total_loss / num_batches
        accuracy = correct / total * 100
        print(f"Epoch {epoch+1}: Loss = {avg_loss:.4f}, Accuracy = {accuracy:.2f}%")

# Start fine-tuning with train_model's default epoch/accumulation settings
train_model(model=model, train_loader=train_loader)


In [None]:
from sklearn.metrics import confusion_matrix, classification_report, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
import torch.nn.functional as F

# Updated Evaluation Function
def evaluate_model(model, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print/plot classification metrics.

    Uses the module-level ``device``.

    Args:
        model: Classification model called as
            ``model(input_ids, attention_mask=...)``; its output must expose
            ``.logits``.
        test_loader: Iterable yielding ``(input_ids, labels)`` batches.

    Returns:
        None. Prints accuracy, R² and a classification report, and shows a
        confusion-matrix heatmap.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    # Index in this list == class id (0: Negative, 1: Positive, 2: Neutral)
    class_names = ['Negative', 'Positive', 'Neutral']
    class_ids = list(range(len(class_names)))

    model.eval()
    # The batches only carry input ids, so rebuild the attention mask from the
    # model's padding id; otherwise padded positions are attended to like real
    # tokens. NOTE: keep this consistent with how batches are fed in training.
    pad_token_id = getattr(getattr(model, "config", None), "pad_token_id", None)
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in test_loader:
            inputs, labels = batch
            inputs, labels = inputs.to(device), labels.to(device)
            attention_mask = None
            if pad_token_id is not None:
                attention_mask = (inputs != pad_token_id).long()
            outputs = model(inputs, attention_mask=attention_mask)
            # argmax over logits equals argmax over softmax probabilities
            predictions = torch.argmax(outputs.logits, dim=1)
            all_preds.extend(predictions.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    if not all_labels:
        raise ValueError("test_loader produced no samples to evaluate")

    # Metrics
    accuracy = sum(p == l for p, l in zip(all_preds, all_labels)) / len(all_labels)
    # NOTE(review): R² treats the class ids as numeric quantities, which is not
    # meaningful for nominal classes; it is kept only so the printed output
    # stays the same.
    r2 = r2_score(all_labels, all_preds)
    # Pin the label set so the matrix is always 3x3 and target_names always
    # line up, even if a class is absent from this split.
    cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
    report = classification_report(
        all_labels, all_preds, labels=class_ids, target_names=class_names
    )

    print(f"\nTest Accuracy: {accuracy:.4f}")
    print(f"R² Score: {r2:.4f}")
    print("\nClassification Report:\n", report)

    # Plot Confusion Matrix
    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.show()

# Score the fine-tuned model on the held-out batches
evaluate_model(model=model, test_loader=test_loader)


In [None]:
# Predict sentiment for new comments
def predict_sentiment(model, comments):
    """Return a sentiment name for each comment.

    Uses the module-level ``tokenizer`` and ``device``.

    Args:
        model: Classification model called as
            ``model(input_ids, attention_mask=...)``; its output must expose
            ``.logits``.
        comments: Text input accepted by ``tokenizer`` (e.g. a list of strings).

    Returns:
        list[str]: One of 'Negative', 'Positive' or 'Neutral' per comment, in
        input order. An empty list/tuple yields an empty list.
    """
    # Nothing to classify: return early rather than pushing an empty batch
    # through the tokenizer and the model.
    if isinstance(comments, (list, tuple)) and len(comments) == 0:
        return []

    model.eval()
    tokens = tokenizer(comments, padding=True, truncation=True, max_length=128, return_tensors="pt").to(device)
    with torch.no_grad():
        # Pass the attention mask so padding added to shorter comments is
        # ignored; otherwise a comment's prediction depends on what else is
        # in the batch.
        outputs = model(tokens['input_ids'], attention_mask=tokens['attention_mask'])
        # argmax over logits equals argmax over softmax probabilities
        predictions = torch.argmax(outputs.logits, dim=1).cpu().tolist()
    sentiment_labels = {0: 'Negative', 1: 'Positive', 2: 'Neutral'}
    return [sentiment_labels[pred] for pred in predictions]

In [None]:
# Example predictions
new_comments = ["This fight good !"]
predictions = predict_sentiment(model, new_comments)
# Print every comment next to the label predicted for it
for idx, comment in enumerate(new_comments):
    sentiment = predictions[idx]
    print(f"Comment: {comment} => Sentiment: {sentiment}")


In [None]:
# Shell command (IPython `!`): run nbconvert on UFC_sentiment_analysis_.ipynb in place, with --clear-output and the ClearMetadataPreprocessor enabled.
!jupyter nbconvert --ClearMetadataPreprocessor.enabled=True --clear-output --inplace UFC_sentiment_analysis_.ipynb
