<a href="https://colab.research.google.com/github/Ayushverma41/Mental-State-Prediction-using-NLP/blob/main/Code/DeBERTa-v3%20%2B%20Attention%20Pooling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ==========================================================
# INSTALL DEPENDENCIES
# ==========================================================
!pip install transformers torch scikit-learn matplotlib seaborn joblib

# ==========================================================
# IMPORTS
# ==========================================================
import os
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
from torch.utils.data import Dataset
import torch.nn as nn
from transformers import (
    AutoTokenizer, AutoModel,
    Trainer, TrainingArguments,
    EarlyStoppingCallback, logging
)
import joblib


In [None]:
# ==========================================================
# LOAD DATA
# ==========================================================
data_path = "/content/drive/MyDrive/Mental State model/Data/Train_Data.csv"
df = pd.read_csv(data_path)

# Empty CSV cells are read as float NaN, which neither the tokenizer
# (`statement`) nor the label encoder (`status`) can handle, so drop such rows
# up front and report how many were removed.
rows_before = len(df)
df = df.dropna(subset=["statement", "status"]).reset_index(drop=True)
df["statement"] = df["statement"].astype(str)
if len(df) != rows_before:
    print(f"Dropped {rows_before - len(df)} rows with a missing statement/status.")

print("üìä Dataset Loaded Successfully!")
print(df.head())

In [None]:
# ==========================================================
# LABEL ENCODING
# ==========================================================
# Learn the class vocabulary from the `status` column, then store each row's
# integer class id in a new `label` column.
label_encoder = LabelEncoder()
label_encoder.fit(df['status'])
df['label'] = label_encoder.transform(df['status'])
num_labels = len(label_encoder.classes_)

print("üß© Classes:", label_encoder.classes_)
print("Total Classes:", num_labels)

In [None]:
# ==========================================================
# TRAIN-TEST SPLIT
# ==========================================================
all_statements = df['statement'].tolist()
all_labels = df['label'].tolist()

# 80/20 split with a fixed seed, stratified on the label column.
split_parts = train_test_split(
    all_statements,
    all_labels,
    test_size=0.2,
    random_state=42,
    stratify=df['label'],
)
train_texts, val_texts, train_labels, val_labels = split_parts

In [None]:
# ==========================================================
# TOKENIZATION
# ==========================================================
# Checkpoint used for both the tokenizer (here) and the encoder (model cell).
model_name = "microsoft/deberta-v3-base"
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)

class MentalHealthDataset(Dataset):
    """Torch dataset over statements that are tokenized once, up front.

    ``tokenizer`` is called a single time on the whole ``texts`` list with
    truncation, padding and ``max_length=128``; ``labels`` is stored as given.
    Each item is a dict holding one tensor per tokenizer field plus a
    ``'labels'`` tensor.
    """

    def __init__(self, texts, labels, tokenizer):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=128)
        self.labels = labels

    def __len__(self):
        # One sample per label.
        return len(self.labels)

    def __getitem__(self, idx):
        # Tensor-ize every tokenizer field for this index, then add its label.
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Build the training dataset first, then the validation one, with the shared tokenizer.
train_dataset, val_dataset = (
    MentalHealthDataset(split_texts, split_labels, tokenizer)
    for split_texts, split_labels in (
        (train_texts, train_labels),
        (val_texts, val_labels),
    )
)


In [None]:
# ==========================================================
# MODEL DEFINITION (DeBERTa + Attention Pooling)
# ==========================================================
class DebertaAttentionClassifier(nn.Module):
    """Transformer encoder with learned attention pooling and an MLP head.

    Encoder token states are projected to ``embedding_dim``, scored by a small
    tanh network, softmax-normalized along dim 1 and summed into one vector
    per example, which the classifier head maps to ``output_dim`` logits.

    Args:
        model_name: Checkpoint name/path passed to ``AutoModel.from_pretrained``.
        num_labels: Number of target classes; must not exceed ``output_dim``.
        embedding_dim: Width of the projection applied to the encoder states.
        hidden_dim: Width of the attention scorer and of hidden head layers.
        output_dim: Number of logits produced by the head.
        n_layers: Number of linear layers in the head (at least 1).
        dropout: Dropout probability after each hidden head layer.
        mask_padding: When True (default), positions whose ``attention_mask``
            is 0 receive no pooling weight, so logits do not depend on how far
            a batch was padded. Pass False to reproduce the earlier unmasked
            pooling, e.g. for weights that were trained that way.

    Raises:
        ValueError: If ``n_layers < 1`` or ``output_dim < num_labels``.
    """

    def __init__(self, model_name, num_labels,
                 embedding_dim=128, hidden_dim=128, output_dim=5, n_layers=2, dropout=0.3,
                 mask_padding=True):
        super().__init__()
        # Fail before the (expensive) checkpoint load on an unusable configuration.
        if n_layers < 1:
            raise ValueError(f"n_layers must be >= 1, got {n_layers}")
        if output_dim < num_labels:
            raise ValueError(
                f"output_dim ({output_dim}) is smaller than num_labels ({num_labels}); "
                "the head cannot produce a logit for every class"
            )
        self.mask_padding = mask_padding
        self.deberta = AutoModel.from_pretrained(model_name)
        self.embedding_proj = nn.Linear(self.deberta.config.hidden_size, embedding_dim)
        # The trailing Softmax stays in the Sequential so parameter names
        # ("attention.0.*", "attention.2.*") match previously saved state dicts.
        self.attention = nn.Sequential(
            nn.Linear(embedding_dim, hidden_dim),
            nn.Tanh(),
            nn.Linear(hidden_dim, 1),
            nn.Softmax(dim=1)
        )
        layers = []
        input_dim = embedding_dim
        for _ in range(n_layers - 1):
            layers += [
                nn.Linear(input_dim, hidden_dim),
                nn.ReLU(),
                nn.Dropout(dropout)
            ]
            input_dim = hidden_dim
        # Feed the output layer from the width actually produced above
        # (embedding_dim when n_layers == 1, hidden_dim otherwise).
        layers.append(nn.Linear(input_dim, output_dim))
        self.classifier = nn.Sequential(*layers)

    def forward(self, input_ids, attention_mask, labels=None, token_type_ids=None):
        """Return ``{'loss', 'logits'}``; ``loss`` is None when ``labels`` is None.

        ``token_type_ids`` is optional and only forwarded to the encoder when
        given, so a full tokenizer output can be passed through.
        """
        encoder_inputs = {"input_ids": input_ids, "attention_mask": attention_mask}
        if token_type_ids is not None:
            encoder_inputs["token_type_ids"] = token_type_ids
        outputs = self.deberta(**encoder_inputs)
        x = self.embedding_proj(outputs.last_hidden_state)

        if self.mask_padding:
            # Score with everything but the final Softmax, push masked
            # positions to the most negative value, then normalize over dim 1.
            scores = self.attention[:-1](x)
            is_padding = attention_mask.unsqueeze(-1) == 0
            scores = scores.masked_fill(is_padding, torch.finfo(scores.dtype).min)
            attn_weights = torch.softmax(scores, dim=1)
        else:
            attn_weights = self.attention(x)
        context_vector = torch.sum(attn_weights * x, dim=1)
        logits = self.classifier(context_vector)

        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(logits, labels)

        return {'loss': loss, 'logits': logits}

In [None]:
# ==========================================================
# TRAINING CONFIGURATION
# ==========================================================
def compute_metrics(p):
    """Score one evaluation pass.

    ``p.predictions`` holds per-class scores and ``p.label_ids`` the reference
    labels; the predicted class is the arg-max over axis 1. Returns a dict
    with the accuracy and the weighted-average F1.
    """
    references = p.label_ids
    predicted = np.argmax(p.predictions, axis=1)
    return {
        "accuracy": accuracy_score(references, predicted),
        "f1": f1_score(references, predicted, average='weighted'),
    }

import inspect

# One logit per encoded class: the head width follows the label encoder
# instead of a hard-coded 5.
model = DebertaAttentionClassifier(
    model_name=model_name,
    num_labels=num_labels,
    embedding_dim=128,
    hidden_dim=128,
    output_dim=num_labels,
    n_layers=2,
    dropout=0.3
)

# transformers renamed `evaluation_strategy` to `eval_strategy`; the install
# cell does not pin a version, so use whichever name this release accepts.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/Mental State model/Model/DeBERTa-v3_Attention Pooling/",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,               # 5 epochs
    weight_decay=0.01,                # weight-decay regularization
    load_best_model_at_end=True,      # restore the checkpoint with the lowest eval_loss
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    logging_dir="./logs",
    logging_strategy="epoch",
    seed=42,
    **{_eval_strategy_key: "epoch"},  # evaluate once per epoch (must match save_strategy)
)

# Stop when eval_loss has not improved for 2 consecutive evaluations.
early_stopping = EarlyStoppingCallback(early_stopping_patience=2)

# Same story for the Trainer's tokenizer argument (`tokenizer` -> `processing_class`).
_tokenizer_key = (
    "processing_class"
    if "processing_class" in inspect.signature(Trainer.__init__).parameters
    else "tokenizer"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    callbacks=[early_stopping],
    **{_tokenizer_key: tokenizer},
)

logging.set_verbosity_info()

In [None]:
# ==========================================================
# TRAINING START
# ==========================================================
# Run fine-tuning with the Trainer built in the configuration cell. Left as a
# bare expression so the notebook displays the value returned by train().
trainer.train()

In [None]:
# ==========================================================
# SAVE TRAINED MODEL & LABEL ENCODER
# ==========================================================
save_path = "/content/drive/MyDrive/Mental State model/Model/DeBERTa-v3_Attention Pooling/"
os.makedirs(save_path, exist_ok=True)

trainer.save_model(save_path)
# The classifier is a plain nn.Module, so Trainer.save_model only stores its
# state dict, and the file name/format depends on the transformers version and
# the `save_safetensors` setting. The inference cells read
# "pytorch_model.bin", so write that file explicitly (tensors moved to CPU).
cpu_state_dict = {name: tensor.detach().cpu() for name, tensor in trainer.model.state_dict().items()}
torch.save(cpu_state_dict, os.path.join(save_path, "pytorch_model.bin"))
tokenizer.save_pretrained(save_path)
joblib.dump(label_encoder, os.path.join(save_path, "label_encoder.pkl"))

print(f"\n‚úÖ Model, tokenizer, and label encoder saved successfully at:\n{save_path}")


In [None]:
# ==========================================================
# MODEL EVALUATION & VISUALIZATION
# ==========================================================
img_path = "/content/drive/MyDrive/Mental State model/Images/DeBERTa-v3_Attention Pooling/"
os.makedirs(img_path, exist_ok=True)

# Score the trained model on both splits (training split first).
train_metrics = trainer.evaluate(train_dataset)
val_metrics = trainer.evaluate(val_dataset)

# Pull accuracy, F1 and loss out of each metrics dict.
_metric_keys = ("eval_accuracy", "eval_f1", "eval_loss")
train_acc, train_f1, train_loss = [train_metrics[key] for key in _metric_keys]
val_acc, val_f1, val_loss = [val_metrics[key] for key in _metric_keys]

print("\nüìà Evaluation Results:")
print(f"Training Accuracy: {train_acc:.4f}, Validation Accuracy: {val_acc:.4f}")
print(f"Training F1-score: {train_f1:.4f}, Validation F1-score: {val_f1:.4f}")
print(f"Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")

# ==== Accuracy & F1 Bar Chart ====
# Grouped bars: one group per metric, training on the left, validation on the right.
metrics = ["Accuracy", "F1-score"]
train_values = [train_acc, train_f1]
val_values = [val_acc, val_f1]
x = np.arange(len(metrics))
width = 0.35

fig, ax = plt.subplots(figsize=(7, 5))
ax.bar(x - width / 2, train_values, width, label="Training", color="skyblue")
ax.bar(x + width / 2, val_values, width, label="Validation", color="lightgreen")
ax.set_xticks(x)
ax.set_xticklabels(metrics)
ax.set_ylabel("Score")
ax.set_title("Training vs Validation Accuracy & F1-score")
ax.legend()
fig.tight_layout()
fig.savefig(os.path.join(img_path, "Accuracy_F1_Comparison.png"))
plt.show()

# ==== Confusion Matrix & Heatmap ====
predictions = trainer.predict(val_dataset)
pred_labels = np.argmax(predictions.predictions, axis=1)
true_labels = predictions.label_ids

class_names = label_encoder.classes_
# Pin the label set to the encoder's class ids. Without it scikit-learn infers
# the labels from the data, so a class missing from the validation split and
# the predictions would shrink the matrix and misalign the tick labels, and
# classification_report would reject `target_names`.
class_ids = np.arange(len(class_names))
cm = confusion_matrix(true_labels, pred_labels, labels=class_ids)

plt.figure(figsize=(8,6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.title("Confusion Matrix - Validation Data")
plt.tight_layout()
plt.savefig(os.path.join(img_path, "Confusion_Matrix.png"))
plt.show()

print("\nüìã Classification Report:\n")
print(classification_report(true_labels, pred_labels, labels=class_ids, target_names=class_names))

# ==== Loss Visualization ====
if hasattr(trainer.state, "log_history"):
    history = trainer.state.log_history
    train_logs = [entry for entry in history if "loss" in entry]

    # trainer.evaluate(...) calls made after training append their own
    # "eval_loss" entries to log_history at the final step. Keep only the first
    # evaluation logged per step so the curve shows the in-training
    # evaluations, not those extra passes.
    first_eval_per_step = {}
    for entry in history:
        if "eval_loss" in entry:
            first_eval_per_step.setdefault(entry.get("step"), entry)
    eval_logs = list(first_eval_per_step.values())

    train_losses = [entry["loss"] for entry in train_logs]
    eval_losses = [entry["eval_loss"] for entry in eval_logs]
    # Plot against the logged epoch (falling back to the 1-based position) so
    # the "Epoch" axis shows real epoch numbers.
    train_epochs = [entry.get("epoch", pos + 1) for pos, entry in enumerate(train_logs)]
    eval_epochs = [entry.get("epoch", pos + 1) for pos, entry in enumerate(eval_logs)]

    plt.figure(figsize=(8,5))
    plt.plot(train_epochs, train_losses, label="Training Loss", marker='o')
    plt.plot(eval_epochs, eval_losses, label="Validation Loss", marker='o')
    plt.title("Training vs Validation Loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(os.path.join(img_path, "Loss_Comparison.png"))
    plt.show()

print(f"\n‚úÖ All visualizations saved in:\n{img_path}")

In [None]:
# ==========================================================
# SINGLE SENTENCE PREDICTION
# ==========================================================
def predict_sentence(sentence):
    """Return the decoded class label predicted for one sentence.

    Uses the notebook-level ``tokenizer``, ``model`` and ``label_encoder``.
    Inference runs in eval mode without gradients, on whatever device the
    model's parameters currently live on.
    """
    inputs = tokenizer(sentence, return_tensors='pt', truncation=True, padding=True, max_length=128)
    # After training the model may sit on a GPU while the tokenizer returns
    # CPU tensors, so move the inputs to the model's device.
    target_device = next(model.parameters()).device
    model.eval()  # disable dropout
    with torch.no_grad():
        # Pass only the two tensors forward() needs; the tokenizer output may
        # carry extra fields (e.g. token_type_ids).
        outputs = model(
            input_ids=inputs['input_ids'].to(target_device),
            attention_mask=inputs['attention_mask'].to(target_device),
        )
    preds = torch.argmax(outputs['logits'], dim=1)
    # .cpu() is required before .numpy() when the logits are on a GPU.
    return label_encoder.inverse_transform(preds.cpu().numpy())[0]

# Example
print("\nüîç Example Prediction:")
example_prediction = predict_sentence("I feel exhausted and mentally drained these days.")
print(example_prediction)

**Testing**

In [None]:
# ==========================================================
# IMPORTS
# ==========================================================
import torch
import pandas as pd
import numpy as np
from transformers import AutoTokenizer
from torch.utils.data import Dataset
from sklearn.preprocessing import LabelEncoder
import joblib
import os
from torch import nn
from transformers import AutoModel

# ==========================================================
# LOAD TRAINED MODEL, TOKENIZER, AND ENCODER
# ==========================================================
model_path = "/content/drive/MyDrive/Mental State model/Model/DeBERTa-v3_Attention Pooling/"

# Tokenizer files and the pickled label encoder both live in the model directory.
tokenizer = AutoTokenizer.from_pretrained(model_path)
encoder_file = os.path.join(model_path, "label_encoder.pkl")
label_encoder = joblib.load(encoder_file)

# Use the GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# ==========================================================
# MODEL DEFINITION (same architecture as training)
# ==========================================================
class DebertaAttentionClassifier(nn.Module):
    """Transformer encoder with learned attention pooling and an MLP head.

    Encoder token states are projected to ``embedding_dim``, scored by a small
    tanh network, softmax-normalized along dim 1 and summed into one vector
    per example, which the classifier head maps to ``output_dim`` logits.

    Args:
        model_name: Checkpoint name/path passed to ``AutoModel.from_pretrained``.
        num_labels: Number of target classes; must not exceed ``output_dim``.
        embedding_dim: Width of the projection applied to the encoder states.
        hidden_dim: Width of the attention scorer and of hidden head layers.
        output_dim: Number of logits produced by the head.
        n_layers: Number of linear layers in the head (at least 1).
        dropout: Dropout probability after each hidden head layer.
        mask_padding: When True (default), positions whose ``attention_mask``
            is 0 receive no pooling weight, so logits do not depend on how far
            a batch was padded. Pass False to reproduce the earlier unmasked
            pooling, e.g. for weights that were trained that way.

    Raises:
        ValueError: If ``n_layers < 1`` or ``output_dim < num_labels``.
    """

    def __init__(self, model_name, num_labels,
                 embedding_dim=128, hidden_dim=128, output_dim=5, n_layers=2, dropout=0.3,
                 mask_padding=True):
        super().__init__()
        # Fail before the (expensive) checkpoint load on an unusable configuration.
        if n_layers < 1:
            raise ValueError(f"n_layers must be >= 1, got {n_layers}")
        if output_dim < num_labels:
            raise ValueError(
                f"output_dim ({output_dim}) is smaller than num_labels ({num_labels}); "
                "the head cannot produce a logit for every class"
            )
        self.mask_padding = mask_padding
        self.deberta = AutoModel.from_pretrained(model_name)
        self.embedding_proj = nn.Linear(self.deberta.config.hidden_size, embedding_dim)
        # The trailing Softmax stays in the Sequential so parameter names
        # ("attention.0.*", "attention.2.*") match previously saved state dicts.
        self.attention = nn.Sequential(
            nn.Linear(embedding_dim, hidden_dim),
            nn.Tanh(),
            nn.Linear(hidden_dim, 1),
            nn.Softmax(dim=1)
        )
        layers = []
        input_dim = embedding_dim
        for _ in range(n_layers - 1):
            layers += [
                nn.Linear(input_dim, hidden_dim),
                nn.ReLU(),
                nn.Dropout(dropout)
            ]
            input_dim = hidden_dim
        # Feed the output layer from the width actually produced above
        # (embedding_dim when n_layers == 1, hidden_dim otherwise).
        layers.append(nn.Linear(input_dim, output_dim))
        self.classifier = nn.Sequential(*layers)

    def forward(self, input_ids, attention_mask, token_type_ids=None):
        """Return the logits tensor for a batch.

        ``token_type_ids`` is optional and only forwarded to the encoder when
        given, so a full tokenizer output can be passed with ``model(**enc)``.
        """
        encoder_inputs = {"input_ids": input_ids, "attention_mask": attention_mask}
        if token_type_ids is not None:
            encoder_inputs["token_type_ids"] = token_type_ids
        outputs = self.deberta(**encoder_inputs)
        x = self.embedding_proj(outputs.last_hidden_state)

        if self.mask_padding:
            # Score with everything but the final Softmax, push masked
            # positions to the most negative value, then normalize over dim 1.
            scores = self.attention[:-1](x)
            is_padding = attention_mask.unsqueeze(-1) == 0
            scores = scores.masked_fill(is_padding, torch.finfo(scores.dtype).min)
            attn_weights = torch.softmax(scores, dim=1)
        else:
            attn_weights = self.attention(x)
        context_vector = torch.sum(attn_weights * x, dim=1)
        logits = self.classifier(context_vector)
        return logits

# Load model
model_name = "microsoft/deberta-v3-base"
state_dict = torch.load(os.path.join(model_path, "pytorch_model.bin"), map_location=device)

# Size the head from the checkpoint itself instead of relying on the class
# default: the last Linear of `classifier` has the highest index, and its bias
# length is the number of logits the saved model produces.
head_bias_keys = [key for key in state_dict if key.startswith("classifier.") and key.endswith(".bias")]
if not head_bias_keys:
    raise KeyError("checkpoint contains no 'classifier.*.bias' entries")
output_dim = state_dict[max(head_bias_keys, key=lambda key: int(key.split(".")[1]))].shape[0]
if output_dim < len(label_encoder.classes_):
    raise ValueError(
        f"checkpoint head has {output_dim} outputs but the label encoder "
        f"knows {len(label_encoder.classes_)} classes"
    )

model = DebertaAttentionClassifier(
    model_name=model_name,
    num_labels=len(label_encoder.classes_),
    output_dim=output_dim,
)
model.load_state_dict(state_dict)
model.to(device)
model.eval()

print("‚úÖ Model, tokenizer, and label encoder loaded successfully!")

# ==========================================================
# LOAD TEST DATA
# ==========================================================
# Read the held-out test CSV and report its row count.
test_path = "/content/drive/MyDrive/Mental State model/Data/Test_Data.csv"
test_df = pd.read_csv(filepath_or_buffer=test_path)
print(f"üìÇ Test Data Loaded: {test_df.shape[0]} samples")

# ==========================================================
# TOKENIZE TEST DATA
# ==========================================================
class MentalHealthTestDataset(Dataset):
    """Label-free torch dataset over statements tokenized once, up front.

    ``tokenizer`` is called a single time on the whole ``texts`` list with
    truncation, padding and ``max_length=128``. Each item is a dict holding
    one tensor per tokenizer field.
    """

    def __init__(self, texts, tokenizer):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=128)

    def __len__(self):
        # One sample per row of input ids.
        return len(self.encodings['input_ids'])

    def __getitem__(self, idx):
        # Tensor-ize every tokenizer field at this index.
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        return sample

test_dataset = MentalHealthTestDataset(test_df['statement'].tolist(), tokenizer)

# ==========================================================
# PREDICT ON TEST DATA
# ==========================================================
predictions = []
batch_size = 16

with torch.no_grad():
    for i in range(0, len(test_dataset), batch_size):
        # Fetch samples one index at a time: slicing the dataset returns ONE
        # dict of stacked tensors, so iterating over it would yield key
        # strings rather than per-sample dicts.
        batch_end = min(i + batch_size, len(test_dataset))
        batch = [test_dataset[j] for j in range(i, batch_end)]
        input_ids = torch.stack([x['input_ids'] for x in batch]).to(device)
        attention_mask = torch.stack([x['attention_mask'] for x in batch]).to(device)

        logits = model(input_ids, attention_mask)
        preds = torch.argmax(logits, dim=1)
        predictions.extend(preds.cpu().numpy())

# Decode predicted labels
predicted_labels = label_encoder.inverse_transform(predictions)
test_df["Predicted_Status_DeBERTa_v3_Attention_Pooling"] = predicted_labels

# ==========================================================
# SAVE OUTPUT TO CSV
# ==========================================================
# Write the test frame, including the prediction column, without the index.
output_path = "/content/drive/MyDrive/Mental State model/Data/DeBERTa-v3_Attention_Pooling_Predictions.csv"
test_df.to_csv(path_or_buf=output_path, index=False)
print(f"‚úÖ Predictions saved successfully to:\n{output_path}")

# ==========================================================
# SINGLE SENTENCE PREDICTION FUNCTION
# ==========================================================
def predict_sentence(sentence):
    """Return the decoded class label predicted for one sentence.

    Uses the notebook-level ``tokenizer``, ``model``, ``device`` and
    ``label_encoder``; runs in eval mode without gradients.
    """
    model.eval()
    inputs = tokenizer(sentence, return_tensors='pt', truncation=True, padding=True, max_length=128).to(device)
    with torch.no_grad():
        # Pass only the two tensors forward() is declared with; unpacking the
        # whole tokenizer output could hand it extra fields (e.g.
        # token_type_ids) it does not accept.
        logits = model(inputs['input_ids'], inputs['attention_mask'])
        pred = torch.argmax(logits, dim=1).cpu().numpy()[0]
    return label_encoder.inverse_transform([pred])[0]

# Example Usage
# The apostrophe in "won’t" was mis-encoded; the garbled bytes would be
# tokenized and fed to the model, so the sentence text is repaired here.
example = "I feel anxious and my mind won’t stop racing."
predicted_class = predict_sentence(example)
print(f"\nüîç Input: {example}\nüß† Predicted Mental State: {predicted_class}")


**Evaluation**

In [None]:
# ==========================================================
# IMPORTS
# ==========================================================
import os
import torch
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report, log_loss
from transformers import AutoTokenizer
from torch.utils.data import Dataset
from torch import nn
from transformers import AutoModel
import joblib

# ==========================================================
# PATH CONFIG
# ==========================================================
model_path = "/content/drive/MyDrive/Mental State model/Model/DeBERTa-v3_Attention Pooling/"
img_path = "/content/drive/MyDrive/Mental State model/Images/DeBERTa-v3_Attention Pooling/"
# Make sure the image directory exists before any figure is saved.
os.makedirs(img_path, exist_ok=True)

# Use the GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# ==========================================================
# LOAD TOKENIZER AND LABEL ENCODER
# ==========================================================
tokenizer = AutoTokenizer.from_pretrained(model_path)
encoder_file = os.path.join(model_path, "label_encoder.pkl")
label_encoder = joblib.load(encoder_file)

# Model Architecture (same as used during training)
class DebertaAttentionClassifier(nn.Module):
    """Transformer encoder with learned attention pooling and an MLP head.

    Encoder token states are projected to ``embedding_dim``, scored by a small
    tanh network, softmax-normalized along dim 1 and summed into one vector
    per example, which the classifier head maps to ``output_dim`` logits.

    Args:
        model_name: Checkpoint name/path passed to ``AutoModel.from_pretrained``.
        num_labels: Number of target classes; must not exceed ``output_dim``.
        embedding_dim: Width of the projection applied to the encoder states.
        hidden_dim: Width of the attention scorer and of hidden head layers.
        output_dim: Number of logits produced by the head.
        n_layers: Number of linear layers in the head (at least 1).
        dropout: Dropout probability after each hidden head layer.
        mask_padding: When True (default), positions whose ``attention_mask``
            is 0 receive no pooling weight, so logits do not depend on how far
            a batch was padded. Pass False to reproduce the earlier unmasked
            pooling, e.g. for weights that were trained that way.

    Raises:
        ValueError: If ``n_layers < 1`` or ``output_dim < num_labels``.
    """

    def __init__(self, model_name, num_labels,
                 embedding_dim=128, hidden_dim=128, output_dim=5, n_layers=2, dropout=0.3,
                 mask_padding=True):
        super().__init__()
        # Fail before the (expensive) checkpoint load on an unusable configuration.
        if n_layers < 1:
            raise ValueError(f"n_layers must be >= 1, got {n_layers}")
        if output_dim < num_labels:
            raise ValueError(
                f"output_dim ({output_dim}) is smaller than num_labels ({num_labels}); "
                "the head cannot produce a logit for every class"
            )
        self.mask_padding = mask_padding
        self.deberta = AutoModel.from_pretrained(model_name)
        self.embedding_proj = nn.Linear(self.deberta.config.hidden_size, embedding_dim)
        # The trailing Softmax stays in the Sequential so parameter names
        # ("attention.0.*", "attention.2.*") match previously saved state dicts.
        self.attention = nn.Sequential(
            nn.Linear(embedding_dim, hidden_dim),
            nn.Tanh(),
            nn.Linear(hidden_dim, 1),
            nn.Softmax(dim=1)
        )
        layers = []
        input_dim = embedding_dim
        for _ in range(n_layers - 1):
            layers += [
                nn.Linear(input_dim, hidden_dim),
                nn.ReLU(),
                nn.Dropout(dropout)
            ]
            input_dim = hidden_dim
        # Feed the output layer from the width actually produced above
        # (embedding_dim when n_layers == 1, hidden_dim otherwise).
        layers.append(nn.Linear(input_dim, output_dim))
        self.classifier = nn.Sequential(*layers)

    def forward(self, input_ids, attention_mask, token_type_ids=None):
        """Return the logits tensor for a batch.

        ``token_type_ids`` is optional and only forwarded to the encoder when
        given, so a full tokenizer output can be passed with ``model(**enc)``.
        """
        encoder_inputs = {"input_ids": input_ids, "attention_mask": attention_mask}
        if token_type_ids is not None:
            encoder_inputs["token_type_ids"] = token_type_ids
        outputs = self.deberta(**encoder_inputs)
        x = self.embedding_proj(outputs.last_hidden_state)

        if self.mask_padding:
            # Score with everything but the final Softmax, push masked
            # positions to the most negative value, then normalize over dim 1.
            scores = self.attention[:-1](x)
            is_padding = attention_mask.unsqueeze(-1) == 0
            scores = scores.masked_fill(is_padding, torch.finfo(scores.dtype).min)
            attn_weights = torch.softmax(scores, dim=1)
        else:
            attn_weights = self.attention(x)
        context_vector = torch.sum(attn_weights * x, dim=1)
        logits = self.classifier(context_vector)
        return logits

# Load model weights
model_name = "microsoft/deberta-v3-base"
state_dict = torch.load(os.path.join(model_path, "pytorch_model.bin"), map_location=device)

# Size the head from the checkpoint itself instead of relying on the class
# default: the last Linear of `classifier` has the highest index, and its bias
# length is the number of logits the saved model produces.
head_bias_keys = [key for key in state_dict if key.startswith("classifier.") and key.endswith(".bias")]
if not head_bias_keys:
    raise KeyError("checkpoint contains no 'classifier.*.bias' entries")
output_dim = state_dict[max(head_bias_keys, key=lambda key: int(key.split(".")[1]))].shape[0]
if output_dim < len(label_encoder.classes_):
    raise ValueError(
        f"checkpoint head has {output_dim} outputs but the label encoder "
        f"knows {len(label_encoder.classes_)} classes"
    )

model = DebertaAttentionClassifier(
    model_name,
    num_labels=len(label_encoder.classes_),
    output_dim=output_dim,
)
model.load_state_dict(state_dict)
model.to(device)
model.eval()

print("‚úÖ Model loaded successfully!")

# ==========================================================
# LOAD TRAINING AND TEST DATA
# ==========================================================
train_data_path = "/content/drive/MyDrive/Mental State model/Data/Train_Data.csv"
test_data_path = "/content/drive/MyDrive/Mental State model/Data/Test_Data.csv"

# Read the training CSV first, then the test CSV.
train_df, test_df = (
    pd.read_csv(csv_path) for csv_path in (train_data_path, test_data_path)
)

# Map the ground-truth `status` strings of both splits to encoder class ids.
for split_df in (train_df, test_df):
    split_df["encoded"] = label_encoder.transform(split_df["status"])

# ==========================================================
# TOKENIZE & PREDICT FUNCTION
# ==========================================================
def get_predictions(df):
    """Predict classes and class probabilities for ``df["statement"]``.

    Statements are tokenized and scored in batches of 16 with the
    notebook-level ``tokenizer``, ``model`` and ``device``.

    Returns:
        ``(preds, probs)`` as NumPy arrays: the arg-max class id per row and
        the softmax of the logits per row.
    """
    preds, probs = [], []
    batch_size = 16
    model.eval()  # make sure dropout is off even if the caller forgot
    with torch.no_grad():
        for i in range(0, len(df), batch_size):
            batch_texts = df["statement"].iloc[i:i+batch_size].tolist()
            enc = tokenizer(batch_texts, return_tensors="pt", truncation=True,
                            padding=True, max_length=128).to(device)
            # Pass only the two tensors forward() is declared with; unpacking
            # the whole tokenizer output could hand it extra fields (e.g.
            # token_type_ids) it does not accept.
            logits = model(enc["input_ids"], enc["attention_mask"])
            pred_probs = torch.softmax(logits, dim=1)
            pred_classes = torch.argmax(pred_probs, dim=1)
            preds.extend(pred_classes.cpu().numpy())
            probs.extend(pred_probs.cpu().numpy())
    return np.array(preds), np.array(probs)

# ==========================================================
# GET TRAIN & TEST PREDICTIONS
# ==========================================================
# Score the training split first, then the test split.
train_preds, train_probs = get_predictions(train_df)
test_preds, test_probs = get_predictions(test_df)

# Attach the decoded class names next to the ground truth in each frame.
for split_df, split_preds in ((train_df, train_preds), (test_df, test_preds)):
    split_df["Predicted_Status"] = label_encoder.inverse_transform(split_preds)

# ==========================================================
# METRICS CALCULATION
# ==========================================================
train_acc = accuracy_score(train_df["encoded"], train_preds)
test_acc  = accuracy_score(test_df["encoded"], test_preds)
train_f1  = f1_score(train_df["encoded"], train_preds, average="weighted")
test_f1   = f1_score(test_df["encoded"], test_preds, average="weighted")
# log_loss infers the label set from y_true unless `labels` is given, and then
# fails when a split lacks one of the classes the probability columns stand
# for. Name one label per probability column explicitly.
train_loss = log_loss(train_df["encoded"], train_probs, labels=np.arange(train_probs.shape[1]))
test_loss  = log_loss(test_df["encoded"], test_probs, labels=np.arange(test_probs.shape[1]))

print("\nüìä Model Performance Summary:")
print(f"Training Accuracy: {train_acc:.4f}")
print(f"Testing Accuracy:  {test_acc:.4f}")
print(f"Training F1-Score: {train_f1:.4f}")
print(f"Testing F1-Score:  {test_f1:.4f}")
print(f"Training Loss:     {train_loss:.4f}")
print(f"Testing Loss:      {test_loss:.4f}")

# ==========================================================
# 1️⃣ ACCURACY BAR CHART (TRAIN vs TEST)
# ==========================================================
# Grouped bars: one group per metric, training on the left, testing on the right.
metrics = ["Accuracy", "F1-Score", "Loss"]
train_values = [train_acc, train_f1, train_loss]
test_values = [test_acc, test_f1, test_loss]
x = np.arange(len(metrics))
width = 0.35

fig, ax = plt.subplots(figsize=(7, 5))
ax.bar(x - width / 2, train_values, width, label="Training", color="skyblue")
ax.bar(x + width / 2, test_values, width, label="Testing", color="lightgreen")
ax.set_xticks(x)
ax.set_xticklabels(metrics)
ax.set_ylabel("Score")
ax.set_title("Training vs Testing Performance Comparison")
ax.legend()
fig.tight_layout()
fig.savefig(os.path.join(img_path, "Train_Test_Accuracy_Comparison.png"))
plt.show()

# ==========================================================
# 2️⃣ CONFUSION MATRIX (TRAINING)
# ==========================================================
# Count actual-vs-predicted pairs on the training split and draw them as an
# annotated heatmap with the class names on both axes.
train_cm = confusion_matrix(train_df["encoded"], train_preds)
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    train_cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=label_encoder.classes_,
    yticklabels=label_encoder.classes_,
    ax=ax,
)
ax.set_title("Confusion Matrix - Training Data")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
fig.tight_layout()
fig.savefig(os.path.join(img_path, "Confusion_Matrix_Training.png"))
plt.show()

# ==========================================================
# 3️⃣ CONFUSION MATRIX (TESTING)
# ==========================================================
# Pin the label set to the encoder's class ids so the matrix always has one
# row/column per class, in encoder order. Without it scikit-learn infers the
# labels from the data, and a class absent from the test split and the
# predictions would leave the tick labels misaligned with the matrix.
class_ids = np.arange(len(label_encoder.classes_))
test_cm = confusion_matrix(test_df["encoded"], test_preds, labels=class_ids)
plt.figure(figsize=(8,6))
sns.heatmap(test_cm, annot=True, fmt="d", cmap="Greens",
            xticklabels=label_encoder.classes_,
            yticklabels=label_encoder.classes_)
plt.title("Confusion Matrix - Testing Data")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.tight_layout()
plt.savefig(os.path.join(img_path, "Confusion_Matrix_Testing.png"))
plt.show()

# ==========================================================
# CLASSIFICATION REPORTS
# ==========================================================
print("\nüìã Classification Report (Testing):\n")
# `target_names` must line up one-to-one with the reported labels; naming the
# labels explicitly keeps that true even if the test split or the predictions
# miss one of the encoder's classes.
print(classification_report(test_df["encoded"], test_preds,
                            labels=np.arange(len(label_encoder.classes_)),
                            target_names=label_encoder.classes_))

# ==========================================================
# SAVE TEST PREDICTIONS TO CSV
# ==========================================================
# Write the test frame (actual, encoded and predicted labels) without the index.
output_path = "/content/drive/MyDrive/Mental State model/Data/DeBERTa-v3_Attention_Pooling_Predictions.csv"
test_df.to_csv(path_or_buf=output_path, index=False)
print(f"\n‚úÖ Predictions with actual vs predicted labels saved at:\n{output_path}")

# ==========================================================
# 4️⃣ LOSS VISUALIZATION (TRAIN vs TEST)
# ==========================================================
# Two bars comparing the log-loss on the training and test splits.
# NOTE(review): the training section saves its loss curve under the same
# directory and file name ("Loss_Comparison.png"), so this figure replaces it.
fig, ax = plt.subplots(figsize=(6, 5))
ax.bar(
    ["Training Loss", "Testing Loss"],
    [train_loss, test_loss],
    color=["skyblue", "lightcoral"],
)
ax.set_ylabel("Loss")
ax.set_title("Training vs Testing Loss Comparison")
fig.tight_layout()
fig.savefig(os.path.join(img_path, "Loss_Comparison.png"))
plt.show()

print(f"\n‚úÖ All images saved at:\n{img_path}")
