# 🧠 Fine-Tuning LLaMA for Mental Health Text Classification

## 🧬 NLP Meets Mental Health | 🤗 Transformers + 🦙 LLaMA

---

### 📚 Overview

This project explores fine-tuning the **Meta LLaMA model** on mental health-related text to classify emotional or psychological states using **transformers**, **LoRA**, and **custom datasets**.

---

### 🔖 Sections
- ⚙️ Setup Environment (GPU, Dependencies)
- 🗃️ Load and Preprocess Mental Health Dataset
- 🧠 Fine-Tune LLaMA with LoRA
- 🧪 Evaluate Model Performance
- 📈 Visualize and Interpret Results

## ⚙️ Setup Environment

### 1️⃣ Installing Required Packages
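We install the necessary libraries, including `transformers`, `datasets`, and `peft` (for LoRA fine-tuning), along with supporting packages such as `accelerate`, `bitsandbytes`, `evaluate`, and `scikit-learn`.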

In [None]:
!pip install --upgrade transformers datasets evaluate accelerate pipeline bitsandbytes
!pip install  pandas scikit-learn
!pip install torch torchdata
!pip install peft
!pip install loralib
!pip install huggingface_hub

### 2️⃣ Importing Libraries
We import all essential modules for fine-tuning, tokenization, and evaluation.

In [None]:
import torch
import numpy as np
from torch.utils.data import DataLoader
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer
)
from peft import (
    LoraConfig,
    get_peft_model,
    prepare_model_for_kbit_training
)
from datasets import Dataset, load_dataset, DatasetDict
from sklearn.metrics import accuracy_score, f1_score
import evaluate

In [None]:
import pandas as pd
import numpy as np
import tqdm
import random
from typing import List
from datasets import load_dataset, DatasetDict
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    GenerationConfig,
    TrainingArguments,
    Trainer,
    pipeline,
    BitsAndBytesConfig,
    DataCollatorForSeq2Seq,
    DataCollatorForLanguageModeling,
    DataCollatorWithPadding
)
import torch
import evaluate
from peft import (
    LoraConfig,
    get_peft_model,
    TaskType,
    PeftModel,
    PeftConfig,
)
from huggingface_hub import notebook_login

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Use the CUDA device when one is available; otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

### 3️⃣ 🔐 Connecting to Hugging Face Hub
We log in to Hugging Face to access models and upload results securely.

In [None]:
# Authenticate this notebook session with the Hugging Face Hub. Credentials are
# needed later for `push_to_hub=True` and, presumably, to download the LLaMA
# checkpoint (confirm whether the checkpoint requires access approval).
from huggingface_hub import notebook_login
notebook_login()

## 🧹 Data Preparation

### 1️⃣ Loading and Cleaning the Data
We load the mental health dataset and apply basic cleaning to handle null values, formatting, and noise.

In [None]:
# Download the dataset from the Hugging Face Hub.
dataset = load_dataset("HajarGH/sentiment-analysis-for-mental-health")

# Basic cleaning: drop rows whose text or label is missing. A null `statement`
# cannot be tokenized, and a null `status` cannot be mapped to a meaningful
# class. This is a no-op when the dataset contains no null values.
dataset = dataset.filter(
    lambda row: row["statement"] is not None and row["status"] is not None
)

In [None]:
# Display the loaded dataset object as the cell output (splits, columns, row counts).
dataset

### 2️⃣ 🏷️ Encoding the Labels
The categorical target labels (e.g., depression, anxiety, etc.) are encoded into numerical format for training.

In [None]:
# 2. Encode labels
from sklearn.preprocessing import LabelEncoder

# Convert the "train" split to a pandas DataFrame for encoding and splitting.
df = dataset['train'].to_pandas()

# Fit the encoder on the textual `status` column and store the resulting
# integer ids in a new `label` column.
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['status'])

# Class name -> integer id lookup; its length is later used to size the
# classification head.
label_mapping = {
    class_name: class_id
    for class_name, class_id in zip(
        label_encoder.classes_,
        label_encoder.transform(label_encoder.classes_),
    )
}
print("Label mapping:", label_mapping)

### 3️⃣ ✂️ Splitting the Dataset
We split the data into training (70%), validation (15%), and test (15%) sets, using a fixed random seed so the split is reproducible.

In [None]:
# 3. Split data
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows, then halve that hold-out into validation and test
# (15% each). The fixed seed keeps the partitions reproducible.
train_df, temp_df = train_test_split(df, random_state=42, test_size=0.3)
val_df, test_df = train_test_split(temp_df, random_state=42, test_size=0.5)

# Replace the shuffled original row indices with a fresh 0..n-1 index per split.
train_df, val_df, test_df = (
    split.reset_index(drop=True) for split in (train_df, val_df, test_df)
)

### 4️⃣ 🧼 Tokenization
We tokenize the text data using the appropriate tokenizer for the LLaMA model, preparing inputs for training.

In [None]:
# 4. Load tokenizer
# Checkpoint id shared by the tokenizer here and the model loaded further below.
model_name = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token as the padding token
# (presumably because this tokenizer ships without a pad token — confirm).
tokenizer.pad_token = tokenizer.eos_token


# 5. Tokenization
def tokenize_function(examples):
    """Tokenize one batch of rows for ``Dataset.map(..., batched=True)``.

    Args:
        examples: Batch dict supplied by ``datasets``; its ``"statement"``
            entry holds the raw texts of the batch.

    Returns:
        The tokenizer output for the batch, with each text truncated to at
        most 256 tokens.
    """
    # No padding and no `return_tensors` here: `datasets` stores the mapped
    # output as plain lists regardless, and the `DataCollatorWithPadding`
    # used by the Trainer pads each batch only to its own longest sequence
    # instead of carrying 256-token rows for every example.
    return tokenizer(
        examples["statement"],
        truncation=True,
        max_length=256,
    )

# Convert each pandas split to a `datasets.Dataset` and tokenize it in batches.
train_dataset, test_dataset, val_dataset = (
    Dataset.from_pandas(frame).map(tokenize_function, batched=True)
    for frame in (train_df, test_df, val_df)
)

## 🧠 Model & Training Pipeline



### 1️⃣ Load Pretrained LLaMA Model with Classification Head
We load a pretrained LLaMA model and attach a classification head suitable for our task.

In [None]:
# 6. Load model with classification head
# The head gets one output per encoded class (`len(label_mapping)`), and
# `device_map="auto"` leaves device placement to the library.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(label_mapping),
    problem_type="single_label_classification",
    device_map="auto"
)

# The tokenizer pads with EOS, so the model config is given the same pad token id
# (presumably so the classifier can locate the last non-padding token — confirm).
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id

In [None]:
# Print the model's string representation to inspect its layers.
print(model)

### 2️⃣ 🧩 Prepare for PEFT (LoRA)
We integrate LoRA using the PEFT library to fine-tune only a small set of parameters efficiently.

In [None]:
# 7. Prepare for PEFT
# NOTE(review): the model above is loaded without a quantization config, so this
# k-bit preparation step may be unnecessary here — confirm its effect on
# parameter dtypes before removing it.
model = prepare_model_for_kbit_training(model)

# LoRA settings: rank-8 adapters (alpha 16, dropout 0.05) attached to the
# attention query/key/value projection modules, with `bias="none"`.
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="SEQ_CLS"  # for sequence classification
)

# Wrap the model with the LoRA adapters and report how many parameters are trainable.
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

### 3️⃣ 📊 Define Evaluation Metrics
Set up accuracy, F1, precision, and recall metrics to evaluate model performance meaningfully.

In [None]:
# 8. Metrics
def compute_metrics(eval_pred):
    """Compute classification metrics for the ``Trainer`` evaluation loop.

    Args:
        eval_pred: Pair ``(logits, labels)``; the predicted class of each
            example is the argmax over the last axis of ``logits``.

    Returns:
        Dict with ``"accuracy"`` plus support-weighted ``"f1"``,
        ``"precision"`` and ``"recall"``.
    """
    # Imported locally so this cell does not depend on an extra top-level import.
    from sklearn.metrics import precision_recall_fscore_support

    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    # One pass yields all three weighted scores. `zero_division=0` scores a
    # class that is never predicted as 0 without emitting a warning.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average="weighted", zero_division=0
    )
    return {
        "accuracy": accuracy_score(labels, predictions),
        "f1": float(f1),
        "precision": float(precision),
        "recall": float(recall),
    }

### 4️⃣ 🔁 Training the Model
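Fine-tune the model on the training set using mixed-precision (fp16) training, evaluating on the validation set and saving a checkpoint at the end of every epoch; the best checkpoint by accuracy is reloaded when training finishes.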

In [None]:
# 9. Training
# Run configuration: evaluate and checkpoint once per epoch, reload the
# checkpoint with the best validation accuracy at the end, train in fp16
# mixed precision, and push the result to the Hub.
training_args = TrainingArguments(
    output_dir="./Mental-health-classification",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=3,
    metric_for_best_model="accuracy",
    load_best_model_at_end=True,
    push_to_hub=True,
    fp16=True,
)

# Batches are padded by the collator; metrics come from `compute_metrics`.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

trainer.train()

# 10. Evaluation
# `evaluate()` runs on the Trainer's eval dataset, i.e. the validation split.
results = trainer.evaluate()
print(f"Final accuracy: {results['eval_accuracy']:.4f}")
print(f"Final F1 score: {results['eval_f1']:.4f}")

### 5️⃣ ✅ Evaluating the Model
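Run the model on the held-out test set and print a per-class classification report. We do this twice: first with the base model and no adapter (a baseline), then with the fine-tuned LoRA adapter loaded from the Hub.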

In [None]:
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
)
from peft import PeftModel
from datasets import Dataset
from sklearn.metrics import classification_report
import numpy as np

# 1. Load tokenizer & base pretrained model
model_id = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

# Important: Load model WITHOUT PEFT for base evaluation
# (no fine-tuned adapter is attached, so this run serves as the baseline).
model = AutoModelForSequenceClassification.from_pretrained(
    model_id,
    num_labels=len(label_encoder.classes_)
)
model.config.pad_token_id = tokenizer.pad_token_id

# 2. Run inference on the test dataset
# The padding collator is passed explicitly, as in the training cell, instead of
# through Trainer's `tokenizer=` argument, which newer transformers releases
# deprecate.
trainer = Trainer(
    model=model,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
)
predictions = trainer.predict(test_dataset)

# 3. Compute metrics
# Predicted class = index of the largest logit for each example.
preds = np.argmax(predictions.predictions, axis=-1)
labels = predictions.label_ids

# 4. Display classification report
print(classification_report(labels, preds, target_names=label_encoder.classes_))

In [None]:
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
)
from peft import PeftModel
from datasets import Dataset
from sklearn.metrics import classification_report
import numpy as np

# 1. Load base model and tokenizer, then attach the fine-tuned adapter
model_path = "HajarGH/Mental-health-classification"
base_model = AutoModelForSequenceClassification.from_pretrained("meta-llama/Llama-3.2-1B-Instruct", num_labels=len(label_encoder.classes_))
model = PeftModel.from_pretrained(base_model, model_path)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id


# 2. Run inference
# The padding collator is passed explicitly, as in the training cell, instead of
# through Trainer's `tokenizer=` argument, which newer transformers releases
# deprecate.
trainer = Trainer(
    model=model,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
)
predictions = trainer.predict(test_dataset)

# 3. Compute and print classification report
# Predicted class = index of the largest logit for each example.
preds = np.argmax(predictions.predictions, axis=-1)
labels = predictions.label_ids

print(classification_report(labels, preds, target_names=label_encoder.classes_))