<a href="https://colab.research.google.com/github/Deon62/FIneTuned-EmotionDetectModel/blob/main/ModelFT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers datasets evaluate nlpaug


In [None]:
from datasets import load_dataset
import pandas as pd
import nlpaug.augmenter.word as naw
from collections import Counter


In [None]:

# Load the "split" configuration of the dair-ai/emotion dataset; later cells index its
# "train", "validation" and "test" splits.
ds = load_dataset("dair-ai/emotion", "split")

In [None]:
# Show the loaded dataset object as a quick sanity check.
print(ds)

In [None]:
# Emotion names in label-id order, read from the dataset metadata
label_names = ds["train"].features["label"].names

# Training split as a pandas DataFrame, with a short preview
train_df = pd.DataFrame(ds["train"])
print(train_df.head())

# How many training examples each emotion has
counter = Counter(train_df["label"])
for label_id in counter:
    print(f"{label_names[label_id]:<8}: {counter[label_id]}")


In [None]:
from transformers import DistilBertTokenizerFast

# Fast tokenizer matching the uncased DistilBERT checkpoint that is fine-tuned below.
tokenizer = DistilBertTokenizerFast.from_pretrained(
    "distilbert-base-uncased",
    do_lower_case=True,
)

In [None]:
max_length = 128


def tokenize_batch(batch):
    """Tokenize a batch of examples for DistilBERT.

    Args:
        batch: Mapping with a "text" entry holding the list of input strings
            (the shape `Dataset.map(..., batched=True)` passes in).

    Returns:
        The tokenizer output for the batch, truncated to `max_length` tokens.

    Sequences are deliberately NOT padded here. Padding every example to
    `max_length` would make the DataCollatorWithPadding(padding="longest") used
    at training time a no-op and waste compute on pad tokens; the collator pads
    each mini-batch to its own longest sequence instead.
    """
    return tokenizer(
        batch["text"],
        truncation=True,
        max_length=max_length,
    )


In [None]:
# Tokenize every split in batches; the raw "text" column is dropped afterwards.
tokenized_datasets = ds.map(tokenize_batch, remove_columns=["text"], batched=True)


In [None]:
from transformers import DataCollatorWithPadding

# Collator that pads each mini-batch up to the longest sequence it contains.
data_collator = DataCollatorWithPadding(padding="longest", tokenizer=tokenizer)


In [None]:
# Return the model inputs and the label as torch tensors (set on the dataset in place).
tensor_columns = ["input_ids", "attention_mask", "label"]
tokenized_datasets.set_format(type="torch", columns=tensor_columns)

# Named handles for the three splits used by the training and evaluation cells.
train_ds, val_ds, test_ds = (
    tokenized_datasets[split_name]
    for split_name in ("train", "validation", "test")
)


In [None]:
from transformers import (
    DistilBertForSequenceClassification,
    Trainer,
    TrainingArguments
)
import evaluate


In [None]:
# Take the class names from the dataset so the head size and the label mapping stored in
# the model config always agree with the data. Without id2label/label2id the saved config
# only knows generic LABEL_0..LABEL_n names, which is what pipelines would then report.
emotion_labels = ds["train"].features["label"].names

# ignore_mismatched_sizes is intentionally not passed: it silences shape mismatches between
# checkpoint and model, and none are expected when a new classification head is created.
model = DistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=len(emotion_labels),
    id2label=dict(enumerate(emotion_labels)),
    label2id={name: idx for idx, name in enumerate(emotion_labels)},
)


In [None]:
# Metric objects loaded once here and used by compute_metrics below.
accuracy_metric = evaluate.load("accuracy")
f1_metric       = evaluate.load("f1")

def compute_metrics(pred):
    """Turn a (logits, labels) pair into accuracy and macro-averaged F1.

    Args:
        pred: Two-item iterable of class logits and reference label ids.

    Returns:
        Dict with the keys "accuracy" and "f1_macro".
    """
    scores, references = pred
    predicted_ids = scores.argmax(axis=-1)
    shared = {"predictions": predicted_ids, "references": references}
    accuracy = accuracy_metric.compute(**shared)["accuracy"]
    macro_f1 = f1_metric.compute(average="macro", **shared)["f1"]
    return {"accuracy": accuracy, "f1_macro": macro_f1}


In [None]:
import torch  # local import: only used to decide whether mixed precision is usable

training_args = TrainingArguments(
    output_dir="emotion-distilbert",
    do_train=True,
    do_eval=True,
    # The evaluation strategy defaults to "no", in which case eval_steps is ignored and the
    # validation set is never scored during training. Request step-based evaluation explicitly.
    # (The argument is called `evaluation_strategy` in transformers releases before 4.41.)
    eval_strategy="steps",
    eval_steps=500,
    save_steps=500,
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    num_train_epochs=4,
    weight_decay=0.01,
    warmup_ratio=0.1,
    logging_steps=50,
    # fp16 mixed precision needs a CUDA device; fall back to full precision without one
    # instead of failing when the arguments are built.
    fp16=torch.cuda.is_available(),
    gradient_checkpointing=False,
)


In [None]:
!pip install peft


In [None]:
from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,

    # Linear modules to wrap with LoRA: DistilBERT's query/value attention projections
    target_modules=["q_lin", "v_lin"],

    # DistilBERT's classification head has a `pre_classifier` layer in front of `classifier`.
    # Both are newly initialised for this task (assumption: the base checkpoint carries no
    # head weights - confirm against the load-time warning). Listing them here keeps them
    # trainable and stores them with the adapter; otherwise a frozen, randomly initialised
    # pre_classifier is not saved, and re-attaching the adapter to a freshly created base
    # model (as a later cell does) would pair the trained weights with a different one.
    modules_to_save=["pre_classifier", "classifier"],
)
model = get_peft_model(model, lora_config)


In [None]:
# Count all parameters and the subset that will receive gradients, in a single pass.
total_params = 0
trainable_params = 0
for param in model.parameters():
    size = param.numel()
    total_params += size
    if param.requires_grad:
        trainable_params += size

print(f"Total params: {total_params:,}")
print(f"Trainable params: {trainable_params:,}")  # only what PEFT left unfrozen


In [None]:
from transformers import Trainer

# Wire the LoRA-wrapped model, the data splits, batching and metrics into one Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    data_collator=data_collator,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    compute_metrics=compute_metrics,
)


In [None]:
# Run fine-tuning; the value returned by train() is kept in train_result.
train_result = trainer.train()


In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np
import pandas as pd

# 1. Get predictions on the test split
preds_output = trainer.predict(test_ds)
pred_labels = np.argmax(preds_output.predictions, axis=-1)
true_labels = preds_output.label_ids

# 2. Map numeric IDs to emotion names
label_names = ds["train"].features["label"].names
# Explicit id list: keeps one row/column/report line per emotion even if some class never
# occurs in the true or predicted labels, so the outputs always line up with label_names.
label_ids = list(range(len(label_names)))

# 3. Confusion Matrix (rows = true emotion, columns = predicted emotion)
cm = confusion_matrix(true_labels, pred_labels, labels=label_ids)
cm_df = pd.DataFrame(cm, index=label_names, columns=label_names)
print("Confusion Matrix:\n", cm_df)

# 4. Classification Report
cr = classification_report(
    true_labels,
    pred_labels,
    labels=label_ids,
    target_names=label_names,
)
print("\nClassification Report:\n", cr)


In [None]:
# Save the model weights and the tokenizer files into ONE directory so the folder can be
# reloaded as a unit. Previously the two paths differed by a single character
# ("Deon_emotion-model" vs "Deon-emotion-model"), scattering the files over two folders.
SAVE_DIR = "Deon_emotion-model"
model.save_pretrained(SAVE_DIR)
tokenizer.save_pretrained(SAVE_DIR)


In [None]:
# from huggingface_hub import login
# login()



In [None]:
# Upload the model and the tokenizer to the Hugging Face Hub.
# NOTE(review): the two repo ids differ by one character ("Deon_emotion-model" vs
# "Deon-emotion-model"), so model and tokenizer end up in separate repos. The loading cell
# further down reads each from its respective repo, so renaming one here means updating
# that cell as well.
# Presumably requires being logged in first (see the commented-out login cell) - confirm.
model.push_to_hub("chinesemusk/Deon_emotion-model")
tokenizer.push_to_hub("chinesemusk/Deon-emotion-model")

In [None]:
from transformers import (
    DistilBertForSequenceClassification,
    DistilBertTokenizerFast,
    pipeline
)
from peft import PeftModel

# 1. Load the base model with the correct label count and label names.
# The names come from the dataset metadata; storing them as id2label/label2id in the config
# lets anything built from this model (pipelines, the merged checkpoint pushed later)
# report emotion names instead of generic LABEL_n placeholders.
emotion_labels = ds["train"].features["label"].names
base = DistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=len(emotion_labels),
    id2label=dict(enumerate(emotion_labels)),
    label2id={name: idx for idx, name in enumerate(emotion_labels)},
)

# 2. Attach the LoRA adapters stored in the Hub repo to the freshly loaded base model.
# Note that this rebinds the global `model`, replacing the object used for training above.
model = PeftModel.from_pretrained(
    base,
    "chinesemusk/Deon_emotion-model"  # adapter repo (underscore after "Deon")
)

# 3. Load the tokenizer.
# NOTE(review): this repo id ("Deon-emotion-model", hyphen) is NOT the adapter repo loaded
# above ("Deon_emotion-model", underscore); it is the id the tokenizer was pushed to earlier.
tokenizer = DistilBertTokenizerFast.from_pretrained(
    "chinesemusk/Deon-emotion-model"
)

# 4. Build the pipeline on that exact model+tokenizer.
# top_k=None asks for the score of every class. It replaces the deprecated
# return_all_scores=True and matches how the other pipelines in this notebook are built.
classifier = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    top_k=None
)

# 5. Smoke test: classify one example sentence and print the raw pipeline output.
print(classifier("I just got promoted at work — I'm over the moon!"))


In [None]:
import torch
import torch.nn.functional as F

# 1. Pick the inference device, move the model onto it and switch to eval mode
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model.to(device)
model.eval()

# 2. Emotion names in label-id order, taken from the dataset metadata
label_names = ds["train"].features["label"].names

def predict_emotion(text: str):
    """Score one piece of text against every emotion label.

    Args:
        text: The input sentence to classify.

    Returns:
        Dict mapping each name in `label_names` to its softmax probability.
    """
    # Encode to fixed-length (128 token) PyTorch tensors on the model's device
    encoded = tokenizer(
        text,
        return_tensors="pt",
        max_length=128,
        truncation=True,
        padding="max_length",
    ).to(device)

    # Inference only: no gradients needed
    with torch.no_grad():
        logits = model(**encoded).logits

    # Normalise over the class axis and drop the batch dimension
    probabilities = F.softmax(logits, dim=-1).squeeze().cpu().tolist()

    return {name: prob for name, prob in zip(label_names, probabilities)}

# 3. Try it out on a single sentence and print the label -> probability mapping
print(predict_emotion("I can't believe how beautiful the sunset is!"))


In [None]:
# Produce a standalone model from the adapter-wrapped one and publish it with its tokenizer.
# Presumably merge_and_unload() folds the LoRA weights into the base weights - confirm in
# the PEFT documentation.
# NOTE(review): the model goes to "Deon_emotion-model-full" (underscore) while the tokenizer
# goes to "Deon-emotion-model-full" (hyphen): two different repos. The pipelines below load
# model and tokenizer from exactly these two ids, so keep them in sync if either is renamed.
merged_model = model.merge_and_unload()
merged_model.push_to_hub("chinesemusk/Deon_emotion-model-full")
tokenizer.push_to_hub("chinesemusk/Deon-emotion-model-full")


In [None]:
from transformers import pipeline

# Pipeline built from the published merged model; top_k=None yields a score for every class.
classifier = pipeline(
    task="text-classification",
    tokenizer="chinesemusk/Deon-emotion-model-full",
    model="chinesemusk/Deon_emotion-model-full",
    top_k=None,
)

# Quick check on one sentence
text = "I'm feeling really hopeful and excited today!"
results = classifier(text)
print(results)


In [None]:
!pip install gradio
import gradio as gr
from transformers import pipeline

# Pipeline over the published merged model and tokenizer (scores for all classes)
classifier = pipeline(
    task="text-classification",
    tokenizer="chinesemusk/Deon-emotion-model-full",
    model="chinesemusk/Deon_emotion-model-full",
    top_k=None,
)

# Callback used by the Gradio interface
def classify_emotion(text):
    """Classify `text` and return a {label: score} dict with scores rounded to 3 decimals."""
    scores_by_label = {}
    for entry in classifier(text)[0]:
        scores_by_label[entry["label"]] = float(format(entry["score"], ".3f"))
    return scores_by_label

# Assemble the Gradio UI (text box in, ranked label scores out), then start it
emotion_ui = gr.Interface(
    title="Deon Emotion Classifier",
    description="Enter a sentence to detect the expressed emotion (joy, love, sadness, anger, fear, surprise).",
    fn=classify_emotion,
    inputs=gr.Textbox(lines=2, placeholder="Enter a sentence..."),
    outputs=gr.Label(num_top_classes=6),
)
emotion_ui.launch()


In [None]:
# app.py
import gradio as gr
from transformers import pipeline

# Load pipeline from your fine-tuned model
import torch  # local import: only needed to pick the inference device

classifier = pipeline(
    "text-classification",
    model="chinesemusk/Deon_emotion-model-full",
    tokenizer="chinesemusk/Deon-emotion-model-full",
    top_k=None,  # Show all class probabilities
    # A hard-coded device=0 fails on hosts without a GPU; use the first CUDA device only
    # when one exists and fall back to CPU (-1) otherwise.
    device=0 if torch.cuda.is_available() else -1,
)

# Prediction callback for the app
# NOTE(review): this reuses the name `predict_emotion` defined in an earlier cell and
# shadows it once this cell has run.
def predict_emotion(text):
    """Classify `text` and return a {label: score} dict with scores rounded to 4 decimals."""
    scores_by_label = {}
    for entry in classifier(text)[0]:
        scores_by_label[entry["label"]] = float(format(entry["score"], ".4f"))
    return scores_by_label

# Sample sentences offered as one-click examples in the UI
example_inputs = [
    ["I love this!"],
    ["I feel so empty today..."],
    ["This is amazing!"],
    ["I'm really angry right now"],
    ["What a shocking turn of events!"],
]

# Gradio interface: free-text input, label scores as output
demo = gr.Interface(
    title="Deon Emotion Classifier",
    description="Predicts emotion from English text using a fine-tuned DistilBERT model. Try inputs like 'I love this moment' or 'I'm scared.'",
    fn=predict_emotion,
    inputs=gr.Textbox(lines=3, placeholder="Enter text here..."),
    outputs=gr.Label(num_top_classes=6),
    examples=example_inputs,
)

# Start the app only when this code runs as the main module, not when it is imported.
if __name__ == "__main__":
    demo.launch()
