In [2]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

def _read_split(path):
    """Read one semicolon-separated split file into a DataFrame.

    No header row is assumed: the two columns are named ``text`` and
    ``emotion`` explicitly.
    """
    return pd.read_csv(path, sep=";", names=["text", "emotion"])


train_df = _read_split("train.txt")
val_df = _read_split("val.txt")

# Fit the encoder on the training emotions only, then reuse that fitted
# mapping for the validation split so both share the same integer ids.
le = LabelEncoder()
train_df['label'] = le.fit_transform(train_df['emotion'])
val_df['label'] = le.transform(val_df['emotion'])

# Preview the first rows, including the new integer `label` column.
train_df.head()

Unnamed: 0,text,emotion,label
0,i didnt feel humiliated,sadness,4
1,i can go from feeling so hopeless to so damned...,sadness,4
2,im grabbing a minute to post i feel greedy wrong,anger,0
3,i am ever feeling nostalgic about the fireplac...,love,3
4,i am feeling grouchy,anger,0


In [4]:
# Imported in this cell so it also runs on a fresh kernel: the only other
# `import torch` in the notebook sits in a later cell, which would make this
# cell fail with NameError under Restart & Run All.
import torch

# Report the installed torch build and whether a CUDA device is usable.
print("Torch version:", torch.__version__)
print("CUDA Available:", torch.cuda.is_available())
# Only query the device name when CUDA is available; otherwise print a placeholder.
print("Device:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU")

Torch version: 2.6.0+cu118
CUDA Available: True
Device: NVIDIA GeForce RTX 3060 Laptop GPU


In [5]:
from transformers import DistilBertTokenizerFast

tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')


def _tokenize_texts(frame):
    """Tokenize the ``text`` column of ``frame`` with truncation and padding enabled."""
    return tokenizer(frame['text'].tolist(), truncation=True, padding=True)


# Each split is tokenized in a single call, so it is encoded as one batch.
train_encodings = _tokenize_texts(train_df)
val_encodings = _tokenize_texts(val_df)

import torch

class EmotionDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with integer labels.

    ``encodings`` is a mapping from field name to a per-example sequence
    (anything exposing ``.items()`` whose values can be indexed by example);
    ``labels`` is an indexable sequence with one label per example.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The dataset size is defined by the number of labels.
        return len(self.labels)

    def __getitem__(self, idx):
        # Build one example: every encoding field becomes a tensor, and the
        # label is added under the 'labels' key.
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Wrap each split's encodings together with its integer labels (as plain
# Python lists) so they can be handed to the Trainer.
train_dataset = EmotionDataset(
    encodings=train_encodings,
    labels=train_df['label'].tolist(),
)
val_dataset = EmotionDataset(
    encodings=val_encodings,
    labels=val_df['label'].tolist(),
)


In [6]:
from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments

# Start from the pretrained DistilBERT checkpoint with a classification head
# sized to the number of emotion classes known to the label encoder.
# id2label / label2id store the emotion names in the model config, so a
# checkpoint saved from this model can be decoded without the in-memory `le`
# (otherwise the saved config only carries generic LABEL_<i> names).
model = DistilBertForSequenceClassification.from_pretrained(
    'distilbert-base-uncased',
    num_labels=len(le.classes_),
    id2label={idx: str(name) for idx, name in enumerate(le.classes_)},
    label2id={str(name): idx for idx, name in enumerate(le.classes_)},
)

training_args = TrainingArguments(
    output_dir='./results',
    # NOTE(review): the training log below this cell shows only "Training Loss",
    # so do_eval=True alone does not appear to schedule evaluation on
    # eval_dataset during training — confirm against the TrainingArguments
    # docs and set an evaluation strategy if validation metrics are wanted.
    do_eval=True,
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    logging_dir='./logs',
    # Log the training loss every 10 optimizer steps.
    logging_steps=10,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# Last expression of the cell: the returned TrainOutput is shown as the cell output.
trainer.train()


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
10,1.7267
20,1.5594
30,1.4498
40,1.2424
50,1.1514
60,1.1478
70,1.0046
80,1.0066
90,0.8515
100,0.7727


TrainOutput(global_step=3000, training_loss=0.20127279564738274, metrics={'train_runtime': 345.0499, 'train_samples_per_second': 139.11, 'train_steps_per_second': 8.694, 'total_flos': 1080514292544000.0, 'train_loss': 0.20127279564738274, 'epoch': 3.0})

In [7]:
# Persist the fine-tuned model and its tokenizer side by side in the
# "moodify_model" directory.
model.save_pretrained("moodify_model")
# Last expression of the cell: the tuple of written tokenizer file paths it
# returns is what gets displayed as the cell output.
tokenizer.save_pretrained("moodify_model")

('moodify_model\\tokenizer_config.json',
 'moodify_model\\special_tokens_map.json',
 'moodify_model\\vocab.txt',
 'moodify_model\\added_tokens.json',
 'moodify_model\\tokenizer.json')

In [8]:
def predict_emotion(text):
    """Classify the emotion expressed in ``text`` with the fine-tuned model.

    Args:
        text: The sentence or paragraph to classify (a single string).

    Returns:
        A ``(emotion, confidence)`` tuple: the predicted class name looked up
        in ``le.classes_`` and the highest softmax probability as a float.
    """
    # The model may still be in training mode after trainer.train(); switch to
    # eval mode so dropout is disabled and predictions are deterministic.
    model.eval()
    # Tokenize and move the tensors to the device the model lives on.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(model.device)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    predicted = torch.argmax(probs, dim=-1).item()
    confidence = torch.max(probs).item()
    return le.classes_[predicted], confidence


In [10]:
# Smoke test: classify one example sentence; the returned
# (emotion, confidence) tuple is displayed as the cell output.
predict_emotion("I feel like everything is going well today.")

('joy', 0.9995564818382263)

In [13]:
import gradio as gr

def moodify_interface(text):
    """Gradio callback: format the prediction for ``text`` as two lines of text.

    Returns a string with the predicted emotion on the first line and the
    confidence, rounded to two decimals, on the second.
    """
    label, score = predict_emotion(text)
    return f"Emotion: {label}\nConfidence: {score:.2f}"

# Build a text-in / text-out interface around the prediction callback.
demo = gr.Interface(
    fn=moodify_interface,
    inputs="text",
    outputs="text",
    title="Moodify - Emotion Detector",
    description="Enter a sentence or paragraph, and Moodify will analyze your emotional tone!",
)

# share=True additionally requests a public share link next to the local URL
# (both are printed in the cell output).
demo.launch(share=True)


* Running on local URL:  http://127.0.0.1:7862
* Running on public URL: https://211f8254e03a669558.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


