In [1]:
pip install torch transformers pandas scikit-learn



In [2]:
import pandas as pd

# Load the datasets
train_df = pd.read_csv('training.csv')
val_df = pd.read_csv('validation.csv')
test_df = pd.read_csv('test.csv')

# Display the first few rows of each dataset
print("Training Data:")
print(train_df.head())

print("\nValidation Data:")
print(val_df.head())

print("\nTest Data:")
print(test_df.head())


Training Data:
                                                text  label
0                            i didnt feel humiliated      0
1  i can go from feeling so hopeless to so damned...      0
2   im grabbing a minute to post i feel greedy wrong      3
3  i am ever feeling nostalgic about the fireplac...      2
4                               i am feeling grouchy      3

Validation Data:
                                                text  label
0  im feeling quite sad and sorry for myself but ...      0
1  i feel like i am still looking at a blank canv...      0
2                     i feel like a faithful servant      2
3                  i am just feeling cranky and blue      3
4  i can have for a treat or if i am feeling festive      1

Test Data:
                                                text  label
0  im feeling rather rotten so im not very ambiti...      0
1          im updating my blog because i feel shitty      0
2  i never make her separate from me because i do...   

In [3]:
import torch
from transformers import DistilBertTokenizerFast
from sklearn.preprocessing import LabelEncoder

In [4]:
# Initialize the DistilBERT tokenizer
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [5]:
# Initialize Label Encoder
label_encoder = LabelEncoder()

# Fit and transform labels
train_labels = label_encoder.fit_transform(train_df['label'])
val_labels = label_encoder.transform(val_df['label'])
test_labels = label_encoder.transform(test_df['label'])

# Display label encoding mapping
label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
print("Label Mapping:", label_mapping)

Label Mapping: {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5}


In [6]:
def tokenize_data(texts, tokenizer, max_length=128):
    return tokenizer(
        texts.tolist(),
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors='pt'
    )


In [7]:
# Tokenize training data
train_encodings = tokenize_data(train_df['text'], tokenizer)

# Tokenize validation data
val_encodings = tokenize_data(val_df['text'], tokenizer)

# Tokenize test data
test_encodings = tokenize_data(test_df['text'], tokenizer)

In [8]:
from torch.utils.data import Dataset

class TherapyDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

In [9]:
# Create dataset instances
train_dataset = TherapyDataset(train_encodings, train_labels)
val_dataset = TherapyDataset(val_encodings, val_labels)
test_dataset = TherapyDataset(test_encodings, test_labels)

In [10]:
from torch.utils.data import DataLoader

# Define batch size
batch_size = 16

# Create DataLoader instances
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [11]:
from transformers import DistilBertForSequenceClassification

In [12]:
# Define number of labels
num_labels = len(label_encoder.classes_)

# Initialize DistilBERT model for sequence classification
model = DistilBertForSequenceClassification.from_pretrained(
    'distilbert-base-uncased',
    num_labels=num_labels
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
# Check if GPU is available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f'Using device: {device}')

# Move model to device
model.to(device)

Using device: cuda


DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [14]:
from transformers import AdamW, get_linear_schedule_with_warmup

# Define optimizer
optimizer = AdamW(model.parameters(), lr=2e-5)

# Define number of training epochs
epochs = 3

# Calculate total steps
total_steps = len(train_loader) * epochs

# Define scheduler
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps
)



In [15]:
from tqdm.auto import tqdm

def train(model, loader, optimizer, scheduler, device):
    model.train()
    total_loss = 0
    for batch in tqdm(loader, desc="Training"):
        # Move batch to device
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # Forward pass
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()

        # Backward pass
        loss.backward()

        # Gradient clipping (optional, helps with stability)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        # Update parameters and scheduler
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

    avg_loss = total_loss / len(loader)
    return avg_loss

In [16]:
from sklearn.metrics import accuracy_score, f1_score

def evaluate(model, loader, device):
    model.eval()
    predictions = []
    true_labels = []

    with torch.no_grad():
        for batch in tqdm(loader, desc="Evaluating"):
            # Move batch to device
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            # Forward pass
            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits

            # Move logits and labels to CPU
            logits = logits.detach().cpu().numpy()
            label_ids = labels.to('cpu').numpy()

            # Get predictions
            preds = logits.argmax(axis=1)

            # Store predictions and true labels
            predictions.extend(preds)
            true_labels.extend(label_ids)

    # Calculate metrics
    acc = accuracy_score(true_labels, predictions)
    f1 = f1_score(true_labels, predictions, average='weighted')

    return acc, f1

In [17]:
for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    print("-" * 20)

    # Train the model
    train_loss = train(model, train_loader, optimizer, scheduler, device)
    print(f"Training Loss: {train_loss:.4f}")

    # Evaluate on validation set
    val_acc, val_f1 = evaluate(model, val_loader, device)
    print(f"Validation Accuracy: {val_acc:.4f}")
    print(f"Validation F1 Score: {val_f1:.4f}")



Epoch 1/3
--------------------


Training:   0%|          | 0/1000 [00:00<?, ?it/s]

Training Loss: 0.4958


Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]

Validation Accuracy: 0.9305
Validation F1 Score: 0.9307

Epoch 2/3
--------------------


Training:   0%|          | 0/1000 [00:00<?, ?it/s]

Training Loss: 0.1526


Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]

Validation Accuracy: 0.9330
Validation F1 Score: 0.9330

Epoch 3/3
--------------------


Training:   0%|          | 0/1000 [00:00<?, ?it/s]

Training Loss: 0.1012


Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]

Validation Accuracy: 0.9385
Validation F1 Score: 0.9385


In [18]:
# Load the best model if saved
# model.load_state_dict(torch.load('best_model.pth'))

# Evaluate on test set
test_acc, test_f1 = evaluate(model, test_loader, device)
print(f"\nTest Accuracy: {test_acc:.4f}")
print(f"Test F1 Score: {test_f1:.4f}")

Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]


Test Accuracy: 0.9280
Test F1 Score: 0.9274


In [28]:
import random

label_to_advice = {
    0: [
        "I'm sorry you're feeling down. It's okay to have off days. Try reaching out to someone you trust for a chat or take some time to relax with your favorite book or show. Small steps can make a big difference.",
        "Feeling sad can feel heavy. How about stepping outside for some fresh air or listening to a song that soothes you? It’s okay to give yourself a break and just breathe.",
        "Sometimes, sadness just needs to be acknowledged. Wrap yourself in a blanket, sip something warm, and remind yourself that this feeling will pass. You deserve comfort."
    ],
    1: [
        "Joy is a gift—embrace it! Maybe call up a friend and share the excitement, or treat yourself to something special. Keep this moment close to your heart.",
        "What a lovely feeling! Write down what made you happy so you can revisit it later. Or better yet, take a photo to capture the moment.",
        "Happiness is contagious—spread it around! A random act of kindness or even just sending a kind message can magnify your joy and brighten someone else’s day too."
    ],
    2: [
        "Love is such a beautiful feeling. Take a moment to appreciate those around you—perhaps send a heartfelt message or plan a small surprise for someone you care about.",
        "Let love guide your actions today. A warm hug or simple words of gratitude can strengthen bonds and lift spirits.",
        "You're radiating love—don’t hold back! Express it through small gestures or acts of kindness. Even writing a letter to yourself about what you love can be uplifting."
    ],
    3: [
        "Anger can be tricky, but it’s okay to feel this way. Step back, take a few deep breaths, and consider journaling to get those feelings out. Movement, like a quick walk, can also help.",
        "It’s natural to feel frustrated sometimes. Try to redirect the energy into something positive—cleaning your space, hitting the gym, or even creating something artistic.",
        "Feeling heated? Pause for a few moments and try counting to ten while breathing slowly. If possible, talk it out with someone who can listen without judgment."
    ],
    4: [
        "Fear can feel overwhelming, but grounding exercises might help. Close your eyes, take deep breaths, and focus on things you can feel, hear, or see around you.",
        "Uncertainty can trigger fear, but you're stronger than you realize. Talking it over with someone or writing down your thoughts may help clarify things.",
        "It's okay to feel afraid. Consider visualizing a safe space or recalling a comforting memory. Facing fears doesn’t mean you have to do it alone—ask for support when needed."
    ]
}

def get_random_advice(emotion_label):
    if emotion_label in label_to_advice:
        return random.choice(label_to_advice[emotion_label])
    return "I'm here for you. Let me know more about how you're feeling."


In [29]:
def get_advice(text, model, tokenizer, label_encoder, device, max_length=128):
    model.eval()

    # Tokenize the input text
    encoding = tokenizer(
        text,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors='pt'
    )

    # Move tensors to the device
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    # Get model predictions
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        preds = logits.argmax(dim=1).cpu().numpy()

    # Decode the predicted label
    predicted_label = label_encoder.inverse_transform(preds)[0]

    # Get corresponding advice
    advice = label_to_advice.get(predicted_label, "I'm here to listen. Tell me more about how you're feeling.")

    return advice

In [30]:
# Example user input
user_input = "I met my aunt and was excited to see that she was still very active and alived."
# Get advice
advice = get_advice(user_input, model, tokenizer, label_encoder, device)
print(f"Advice: {advice}")

Advice: ['Joy is a gift—embrace it! Maybe call up a friend and share the excitement, or treat yourself to something special. Keep this moment close to your heart.', 'What a lovely feeling! Write down what made you happy so you can revisit it later. Or better yet, take a photo to capture the moment.', 'Happiness is contagious—spread it around! A random act of kindness or even just sending a kind message can magnify your joy and brighten someone else’s day too.']


In [31]:
import random
def get_advice(text, model, tokenizer, label_encoder, device, max_length=128):
        # ... (Existing code) ...
        user_input = "I met my aunt and was excited to see that she was still very active and alived."
        # Get corresponding advice randomly
        advice = get_random_advice(preds[0])  # preds[0] gives the predicted label index

        return advice

In [33]:
# Install gradio
!pip install gradio

import gradio as gr

# Define the function to get advice
def get_advice_ui(text):
    advice = get_advice(text, model, tokenizer, label_encoder, device)
    return advice

# Create the Gradio interface
interface = gr.Interface(
    fn=get_advice_ui,
    inputs="text",
    outputs="text",
    title="Therapist in Your Pocket",
    description="Enter how you're feeling or describe your day, and get tailored advice to uplift your mood."
)

# Launch the interface
interface.launch()


AttributeError: module 'gradio' has no attribute 'inputs'