In [None]:
!pip install datasets transformers evaluate -q

import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, pipeline
import evaluate

# Emotion dataset from the Hugging Face Hub.
dataset = load_dataset("dair-ai/emotion")

# Work on fixed-seed random subsets so the run stays short:
# 5,000 training rows and 1,000 test rows.
shuffled_train = dataset["train"].shuffle(seed=42)
shuffled_test = dataset["test"].shuffle(seed=42)
small_train = shuffled_train.select(range(5000))
small_test = shuffled_test.select(range(1000))

# Checkpoint name; the tokenizer is loaded from it here.
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(batch):
    """Tokenize the ``"text"`` entries of a batch with the module-level ``tokenizer``.

    Over-long inputs are truncated. No padding is applied here: padding
    every example to the model's maximum length makes each short text as
    expensive as the longest possible one. The ``Trainer`` in this notebook
    is handed the tokenizer, so its default collator pads each batch to the
    longest sequence in that batch instead.

    Args:
        batch: Mapping with a ``"text"`` entry holding the texts to encode
            (a list of strings when used with ``map(..., batched=True)``).

    Returns:
        Whatever ``tokenizer`` returns for those texts.
    """
    return tokenizer(batch["text"], truncation=True)

# Add the tokenizer outputs as extra columns on both subsets.
small_train = small_train.map(tokenize, batched=True)
small_test = small_test.map(tokenize, batched=True)

# Label mapping, taken from the dataset's own class names so the saved model
# config reports readable labels instead of LABEL_0, LABEL_1, ...
label_names = dataset["train"].features["label"].names
label2id = {label: i for i, label in enumerate(label_names)}
id2label = {i: label for label, i in label2id.items()}

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(label_names),  # derived from the dataset rather than hard-coded
    id2label=id2label,
    label2id=label2id,
)

#  Metrics
accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Score evaluation predictions with the module-level ``accuracy`` metric.

    Args:
        eval_pred: A pair ``(logits, labels)``. The index of the largest
            logit along the last axis is used as the predicted class.

    Returns:
        The result of ``accuracy.compute`` for those predictions and labels.
    """
    scores, references = eval_pred
    predicted_ids = scores.argmax(axis=-1)
    return accuracy.compute(references=references, predictions=predicted_ids)

# Training
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",   # current argument name; it replaced the older `evaluation_strategy`
    save_strategy="no",      # no intermediate checkpoints; the final model is saved explicitly below
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train,
    eval_dataset=small_test,
    # `processing_class` supersedes the deprecated `tokenizer=` argument of Trainer.
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()


# Persist the fine-tuned model and its tokenizer side by side so they can be
# reloaded together from one directory.
save_path = "./emotion_prediction_model"
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

#  Prediction
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer, top_k=1)

def predict_emotion(text):
    """Return the top predicted emotion for ``text`` as ``(label, score)``.

    The module-level ``classifier`` may hand back either a list of dicts or
    a list whose first item is itself a list of dicts; both shapes are
    accepted. The score is rounded to four decimal places.
    """
    first = classifier(text)[0]
    # A nested list means the actual prediction sits one level deeper.
    top = first[0] if isinstance(first, list) else first
    return top['label'], round(top['score'], 4)


# Example: run the predictor on a few hand-written sentences.
for sample_text in (
    "I am feeling very happy today!",
    "This is so sad and depressing.",
    "I am so angry at this!",
):
    print(predict_emotion(sample_text))


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25h

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

split/train-00000-of-00001.parquet:   0%|          | 0.00/1.03M [00:00<?, ?B/s]

split/validation-00000-of-00001.parquet:   0%|          | 0.00/127k [00:00<?, ?B/s]

split/test-00000-of-00001.parquet:   0%|          | 0.00/129k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/16000 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/2000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/2000 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading builder script: 0.00B [00:00, ?B/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.415353,0.867
2,0.706200,0.281217,0.897
3,0.706200,0.253529,0.907


Device set to use cuda:0


('joy', 0.9904)
('sadness', 0.9888)
('anger', 0.9814)


In [None]:
!pip install streamlit pyngrok cloudflared -q


  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m77.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m54.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.1/62.1 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for cloudflared (setup.py) ... [?25l[?25hdone


In [None]:
!pip install streamlit transformers huggingface_hub torch
!wget -q -O cloudflared.deb https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb
!dpkg -i cloudflared.deb


Selecting previously unselected package cloudflared.
(Reading database ... 126371 files and directories currently installed.)
Preparing to unpack cloudflared.deb ...
Unpacking cloudflared (2025.8.1) ...
Setting up cloudflared (2025.8.1) ...
Processing triggers for man-db (2.10.2-1) ...


In [None]:
!ls /content/emotion_prediction_model

config.json	   special_tokens_map.json  tokenizer.json
model.safetensors  tokenizer_config.json    vocab.txt


In [None]:
%%writefile app.py
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# Set page config for a wider layout and a browser-tab title.
# Kept ahead of every other Streamlit call in this script.
st.set_page_config(layout="wide", page_title="Emotion Predictor")

# Directory holding the saved model and tokenizer
model_path = "/content/emotion_prediction_model"


@st.cache_resource
def load_classifier(path):
    """Build the text-classification pipeline from a saved model directory.

    Streamlit re-executes this script on every widget interaction. Caching
    the pipeline as a resource means the tokenizer and model are read from
    disk once per server process instead of on every rerun.

    Args:
        path: Directory containing the saved model and tokenizer files.

    Returns:
        A ``text-classification`` pipeline configured with ``top_k=1``.
    """
    tokenizer = AutoTokenizer.from_pretrained(path)
    model = AutoModelForSequenceClassification.from_pretrained(path)
    return pipeline("text-classification", model=model, tokenizer=tokenizer, top_k=1)


classifier = load_classifier(model_path)

# Add custom CSS for styling.
# unsafe_allow_html is required for the raw <style> tag to be rendered; the
# markup below is a fixed literal and contains no user input.
st.markdown(
    """
    <style>
    .stApp {
        background-image: url("https://images5.alphacoders.com/132/1327980.png");
        background-size: cover;
        background-repeat: no-repeat;
        background-attachment: fixed;
        color: #551764; /* Dark purple text color */
    }
    .stTextArea textarea {
        background-color: rgba(255, 255, 255, 0.8); /* Semi-transparent white background for text area */
        color: black; /* Black text color for text area */
    }
    .stButton button {
        background-color: #4CAF50; /* Green button */
        color: white;
        padding: 10px 24px;
        text-align: center;
        text-decoration: none;
        display: inline-block;
        font-size: 16px;
        margin: 4px 2px;
        cursor: pointer;
        border-radius: 8px;
    }
    .stButton button:hover {
        background-color: #45a049;
    }
    /* Style for the prediction output */
    .stMarkdown strong {
        color: #601A35; /* Dark maroon for predicted emotion */
    }
    </style>
    """,
    unsafe_allow_html=True
)


st.title("Emotion Prediction App")

st.write("Enter some text below and I will try to predict the emotion.")

user_input = st.text_area("Enter text here:")

if st.button("Predict Emotion"):
    # Treat whitespace-only input the same as empty input instead of
    # sending it to the classifier.
    text = user_input.strip()
    if text:
        # Get prediction
        result = classifier(text)

        # Handle the result format (it's a list of lists or list of dicts depending on top_k)
        if isinstance(result[0], list):
            prediction = result[0][0]
        else:
            prediction = result[0]

        predicted_label = prediction['label']
        confidence_score = round(prediction['score'], 4)

        st.write(f"Predicted Emotion: **{predicted_label}**")
        st.write(f"Confidence Score: {confidence_score}")
    else:
        st.write("Please enter some text to predict the emotion.")

Overwriting app.py


In [None]:
import subprocess
import time
import sys

def _stop(proc):
    """Terminate a child process, killing it if it does not exit within 10 seconds."""
    proc.terminate()
    try:
        proc.wait(timeout=10)
    except subprocess.TimeoutExpired:
        proc.kill()
        proc.wait()


# Launch the Streamlit app in the background on port 8501.
# NOTE(review): CORS and XSRF protection are switched off while the app is
# exposed through a public tunnel — confirm these flags are still required.
process = subprocess.Popen(["streamlit", "run", "app.py", "--server.port", "8501", "--server.enableCORS", "false", "--server.enableXsrfProtection", "false"])

try:
    # Give Streamlit a moment to start
    time.sleep(5)
    if process.poll() is not None:
        # Fail loudly rather than opening a tunnel to a server that is not running.
        raise RuntimeError(f"Streamlit exited early with code {process.returncode}")

    cloudflared_process = subprocess.Popen(
        ["cloudflared", "tunnel", "--url", "http://localhost:8501"],
        # stdout is never read; discarding it avoids an unread pipe filling up.
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
    )
    try:
        # Stream the tunnel's log output into the cell. This loop runs until
        # cloudflared exits or the cell is interrupted.
        for line in cloudflared_process.stderr:
            print(line.decode(errors="replace"), end="")
    finally:
        _stop(cloudflared_process)
finally:
    # Stop the app as well, so re-running this cell does not find port 8501
    # still occupied by an orphaned Streamlit process.
    _stop(process)

2025-08-29T16:24:00Z INF Thank you for trying Cloudflare Tunnel. Doing so, without a Cloudflare account, is a quick way to experiment and try it out. However, be aware that these account-less Tunnels have no uptime guarantee, are subject to the Cloudflare Online Services Terms of Use (https://www.cloudflare.com/website-terms/), and Cloudflare reserves the right to investigate your use of Tunnels for violations of such terms. If you intend to use Tunnels in production you should use a pre-created named tunnel by following: https://developers.cloudflare.com/cloudflare-one/connections/connect-apps
2025-08-29T16:24:00Z INF Requesting new quick Tunnel on trycloudflare.com...
2025-08-29T16:24:05Z INF +--------------------------------------------------------------------------------------------+
2025-08-29T16:24:05Z INF |  Your quick Tunnel has been created! Visit it at (it may take some time to be reachable):  |
2025-08-29T16:24:05Z INF |  https://wishlist-neither-ul-westminster.trycloudflare