In [1]:
import numpy as np
import pandas as pd
import re
import joblib
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout
import tensorflow as tf

# Data Preparation

In [2]:

from datasets import load_dataset

# Load the "train" split of the dair-ai/emotion dataset from the Hugging Face hub
ds = load_dataset("dair-ai/emotion", split="train")

# Convert to a pandas DataFrame; the rest of the notebook works on df_train
df_train = ds.to_pandas()

# Preview the first rows (columns are 'text' and an integer 'label')
print(df_train.head())

# Show column names
print("Columns:", df_train.columns)


  from .autonotebook import tqdm as notebook_tqdm


                                                text  label
0                            i didnt feel humiliated      0
1  i can go from feeling so hopeless to so damned...      0
2   im grabbing a minute to post i feel greedy wrong      3
3  i am ever feeling nostalgic about the fireplac...      2
4                               i am feeling grouchy      3
Columns: Index(['text', 'label'], dtype='object')


In [3]:
# Load the "validation" split. Note that `ds` is rebound here, so it no longer
# refers to the training split after this cell runs.
ds=load_dataset("dair-ai/emotion",split="validation")
df_validate=ds.to_pandas()
# NOTE(review): df_validate is not referenced by any later cell visible in this
# notebook -- confirm whether it was meant to be used for model validation.
df_validate.head()


Unnamed: 0,text,label
0,im feeling quite sad and sorry for myself but ...,0
1,i feel like i am still looking at a blank canv...,0
2,i feel like a faithful servant,2
3,i am just feeling cranky and blue,3
4,i can have for a treat or if i am feeling festive,1


In [4]:
# Load the "test" split. `ds` is rebound again; only the DataFrames keep the
# individual splits around.
ds=load_dataset("dair-ai/emotion",split="test")
df_test=ds.to_pandas()
# NOTE(review): df_test is not referenced by any later cell visible in this
# notebook -- confirm whether a held-out evaluation was intended.
df_test.head()

Unnamed: 0,text,label
0,im feeling rather rotten so im not very ambiti...,0
1,im updating my blog because i feel shitty,0
2,i never make her separate from me because i do...,0
3,i left with my bouquet of red and yellow tulip...,1
4,i was feeling a little vain when i did this one,0


In [5]:
# Class balance of the training split: number of rows per integer label.
df_train['label'].value_counts()

label
1    5362
0    4666
3    2159
4    1937
2    1304
5     572
Name: count, dtype: int64

# Data Preprocessing

In [6]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

stemmer = PorterStemmer()

# Build the English stop-word set once. The previous loop rebuilt it for every
# single token, which dominated the runtime of this cell.
# Requires the NLTK "stopwords" corpus to be installed locally
# (nltk.download('stopwords')).
STOP_WORDS = set(stopwords.words('english'))


def preprocess(text):
    """Normalise one raw text string into the form fed to the TF-IDF vectorizer.

    Steps, in order:
      1. replace every character that is not an ASCII letter with a space,
      2. lower-case and split on whitespace,
      3. drop English stop words,
      4. Porter-stem the remaining tokens,
      5. re-join the stems with single spaces.

    Defined as a function so the exact same cleaning can be applied to new
    input at prediction time (``analyze_entry`` calls ``preprocess``).

    Parameters
    ----------
    text : str
        Raw input text.

    Returns
    -------
    str
        Space-separated stemmed tokens; empty string if nothing survives.
    """
    tokens = re.sub('[^a-zA-Z]', ' ', text).lower().split()
    return ' '.join(stemmer.stem(word) for word in tokens if word not in STOP_WORDS)


# Cleaned training corpus, one entry per row of df_train (same order).
corpus = [preprocess(text) for text in df_train['text']]

In [7]:
# Vectorization
# Fit TF-IDF on the cleaned training corpus, keeping at most 3000 terms.
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer(max_features=3000)
# .toarray() turns the sparse TF-IDF matrix into a dense array; X has one row
# per corpus entry and is the feature matrix used to train both models below.
X = vectorizer.fit_transform(corpus).toarray()

# Encode Labels

In [9]:
# emotion
# Encode the integer emotion labels as class ids for training. The encoder is
# kept so predicted class ids can be mapped back with inverse_transform and so
# it can be saved alongside the model.
emotion_encoder = LabelEncoder()
y_emotion=emotion_encoder.fit_transform(df_train['label'])

In [19]:
# Advice text keyed by the integer emotion label.
# The emotion names in the comments follow the dair-ai/emotion dataset card
# (0 sadness, 1 joy, 2 love, 3 anger, 4 fear, 5 surprise) -- confirm with
# `ds.features["label"].names`.
# NOTE(review): several advice texts do not obviously fit those emotions
# (e.g. label 2 is "love" but maps to "Challenge your fear") -- verify the
# intended pairing before relying on the advice output.
emotion_to_advice = {
    0: "Talk to a friend",           # sadness
    1: "Try deep breathing",         # joy
    2: "Challenge your fear",        # love
    3: "Reflect on the cause",       # anger
    4: "Celebrate your wins",        # fear
    5: "Share it"                    # surprise
}


def add_advice(example):
    """Attach an ``advice`` entry to a row based on its ``label``.

    Looks up ``example["label"]`` in ``emotion_to_advice``; labels missing
    from the mapping receive the fallback text "Take a moment". The row is
    modified in place and also returned, which is the shape ``Dataset.map``
    expects from its callback.
    """
    label = example["label"]
    if label in emotion_to_advice:
        advice = emotion_to_advice[label]
    else:
        advice = "Take a moment"
    example["advice"] = advice
    return example


In [23]:
from datasets import load_dataset, Dataset

# Wrap the training DataFrame as a Hugging Face Dataset so `.map` can be used.
# `from_pandas` is the constructor intended for DataFrames; `from_dict` expects
# a plain mapping of column name -> values.
df_train_dataset = Dataset.from_pandas(df_train)

# `emotion_to_advice` and `add_advice` are defined in the earlier cell and are
# deliberately not redefined here: a second copy silently shadows the first and
# the two can drift apart.
df_train_with_advice = df_train_dataset.map(add_advice)

df_train_pandas = df_train_with_advice.to_pandas()


# LabelEncoder on the advice strings -> integer class ids for the advice model.
advice_encoder = LabelEncoder()
y_advice = advice_encoder.fit_transform(df_train_pandas["advice"])

print(df_train_pandas.head())
print(y_advice)

Map: 100%|█████████████████████████████████████████████████████████████| 16000/16000 [00:01<00:00, 14173.31 examples/s]

                                                text  label  \
0                            i didnt feel humiliated      0   
1  i can go from feeling so hopeless to so damned...      0   
2   im grabbing a minute to post i feel greedy wrong      3   
3  i am ever feeling nostalgic about the fireplac...      2   
4                               i am feeling grouchy      3   

                 advice  
0      Talk to a friend  
1      Talk to a friend  
2  Reflect on the cause  
3   Challenge your fear  
4  Reflect on the cause  
[4 4 2 ... 5 2 4]





In [24]:
# Inspect the dataset schema after mapping: confirms the new 'advice' column.
print(df_train_with_advice.features)

{'text': Value(dtype='string', id=None), 'label': Value(dtype='int64', id=None), 'advice': Value(dtype='string', id=None)}


In [26]:
# Feed-forward classifier over the TF-IDF features: one output unit per
# distinct emotion class in y_emotion, softmax over classes.
emotion_model = Sequential([
    # Declare the input with an explicit Input object. Passing `input_shape=`
    # to the first Dense layer is what triggers the Keras warning this cell
    # used to emit.
    tf.keras.Input(shape=(X.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(len(np.unique(y_emotion)), activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [27]:
# y_emotion holds integer class ids, hence the sparse categorical cross-entropy.
emotion_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# NOTE(review): batch_size=2 gives 8000 steps per epoch (about a minute per
# epoch in the recorded output) -- confirm this is intentional.
# NOTE(review): no validation data is passed, so the logged accuracy is
# training accuracy only; no seed is set, so runs are not reproducible.
emotion_model.fit(X, y_emotion, epochs=10, batch_size=2, verbose=1)

Epoch 1/10
[1m8000/8000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 7ms/step - accuracy: 0.6282 - loss: 0.9983
Epoch 2/10
[1m8000/8000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 7ms/step - accuracy: 0.9025 - loss: 0.2754
Epoch 3/10
[1m8000/8000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 7ms/step - accuracy: 0.9425 - loss: 0.1618
Epoch 4/10
[1m8000/8000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 7ms/step - accuracy: 0.9623 - loss: 0.1100
Epoch 5/10
[1m8000/8000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 7ms/step - accuracy: 0.9740 - loss: 0.0761
Epoch 6/10
[1m8000/8000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 7ms/step - accuracy: 0.9830 - loss: 0.0497
Epoch 7/10
[1m8000/8000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 7ms/step - accuracy: 0.9844 - loss: 0.0447
Epoch 8/10
[1m8000/8000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 7ms/step - accuracy: 0.9873 - loss: 0.0329
Epoch 9/10
[1m8

<keras.src.callbacks.history.History at 0x20ce0f41040>

In [28]:
# Feed-forward classifier predicting the encoded advice class from the same
# TF-IDF features used by the emotion model.
# NOTE(review): every advice string is looked up from the emotion label, so
# this model's targets are a relabelling of the emotion classes -- consider
# deriving advice from the predicted emotion instead of training a second model.
advice_model = Sequential([
    # Declare the input with an explicit Input object. Passing `input_shape=`
    # to the first Dense layer makes Keras emit a warning.
    tf.keras.Input(shape=(X.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(len(np.unique(y_advice)), activation='softmax')
])

# y_advice holds integer class ids, hence the sparse categorical cross-entropy.
advice_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# NOTE(review): batch_size=2 and no validation data, as for the emotion model;
# the logged accuracy is training accuracy only.
advice_model.fit(X, y_advice, epochs=10, batch_size=2, verbose=1)

Epoch 1/10
[1m8000/8000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 7ms/step - accuracy: 0.6144 - loss: 1.0082
Epoch 2/10
[1m8000/8000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 8ms/step - accuracy: 0.9009 - loss: 0.2764
Epoch 3/10
[1m8000/8000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 8ms/step - accuracy: 0.9376 - loss: 0.1677
Epoch 4/10
[1m8000/8000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 10ms/step - accuracy: 0.9588 - loss: 0.1126
Epoch 5/10
[1m8000/8000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 9ms/step - accuracy: 0.9738 - loss: 0.0752
Epoch 6/10
[1m8000/8000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 7ms/step - accuracy: 0.9800 - loss: 0.0550
Epoch 7/10
[1m8000/8000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 7ms/step - accuracy: 0.9863 - loss: 0.0378
Epoch 8/10
[1m8000/8000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 7ms/step - accuracy: 0.9880 - loss: 0.0346
Epoch 9/10
[1m

<keras.src.callbacks.history.History at 0x20ce0f40740>

In [29]:
#Rule-Based Distortion Detection
# Ordered (label, pattern) rules; the first pattern that matches wins, which
# preserves the priority of the original if/elif chain.
# Single keywords are matched on word boundaries so that words merely
# containing them ("nevertheless", "hallways") no longer trigger a rule.
_DISTORTION_RULES = (
    ("Overgeneralization", re.compile(r"\b(?:always|never)\b")),
    ("Catastrophizing", re.compile(r"\b(?:disasters?|ruined)\b")),
    ("Mind Reading", re.compile(r"they probably think")),
)


def detect_distortion(text):
    """Return the first cognitive-distortion label whose keywords occur in ``text``.

    Matching is case-insensitive. Rules are checked in this order:
    "Overgeneralization" (always / never), "Catastrophizing"
    (disaster(s) / ruined), "Mind Reading" (the phrase "they probably think").

    Parameters
    ----------
    text : str
        Raw user text. Empty or ``None`` input is treated as "no match".

    Returns
    -------
    str
        The matching label, or the string "None" (not the ``None`` object)
        when no rule matches.
    """
    if not text:
        return "None"
    lowered = text.lower()
    for label, pattern in _DISTORTION_RULES:
        if pattern.search(lowered):
            return label
    return "None"

In [30]:
#Analyze New Entry
def _clean_entry_text(raw_text):
    """Clean one string the same way the training corpus was cleaned.

    Non-letters become spaces, the text is lower-cased and split, English
    stop words are removed and the remaining tokens are Porter-stemmed using
    the notebook-level ``stemmer``.
    """
    stop_words = set(stopwords.words('english'))
    tokens = re.sub('[^a-zA-Z]', ' ', raw_text).lower().split()
    return ' '.join(stemmer.stem(token) for token in tokens if token not in stop_words)


def analyze_entry(user_input):
    """Analyse one piece of user text with both models and the rule-based check.

    The text is cleaned like the training data, vectorised with the fitted
    TF-IDF ``vectorizer``, and passed to ``emotion_model`` and
    ``advice_model``. The raw (uncleaned) text is passed to
    ``detect_distortion``.

    The previous version called ``preprocess``, which is not defined anywhere
    in this notebook and raised NameError; the cleaning now lives in
    ``_clean_entry_text``.

    Parameters
    ----------
    user_input : str
        Raw text entered by the user.

    Returns
    -------
    tuple
        ``(emotion_label, distortion, advice_label)``. ``emotion_label`` is
        whatever ``emotion_encoder`` was fitted on (the integer label column),
        ``distortion`` is the string from ``detect_distortion`` and
        ``advice_label`` is the decoded advice text.
    """
    processed = _clean_entry_text(user_input)
    vector = vectorizer.transform([processed]).toarray()

    # Single-row batch, so argmax over the flattened prediction is the class id.
    emotion_pred = emotion_model.predict(vector)
    emotion_label = emotion_encoder.inverse_transform([emotion_pred.argmax()])[0]

    advice_pred = advice_model.predict(vector)
    advice_label = advice_encoder.inverse_transform([advice_pred.argmax()])[0]

    distortion = detect_distortion(user_input)

    return emotion_label, distortion, advice_label

In [31]:
# Save Models and Vectorizer
# Persist everything needed for inference: the fitted TF-IDF vectorizer, both
# Keras models and both label encoders. Files are written to the current
# working directory.
joblib.dump(vectorizer, "tfidf_vectorizer.pkl")
# NOTE(review): ".h5" selects the HDF5 format, which newer Keras versions treat
# as legacy -- confirm what the loading side expects before changing it.
emotion_model.save("emotion_model.h5")
advice_model.save("advice_model.h5")
joblib.dump(emotion_encoder, "emotion_encoder.pkl")
joblib.dump(advice_encoder, "advice_encoder.pkl")



['advice_encoder.pkl']