In [None]:
import numpy as np
import pandas as pd

# Load data

In [2]:
# Read the emotions dataset from the Kaggle input directory into a DataFrame.
df = pd.read_csv('/kaggle/input/emotions/text.csv')

In [3]:
# Legend for the integer codes stored in the `label` column.
print('Six categories: sadness (0), joy (1), love (2), anger (3), fear (4), and surprise (5)')

Six categories: sadness (0), joy (1), love (2), anger (3), fear (4), and surprise (5)


In [4]:
# Preview the first rows to check the column layout.
df.head()

Unnamed: 0.1,Unnamed: 0,text,label
0,0,i just feel really helpless and heavy hearted,4
1,1,ive enjoyed being able to slouch about relax a...,0
2,2,i gave up my internship with the dmrg and am f...,4
3,3,i dont know i feel so lost,0
4,4,i am a kindergarten teacher and i am thoroughl...,4


In [5]:
# Preview the last rows of the dataset.
df.tail()

Unnamed: 0.1,Unnamed: 0,text,label
416804,416804,i feel like telling these horny devils to find...,2
416805,416805,i began to realize that when i was feeling agi...,3
416806,416806,i feel very curious be why previous early dawn...,5
416807,416807,i feel that becuase of the tyranical nature of...,3
416808,416808,i think that after i had spent some time inves...,5


In [6]:
# Summarise column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 416809 entries, 0 to 416808
Data columns (total 3 columns):
 #   Column      Non-Null Count   Dtype 
---  ------      --------------   ----- 
 0   Unnamed: 0  416809 non-null  int64 
 1   text        416809 non-null  object
 2   label       416809 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 9.5+ MB


In [7]:
# Separate the raw sentences (model input) from the integer emotion labels (target).
X, y = df['text'], df['label']
for series in (X, y):
    print(type(series))

<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>


# Data processing

In [8]:
import nltk
# Download the NLTK resources used in this notebook: 'stopwords' backs
# stopwords.words('english'); 'punkt' is presumably needed by word_tokenize.
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [9]:
from nltk.corpus import stopwords
# A set gives O(1) membership tests; the original list was scanned linearly
# for every word of every row. This also matches how the inference code in
# this notebook builds its stop-word collection.
stop_words = set(stopwords.words('english'))
# Words kept even though NLTK lists them as stop words (presumably because
# negations change the emotional meaning of a sentence).
important_words = {'no', 'not', 'nor', 'never'}
# "not a stop word OR an important word" == "not in (stop words - important words)".
removable_words = stop_words - important_words

# Drop removable stop words from every sentence; tokens are split on whitespace
# and compared case-insensitively, but the kept tokens retain their original case.
X1 = X.apply(
    lambda sentence: ' '.join(
        word for word in sentence.split() if word.lower() not in removable_words
    )
)

In [10]:
# Inspect the stop-word-filtered sentences and confirm the result is still a Series.
print(X1.head())
print(type(X1))

0                   feel really helpless heavy hearted
1    ive enjoyed able slouch relax unwind frankly n...
2              gave internship dmrg feeling distraught
3                                  dont know feel lost
4    kindergarten teacher thoroughly weary job take...
Name: text, dtype: object
<class 'pandas.core.series.Series'>


In [11]:
from tensorflow.keras.preprocessing.text import Tokenizer

# Fit the vocabulary on the stop-word-filtered sentences (tokenizer capped with
# num_words=50000), then encode each sentence as a list of integer word ids.
tokenizer = Tokenizer(num_words=50000)
tokenizer.fit_on_texts(X1)
X_sequences = tokenizer.texts_to_sequences(X1)

2025-07-10 03:33:43.410275: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752118423.618957      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752118423.677481      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [12]:
import pickle
# Persist the fitted tokenizer so it can be reloaded later with the same vocabulary.
with open('tokenizer.pkl', 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)

In [13]:
# Show the integer encoding of the first sentence.
X_sequences[0]

[1, 6, 181, 1370, 2981]

In [14]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Pad every encoded sentence at the end (padding='post') up to the length of
# the longest sequence in the corpus.
max_len = max(map(len, X_sequences))
X_padded = pad_sequences(X_sequences, maxlen=max_len, padding='post')

In [15]:
# Display the padded sequence length computed above.
max_len

82

In [16]:
# Confirm the container types of the features and the labels before splitting.
for container in (X_padded, y):
    print(type(container))

<class 'numpy.ndarray'>
<class 'pandas.core.series.Series'>


In [17]:
from sklearn.model_selection import train_test_split
# Hold out 15% of the samples for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_padded,
    y,
    train_size=0.85,
    random_state=42,
)

In [18]:
# Report the shape of each split: train features, train labels, test features, test labels.
for split in (X_train, y_train, X_test, y_test):
    print(split.shape)

(354287, 82)
(354287,)
(62522, 82)
(62522,)


# Build model

In [19]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *

In [20]:
# Stacked bidirectional-LSTM classifier over padded word-id sequences,
# assembled layer by layer.
model = Sequential()
# Map each word id to a 128-dimensional vector.
model.add(Embedding(input_dim=50001, output_dim=128, input_length=max_len))
# The first two recurrent layers return the full sequence so the next one can consume it.
model.add(Bidirectional(LSTM(128, return_sequences=True)))
model.add(Bidirectional(LSTM(64, return_sequences=True)))
model.add(BatchNormalization())
# The last recurrent layer returns only its final output.
model.add(Bidirectional(LSTM(64, return_sequences=False)))
model.add(Dropout(0.2))
# One softmax output per emotion class (6 classes).
model.add(Dense(6, activation='softmax'))

I0000 00:00:1752118481.672619      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1752118481.673275      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


In [21]:
# Labels are plain integers, hence the sparse categorical cross-entropy loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# Train for 10 epochs, reserving 5% of the training data for validation.
model.fit(
    X_train,
    y_train,
    batch_size=128,
    epochs=10,
    validation_split=0.05,
)

Epoch 1/10


I0000 00:00:1752118494.760459     102 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m2630/2630[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 37ms/step - accuracy: 0.8497 - loss: 0.3680 - val_accuracy: 0.9402 - val_loss: 0.0925
Epoch 2/10
[1m2630/2630[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 38ms/step - accuracy: 0.9405 - loss: 0.0964 - val_accuracy: 0.9420 - val_loss: 0.0933
Epoch 3/10
[1m2630/2630[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 39ms/step - accuracy: 0.9430 - loss: 0.0902 - val_accuracy: 0.9398 - val_loss: 0.0919
Epoch 4/10
[1m2630/2630[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 39ms/step - accuracy: 0.9437 - loss: 0.0862 - val_accuracy: 0.9414 - val_loss: 0.0916
Epoch 5/10
[1m2630/2630[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 38ms/step - accuracy: 0.9436 - loss: 0.0836 - val_accuracy: 0.9413 - val_loss: 0.0921
Epoch 6/10
[1m2630/2630[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 38ms/step - accuracy: 0.9442 - loss: 0.0824 - val_accuracy: 0.9423 - val_loss: 0.0953
Epoch 7/1

<keras.src.callbacks.history.History at 0x7f94f4fced90>

# Evaluate on the test set

In [22]:
# Score the trained model on the held-out split; the result lists the loss
# followed by the accuracy metric configured in compile().
model.evaluate(X_test, y_test)

[1m1954/1954[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 9ms/step - accuracy: 0.9395 - loss: 0.1053


[0.10516417771577835, 0.9407888650894165]

# Save model

In [23]:
# Write the trained model to 'emotions_model.h5' in the current working directory.
model.save('emotions_model.h5')


# Test model with text

In [34]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nltk.tokenize import word_tokenize
import nltk

# Repeats the earlier downloads (a no-op when the packages are already present);
# presumably kept so this cell can be run on its own.
nltk.download('punkt')
nltk.download('stopwords')

from nltk.corpus import stopwords
def preprocess_text(text, tokenizer, max_len):
    """Convert one raw sentence into a padded id sequence for the model.

    The sentence is tokenized with NLTK's ``word_tokenize``, English stop
    words are removed (except the negations 'no', 'not', 'nor', 'never'),
    the remaining tokens are encoded with ``tokenizer`` and the result is
    padded at the end to ``max_len``.

    Args:
        text: The raw input sentence.
        tokenizer: Fitted tokenizer exposing ``texts_to_sequences``.
        max_len: Target sequence length passed to ``pad_sequences``.

    Returns:
        The output of ``pad_sequences`` for the single encoded sentence.
    """
    negations = {'no', 'not', 'nor', 'never'}
    english_stop_words = set(stopwords.words('english'))

    kept_words = []
    for token in word_tokenize(text):
        lowered = token.lower()
        # Keep negations and anything that is not a stop word.
        if lowered in negations or lowered not in english_stop_words:
            kept_words.append(token)

    sequences = tokenizer.texts_to_sequences([' '.join(kept_words)])
    return pad_sequences(sequences, maxlen=max_len, padding='post')


[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [35]:
# Keep the sample in its own variable: naming it `X` would overwrite the
# training text Series defined earlier and break re-running the cells that use it.
sample_text = "I’m completely fed up with this situation, and I’m absolutely furious right now!"
x_input = preprocess_text(sample_text, tokenizer, max_len)

# Predict
y_pred = model.predict(x_input)

# The index of the highest softmax probability is the predicted label code.
predicted_class = y_pred.argmax(axis=1)[0]
print("Predicted class:", predicted_class)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 659ms/step
Predicted class: 3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step


# Create the app

In [30]:
! pip install gradio



In [38]:
import gradio as gr
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import nltk

# Make sure the tokenizer data and stop-word list needed by the app are available.
nltk.download('punkt')
nltk.download('stopwords')

# Reload the artifacts written earlier in this notebook.
model = load_model("emotions_model.h5")

import pickle
# Same relative path the tokenizer was saved to, so the file just written is
# the one that gets loaded. NOTE: pickle.load can execute arbitrary code;
# only load tokenizer files you created yourself.
with open("tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)

# Must equal the padded length used for training (82, the longest training
# sequence). A different value pads inputs differently from what the model
# saw during training and silently changes its predictions.
max_len = 82
stop_words = set(stopwords.words('english'))
# Stop words that are kept anyway during filtering.
important_words = {'no', 'not', 'nor', 'never'}
# Integer label code -> human-readable emotion name.
label_map = {
    0: 'Sadness',
    1: 'Joy',
    2: 'Love',
    3: 'Anger',
    4: 'Fear',
    5: 'Surprise'
}

def predict_emotion(text):
    """Classify the emotion of one sentence and format the result for display.

    The sentence is tokenized, stop words are removed (negations are kept),
    the remaining words are encoded with the fitted tokenizer, padded to
    ``max_len`` and passed to the model.

    Args:
        text: The sentence typed by the user; may be empty or ``None``.

    Returns:
        A message with the predicted label code, its name and the model's
        confidence, or a short hint when there is nothing to classify.
    """
    # Nothing was typed: do not ask the model to classify pure padding.
    if not text or not text.strip():
        return "Please enter a sentence."

    tokens = word_tokenize(text)
    filtered = [word for word in tokens if word.lower() not in stop_words or word.lower() in important_words]
    text_processed = ' '.join(filtered)
    seq = tokenizer.texts_to_sequences([text_processed])

    # Every word was a stop word or unknown to the tokenizer, so the encoded
    # sequence is empty and a prediction would be based on padding only.
    if not seq[0]:
        return "No recognizable words found; please try a different sentence."

    padded = pad_sequences(seq, maxlen=max_len, padding='post')

    y_pred = model.predict(padded)
    predicted_class = int(np.argmax(y_pred, axis=1)[0])
    # Highest softmax probability, reported as the confidence.
    confidence = float(np.max(y_pred))

    label = label_map[predicted_class]

    return f"Emotional sentences:: {predicted_class} - {label} (Confidence: {confidence:.2f})"

# Three-line text box in which the user types the sentence to classify.
sentence_box = gr.Textbox(lines=3, placeholder="Enter a sentence...")

# Wire the classifier into a simple text-in / text-out web interface.
demo = gr.Interface(
    title="Emotion Classifier",
    description="Enter an English sentence and get the predicted emotion.",
    fn=predict_emotion,
    inputs=sentence_box,
    outputs="text",
)

demo.launch()


[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


* Running on local URL:  http://127.0.0.1:7865
It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

* Running on public URL: https://ba3d8bf8f6aa106064.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


