In [1]:
import pandas as pd
import numpy as np
import keras
import tensorflow
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense

# **Reading Data**

In [3]:
data = pd.read_csv("/content/drive/MyDrive/TextEmotion NLP/train.txt", sep=';')
data.columns = ["Text", "Emotions"]
print(data.head())

                                                Text Emotions
0  i can go from feeling so hopeless to so damned...  sadness
1   im grabbing a minute to post i feel greedy wrong    anger
2  i am ever feeling nostalgic about the fireplac...     love
3                               i am feeling grouchy    anger
4  ive been feeling a little burdened lately wasn...  sadness


# **Tokenizing**

In [4]:
texts = data["Text"].tolist()
labels = data["Emotions"].tolist()

# Tokenize the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

# **Padding the sequences to the same length**

In [6]:
sequences = tokenizer.texts_to_sequences(texts)
max_length = max([len(seq) for seq in sequences])
padded_sequences = pad_sequences(sequences, maxlen=max_length)

# **Label encoding**

In [7]:
# Encode the string labels to integers
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# **One-hot encode the labels**

In [8]:
# One-hot encode the labels
one_hot_labels = keras.utils.to_categorical(labels)

# **Text Emotions Classification Model**

## **split the data into training and test sets**

In [9]:
# Split the data into training and testing sets
xtrain, xtest, ytrain, ytest = train_test_split(padded_sequences,
                                                one_hot_labels,
                                                test_size=0.2)

## **Model**

In [10]:
# Define the model
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1,
                    output_dim=128, input_length=max_length))
model.add(Flatten())
model.add(Dense(units=128, activation="relu"))
model.add(Dense(units=len(one_hot_labels[0]), activation="softmax"))

model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model.fit(xtrain, ytrain, epochs=10, batch_size=32, validation_data=(xtest, ytest))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7aec7abddba0>

## **Checking the model performance**

In [13]:
# Define a list of input sentences with different emotions
input_sentences = [
    "I'm so excited for the concert tonight!",
    "Feeling sad about the news of my friend's illness.",
    "Happiness overwhelms me as I spend time with loved ones.",
    "The sound of footsteps echoing in the empty hallway sent shivers down my spine."
]

# Initialize an empty list to store predicted labels
predicted_labels = []

# Loop through each input sentence
for input_text in input_sentences:
    # Preprocess the input text
    input_sequence = tokenizer.texts_to_sequences([input_text])
    padded_input_sequence = pad_sequences(input_sequence, maxlen=max_length)

    # Make prediction
    prediction = model.predict(padded_input_sequence)

    # Decode the predicted label
    predicted_label = label_encoder.inverse_transform([np.argmax(prediction[0])])

    # Append the predicted label to the list of predicted labels
    predicted_labels.append(predicted_label)

# Print the predicted labels for each input sentence
for i, predicted_label in enumerate(predicted_labels):
    print(f"Sentence {i+1}: {predicted_label}")


Sentence 1: ['joy']
Sentence 2: ['sadness']
Sentence 3: ['love']
Sentence 4: ['sadness']


## **Testion Model accuracy**

using `test.txt` with 2000 labeled sentences for testing

In [15]:
# Load the testing data from test.txt
test_data = pd.read_csv("/content/drive/MyDrive/TextEmotion NLP/test.txt", sep=';')
test_data.columns = ["Text", "Expected_Outcome"]

# Tokenize and pad sequences for testing data
test_texts = test_data["Text"].tolist()
test_sequences = tokenizer.texts_to_sequences(test_texts)
padded_test_sequences = pad_sequences(test_sequences, maxlen=max_length)

# Get the expected outcomes from the testing data
expected_outcomes = test_data["Expected_Outcome"].tolist()

# Use the trained model to predict emotions for testing data
predicted_labels_test = []

for test_input_sequence in padded_test_sequences:
    # Make prediction
    prediction_test = model.predict(np.array([test_input_sequence]))

    # Decode the predicted label
    predicted_label_test = label_encoder.inverse_transform([np.argmax(prediction_test[0])])

    # Append the predicted label to the list of predicted labels for testing data
    predicted_labels_test.append(predicted_label_test)

# Print the predicted emotions along with expected outcomes for each input text in the testing data
print("Predicted Emotions for Testing Data:")
for i, (predicted_label_test, expected_outcome) in enumerate(zip(predicted_labels_test, expected_outcomes)):
    print(f"Text {i+1}: Predicted: {predicted_label_test}, Expected: {expected_outcome}")

# Evaluate the model on the testing data
loss, accuracy = model.evaluate(padded_test_sequences, keras.utils.to_categorical(label_encoder.transform(expected_outcomes)))
print(f"Accuracy on Testing Data: {accuracy}")


Predicted Emotions for Testing Data:
Text 1: Predicted: ['sadness'], Expected: sadness
Text 2: Predicted: ['sadness'], Expected: sadness
Text 3: Predicted: ['joy'], Expected: joy
Text 4: Predicted: ['sadness'], Expected: sadness
Text 5: Predicted: ['fear'], Expected: fear
Text 6: Predicted: ['joy'], Expected: anger
Text 7: Predicted: ['joy'], Expected: joy
Text 8: Predicted: ['joy'], Expected: joy
Text 9: Predicted: ['joy'], Expected: anger
Text 10: Predicted: ['anger'], Expected: fear
Text 11: Predicted: ['love'], Expected: sadness
Text 12: Predicted: ['sadness'], Expected: fear
Text 13: Predicted: ['love'], Expected: joy
Text 14: Predicted: ['love'], Expected: love
Text 15: Predicted: ['sadness'], Expected: sadness
Text 16: Predicted: ['joy'], Expected: joy
Text 17: Predicted: ['sadness'], Expected: sadness
Text 18: Predicted: ['anger'], Expected: anger
Text 19: Predicted: ['joy'], Expected: joy
Text 20: Predicted: ['fear'], Expected: sadness
Text 21: Predicted: ['joy'], Expected: jo

Model Accuracy with 1999 testing samples : 0.8204101920127869