<a href="https://colab.research.google.com/github/MrPrabhathPeri/Emotions-Prediction/blob/main/Emotion_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import nltk
import re
import string
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [10]:
# Adjust the path according to where you've saved the dataset
df = pd.read_csv('/content/Emotion_classify_Data.csv')

# Check the first few rows of the dataset
print(df.head())


                                             Comment Emotion
0  i seriously hate one subject to death but now ...    fear
1                 im so full of life i feel appalled   anger
2  i sit here to write i start to dig out my feel...    fear
3  ive been really angry with r and i feel like a...     joy
4  i feel suspicious if there is no one outside l...    fear


In [11]:
nltk.download('stopwords')
nltk.download('punkt')
stop_words = set(nltk.corpus.stopwords.words('english'))

def clean_text(text):
    text = text.lower()  # Convert to lowercase
    text = re.sub(f'[{re.escape(string.punctuation)}]', '', text)  # Remove punctuation
    tokens = nltk.word_tokenize(text)  # Tokenize
    tokens = [word for word in tokens if word not in stop_words]  # Remove stop words
    return ' '.join(tokens)

df['clean_text'] = df['Comment'].apply(clean_text)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [12]:
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(df['clean_text']).toarray()
y = df['Emotion']


# Naive Bayes Model

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [14]:
model = MultinomialNB()
model.fit(X_train, y_train)


In [15]:
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.9107744107744108
Confusion Matrix:
 [[371  12   9]
 [ 22 383  11]
 [ 28  24 328]]
Classification Report:
               precision    recall  f1-score   support

       anger       0.88      0.95      0.91       392
        fear       0.91      0.92      0.92       416
         joy       0.94      0.86      0.90       380

    accuracy                           0.91      1188
   macro avg       0.91      0.91      0.91      1188
weighted avg       0.91      0.91      0.91      1188



# Neural Network Model

In [16]:
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(df['clean_text'])
X_nn = tokenizer.texts_to_sequences(df['clean_text'])
X_nn = pad_sequences(X_nn, maxlen=100)

X_train_nn, X_test_nn, y_train_nn, y_test_nn = train_test_split(X_nn, y, test_size=0.2, random_state=42)

# Encode labels
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_train_nn = le.fit_transform(y_train_nn)
y_test_nn = le.transform(y_test_nn)

# Define Neural Network
nn_model = Sequential()
nn_model.add(Dense(128, input_shape=(100,), activation='relu'))
nn_model.add(Dropout(0.5))
nn_model.add(Dense(64, activation='relu'))
nn_model.add(Dropout(0.5))
nn_model.add(Dense(len(le.classes_), activation='softmax'))

nn_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
nn_model.fit(X_train_nn, y_train_nn, epochs=10, batch_size=32, validation_split=0.1)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.3260 - loss: 226.7737 - val_accuracy: 0.3621 - val_loss: 11.8403
Epoch 2/10
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.3325 - loss: 49.5975 - val_accuracy: 0.3663 - val_loss: 1.8686
Epoch 3/10
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.3374 - loss: 15.2320 - val_accuracy: 0.3832 - val_loss: 1.1127
Epoch 4/10
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.3296 - loss: 7.4686 - val_accuracy: 0.3432 - val_loss: 1.0983
Epoch 5/10
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.3295 - loss: 3.5674 - val_accuracy: 0.3032 - val_loss: 1.1018
Epoch 6/10
[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.3385 - loss: 2.6087 - val_accuracy: 0.3158 - val_loss: 1.1018
Epoch 7/10
[1m134/1

<keras.src.callbacks.history.History at 0x7b266c613130>

In [17]:
nn_loss, nn_accuracy = nn_model.evaluate(X_test_nn, y_test_nn)
print("NN Accuracy:", nn_accuracy)

y_pred_nn = np.argmax(nn_model.predict(X_test_nn), axis=-1)
y_pred_nn_labels = le.inverse_transform(y_pred_nn)

print("NN Classification Report:\n", classification_report(y_test, y_pred_nn_labels))


[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3163 - loss: 1.1007
NN Accuracy: 0.32828283309936523
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
NN Classification Report:
               precision    recall  f1-score   support

       anger       0.33      0.99      0.49       392
        fear       0.00      0.00      0.00       416
         joy       0.21      0.01      0.02       380

    accuracy                           0.33      1188
   macro avg       0.18      0.33      0.17      1188
weighted avg       0.18      0.33      0.17      1188



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Random Forest Classifier

In [18]:
from sklearn.ensemble import RandomForestClassifier

# Initialize the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
rf_model.fit(X_train, y_train)

# Predict on the test set
y_pred_rf = rf_model.predict(X_test)

# Evaluate the model
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_rf))
print("Classification Report:\n", classification_report(y_test, y_pred_rf))


Random Forest Accuracy: 0.9427609427609428
Confusion Matrix:
 [[363  16  13]
 [ 20 390   6]
 [  6   7 367]]
Classification Report:
               precision    recall  f1-score   support

       anger       0.93      0.93      0.93       392
        fear       0.94      0.94      0.94       416
         joy       0.95      0.97      0.96       380

    accuracy                           0.94      1188
   macro avg       0.94      0.94      0.94      1188
weighted avg       0.94      0.94      0.94      1188



In [19]:
# Sample new text data
new_text = "I am feeling very anxious about the upcoming exams."

# Preprocess the new text (same as during training)
cleaned_text = clean_text(new_text)

# Transform the cleaned text using the same TF-IDF vectorizer used during training
new_vector = vectorizer.transform([cleaned_text]).toarray()

# Predict the emotion using the trained Random Forest model
predicted_emotion_rf = rf_model.predict(new_vector)

# Print the predicted emotion
print(f"Predicted Emotion (Random Forest): {predicted_emotion_rf[0]}")

# For Naive Bayes:
predicted_emotion = model.predict(new_vector)
print(f"Predicted Emotion (NB): {predicted_emotion[0]}")

# For Neural Network:
new_sequence = tokenizer.texts_to_sequences([cleaned_text])
new_padded_sequence = pad_sequences(new_sequence, maxlen=100)
predicted_emotion_nn = np.argmax(nn_model.predict(new_padded_sequence), axis=-1)
print(f"Predicted Emotion (NN): {le.inverse_transform(predicted_emotion_nn)[0]}")



Predicted Emotion (Random Forest): fear
Predicted Emotion (NB): fear
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Predicted Emotion (NN): anger


Here, The predicted emotion for the given text "I am feeling very anxious about the upcoming exams." is fear for both **Naive bayes (NN)** and **Random Forest** algorithms, but the **neural networks (NN)** method did not give best accuracy.