In [59]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.layers import TextVectorization, Dense, Dropout, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import Accuracy


In [72]:

df = pd.read_csv('training.csv')  # Replace with your dataset path

# Define input variables and labels
X = df['text']  # The text data
y = df['label']  # The emotion labels

# Convert emotion labels to one-hot encoding
label_binarizer = LabelBinarizer()
y_one_hot = label_binarizer.fit_transform(y)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y_one_hot, test_size=0.3, random_state=42)

# Parameters
max_features = 10000  # Vocabulary size
sequence_length = 70  # Maximum length of the sequences

# Define TextVectorization layer
vectorizer = TextVectorization(
    max_tokens=max_features,
    output_sequence_length=sequence_length,
    pad_to_max_tokens=True
)

# Prepare the data for the TextVectorization layer
vectorizer.adapt(X_train)

# Apply TextVectorization to the data
X_train_vectorized = vectorizer(X_train)
X_test_vectorized = vectorizer(X_test)


In [73]:
# Build and compile the model
num_labels = y_one_hot.shape[1]

input_layer = Input(shape=(70,), dtype=tf.int32, name='text_input')
embedding_layer = tf.keras.layers.Embedding(input_dim=1000, output_dim=128)(input_layer)
x = tf.keras.layers.Conv1D(128, 5, activation='relu')(embedding_layer)
x = tf.keras.layers.MaxPooling1D(pool_size=4)(x)
x = tf.keras.layers.Flatten()(x)
x = Dropout(0.5)(x)
x = Dense(64, activation='relu')(x)
output_layer = Dense(num_labels, activation='sigmoid')(x)  # Sigmoid for multi-label classification

model = Model(inputs=input_layer, outputs=output_layer)

optimizer = Adam(learning_rate=1e-4)
model.compile(optimizer=optimizer, loss=BinaryCrossentropy(), metrics=[Accuracy()])

In [64]:
# Train the model
history = model.fit(
    X_train_vectorized,
    y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.1,
    verbose=1
)

Epoch 1/10
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 22ms/step - accuracy: 0.0000e+00 - loss: 0.4855 - val_accuracy: 0.0000e+00 - val_loss: 0.4108
Epoch 2/10
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 20ms/step - accuracy: 0.0000e+00 - loss: 0.4069 - val_accuracy: 0.0000e+00 - val_loss: 0.4099
Epoch 3/10
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 19ms/step - accuracy: 0.0000e+00 - loss: 0.4033 - val_accuracy: 0.0000e+00 - val_loss: 0.4091
Epoch 4/10
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 20ms/step - accuracy: 0.0000e+00 - loss: 0.4017 - val_accuracy: 0.0000e+00 - val_loss: 0.4007
Epoch 5/10
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 21ms/step - accuracy: 0.0000e+00 - loss: 0.3812 - val_accuracy: 0.0000e+00 - val_loss: 0.3571
Epoch 6/10
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 21ms/step - accuracy: 0.0000e+00 - loss: 0.3218 - val_accuracy: 0.00

In [65]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test_vectorized, y_test)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 4.4665e-05 - loss: 0.2096
Test Loss: 0.2093
Test Accuracy: 0.0000


In [66]:
# Predict on test set
y_pred = model.predict(X_test_vectorized)
y_pred_labels = (y_pred > 0.5).astype(int)

[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


In [67]:
## Classification report and confusion matrix
from sklearn.metrics import classification_report, confusion_matrix

y_test_labels = (y_test > 0.5).astype(int)

print("Classification Report:")
print(classification_report(y_test_labels, y_pred_labels, target_names=label_binarizer.classes_))

# Plot confusion matrix
cm = confusion_matrix(y_test_labels.argmax(axis=1), y_pred_labels.argmax(axis=1))
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=label_binarizer.classes_, yticklabels=label_binarizer.classes_)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

Classification Report:


  _warn_prf(average, modifier, msg_start, len(result))


TypeError: object of type 'numpy.int64' has no len()

In [None]:
# Example usage
custom_texts = ["I am so happy today!", "I feel really sad about this."]
emotion_preds = predict_emotions(custom_texts)
for i, text in enumerate(custom_texts):
    print(f"Text: '{text}'")
    for emotion, score in zip(emotion_labels, emotion_preds[i]):
        print(f"{emotion}: {score*100:.2f}%")