In [9]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense

# Step 1: Load and inspect data
data = pd.read_csv("train.txt", sep=';', header=None)
data.columns = ["Text", "Emotions"]
print("Sample data:")
print(data.head())

# Step 2: Encode target labels
label_encoder = LabelEncoder()
data["Emotion_Label"] = label_encoder.fit_transform(data["Emotions"])

# Step 3: Tokenize and pad text data
tokenizer = Tokenizer(num_words=10000, oov_token="<OOV>")
tokenizer.fit_on_texts(data["Text"])

sequences = tokenizer.texts_to_sequences(data["Text"])
padded_sequences = pad_sequences(sequences, maxlen=100, padding='post')

# Step 4: Split data
X = padded_sequences
y = data["Emotion_Label"].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Build model
model = Sequential([
    Embedding(input_dim=10000, output_dim=64, input_length=100),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])

# Step 6: Compile and train
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

print("\nTraining model...")
model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_test, y_test))

# Step 7: Evaluate
loss, accuracy = model.evaluate(X_test, y_test)
print(f"\nTest Accuracy: {accuracy:.2f}")

# Optional: Predict on a new example
example_text = ["I feel amazing and full of energy!"]
example_seq = tokenizer.texts_to_sequences(example_text)
example_pad = pad_sequences(example_seq, maxlen=100, padding='post')

predicted_class = np.argmax(model.predict(example_pad), axis=1)
emotion = label_encoder.inverse_transform(predicted_class)

print(f"\nPrediction: {emotion[0]}")


Sample data:
                                                Text Emotions
0                            i didnt feel humiliated  sadness
1  i can go from feeling so hopeless to so damned...  sadness
2   im grabbing a minute to post i feel greedy wrong    anger
3  i am ever feeling nostalgic about the fireplac...     love
4                               i am feeling grouchy    anger





Training model...
Epoch 1/5
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 14ms/step - accuracy: 0.3609 - loss: 1.5650 - val_accuracy: 0.5997 - val_loss: 1.0704
Epoch 2/5
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 13ms/step - accuracy: 0.7635 - loss: 0.7138 - val_accuracy: 0.8334 - val_loss: 0.5172
Epoch 3/5
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 15ms/step - accuracy: 0.9507 - loss: 0.1846 - val_accuracy: 0.8394 - val_loss: 0.4827
Epoch 4/5
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step - accuracy: 0.9858 - loss: 0.0669 - val_accuracy: 0.8459 - val_loss: 0.4686
Epoch 5/5
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 14ms/step - accuracy: 0.9927 - loss: 0.0354 - val_accuracy: 0.8481 - val_loss: 0.5058
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8538 - loss: 0.5197

Test Accuracy: 0.85
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━

In [11]:
input_text = "i am ever feeling nostalgic about the fireplace i will know that it is still on the property"

# Define max_length used during training
max_length = 100  # Should match the value used in pad_sequences when training

# Preprocess the input text
input_sequence = tokenizer.texts_to_sequences([input_text])
padded_input_sequence = pad_sequences(input_sequence, maxlen=max_length, padding='post')

# Predict
prediction = model.predict(padded_input_sequence)
predicted_label = label_encoder.inverse_transform([np.argmax(prediction[0])])

# Output
print(f"Predicted Emotion: {predicted_label[0]}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
Predicted Emotion: love
