In [1]:
# Ensure all necessary libraries are installed or reinstalled
!pip uninstall -y scipy
!pip install scipy
!pip install --upgrade --no-deps --force-reinstall numpy scipy scikit-learn pandas tensorflow

# Import libraries
import numpy as np  # Import numpy before other libraries
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense
from tensorflow.keras.utils import to_categorical

# File upload for Google Colab
from google.colab import files

# Upload the dataset
print("Please upload the 'train.txt' file:")
uploaded = files.upload()

# Load the dataset
try:
    data = pd.read_csv("train.txt", sep=';')
    data.columns = ["Text", "Emotions"]
    print("Dataset loaded successfully:")
    print(data.head())
except FileNotFoundError:
    print("Error: 'train.txt' file not found. Please ensure the file exists in the script's directory.")
    exit()

# Check for missing or invalid data
if data.isnull().values.any():
    print("Warning: Missing values found in the dataset. Filling missing values with empty strings.")
    data["Text"] = data["Text"].fillna("")
    data["Emotions"] = data["Emotions"].fillna("")

# Convert text and labels into lists
texts = data["Text"].tolist()
labels = data["Emotions"].tolist()

# Tokenize the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
max_length = max([len(seq) for seq in sequences])
padded_sequences = pad_sequences(sequences, maxlen=max_length)
print(f"Text tokenized and padded. Max length: {max_length}")

# Create a vocabulary of unique emotions and assign integer labels
emotion_vocab = {emotion: index for index, emotion in enumerate(set(labels))}

# Encode labels using the vocabulary
labels = [emotion_vocab[emotion] for emotion in labels]

# One-hot encode the labels
one_hot_labels = to_categorical(labels)

# Split the data into training and testing sets
xtrain, xtest, ytrain, ytest = train_test_split(
    padded_sequences,
    one_hot_labels,
    test_size=0.2,
    random_state=42
)
print("Data split into training and testing sets.")

# Define the model
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1,
                    output_dim=128,
                    input_length=max_length))
model.add(Flatten())
model.add(Dense(units=128, activation="relu"))
model.add(Dense(units=len(one_hot_labels[0]), activation="softmax"))

model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
print("Model compiled successfully.")

# Train the model
print("Training the model...")
model.fit(xtrain, ytrain, epochs=10, batch_size=32, validation_data=(xtest, ytest))

# Predict on a new input
input_text = "i am ever feeling nostalgic about the fireplace i will know that it is still on the property"
print(f"Predicting for input text: {input_text}")

# Preprocess the input text
input_sequence = tokenizer.texts_to_sequences([input_text])
padded_input_sequence = pad_sequences(input_sequence, maxlen=max_length)

# Make a prediction
prediction = model.predict(padded_input_sequence)
predicted_label_index = np.argmax(prediction[0])

# Get the emotion label from the vocabulary
predicted_label = list(emotion_vocab.keys())[list(emotion_vocab.values()).index(predicted_label_index)]

print(f"Predicted Emotion: {predicted_label}")


Found existing installation: scipy 1.14.1
Uninstalling scipy-1.14.1:
  Successfully uninstalled scipy-1.14.1
[0mCollecting scipy
  Using cached scipy-1.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
Using cached scipy-1.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (41.2 MB)
[0mInstalling collected packages: scipy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gensim 4.3.3 requires numpy<2.0,>=1.18.5, but you have numpy 2.2.0 which is incompatible.
gensim 4.3.3 requires scipy<1.14.0,>=1.7.0, but you have scipy 1.14.1 which is incompatible.
pytensor 2.26.4 requires numpy<2,>=1.17.0, but you have numpy 2.2.0 which is incompatible.[0m[31m
[0mSuccessfully installed scipy-1.14.1
[0mCollecting numpy
  Using cached numpy-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Col


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.0 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>
    ColabKernelApp.launch_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/ke

AttributeError: _ARRAY_API not found


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.0 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>
    ColabKernelApp.launch_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    app.start()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/ke

AttributeError: _ARRAY_API not found

Please upload the 'train.txt' file:


Saving train.txt to train (2).txt
Dataset loaded successfully:
                                                Text Emotions
0  i can go from feeling so hopeless to so damned...  sadness
1   im grabbing a minute to post i feel greedy wrong    anger
2  i am ever feeling nostalgic about the fireplac...     love
3                               i am feeling grouchy    anger
4  ive been feeling a little burdened lately wasn...  sadness
Text tokenized and padded. Max length: 66
Data split into training and testing sets.
Model compiled successfully.
Training the model...




Epoch 1/10
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 50ms/step - accuracy: 0.3759 - loss: 1.5359 - val_accuracy: 0.6906 - val_loss: 0.8576
Epoch 2/10
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 73ms/step - accuracy: 0.8737 - loss: 0.4111 - val_accuracy: 0.8244 - val_loss: 0.5181
Epoch 3/10
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 48ms/step - accuracy: 0.9865 - loss: 0.0540 - val_accuracy: 0.8213 - val_loss: 0.5770
Epoch 4/10
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 47ms/step - accuracy: 0.9935 - loss: 0.0304 - val_accuracy: 0.8294 - val_loss: 0.6182
Epoch 5/10
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 48ms/step - accuracy: 0.9968 - loss: 0.0154 - val_accuracy: 0.8269 - val_loss: 0.6204
Epoch 6/10
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 49ms/step - accuracy: 0.9972 - loss: 0.0139 - val_accuracy: 0.8256 - val_loss: 0.6891
Epoch 7/10
[1m4