In [24]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder


# Завантаження даних
# Assuming 'tweet_emotions.csv' is in a folder named 'data'
df = pd.read_csv('/content/drive/MyDrive/tweet_emotions.csv')  # Changed the path to include 'data/'
texts = df['content']
labels = df['sentiment']

# Розділення даних
X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)

# Токенізація тексту
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X_train)
X_train_seq = pad_sequences(tokenizer.texts_to_sequences(X_train), maxlen=100)
X_test_seq = pad_sequences(tokenizer.texts_to_sequences(X_test), maxlen=100)

# Encode labels to numerical representation
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Get the number of unique classes
num_classes = len(label_encoder.classes_)

# Update the model to have the correct number of output units
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=5000, output_dim=128, input_length=100),
    tf.keras.layers.LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    tf.keras.layers.Dense(num_classes, activation='softmax')  # Updated to num_classes
])
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Навчання
history = model.fit(X_train_seq, y_train_encoded, epochs=5, batch_size=32, validation_split=0.2)

y_pred = np.argmax(model.predict(X_test_seq), axis=1)
# Convert predictions back to original labels
y_pred_labels = label_encoder.inverse_transform(y_pred)
print(classification_report(y_test, y_pred_labels))
print("Accuracy:", accuracy_score(y_test, y_pred_labels))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
              precision    recall  f1-score   support

       anger       0.00      0.00      0.00        19
     boredom       0.00      0.00      0.00        31
       empty       0.00      0.00      0.00       162
  enthusiasm       0.20      0.01      0.01       163
         fun       0.09      0.07      0.08       338
   happiness       0.31      0.36      0.33      1028
        hate       0.31      0.22      0.26       268
        love       0.37      0.46      0.41       762
     neutral       0.36      0.45      0.40      1740
      relief       0.24      0.06      0.09       352
     sadness       0.30      0.25      0.27      1046
    surprise       0.13      0.07      0.09       425
       worry       0.33      0.41      0.37      1666

    accuracy                           0.32      8000
   macro avg       0.20      0.18      0.18      8000
weighted avg       0.30      0.32      0.30      8000

Accuracy: 0.323


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
!pip install tensorflow


