In [None]:
import warnings
# Suppress every warning for the rest of the session so library notices do not
# clutter cell output. NOTE(review): this also hides deprecation warnings.
warnings.filterwarnings("ignore")

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import tensorflow_hub as hub
from imblearn.over_sampling import RandomOverSampler
from keras.callbacks import ProgbarLogger
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [None]:
# Read the tweet/emotion dataset from disk, then display the column index to
# confirm which fields the CSV provides.
csv_path = "tweet_emotions.csv"
df = pd.read_csv(csv_path)
df.columns

In [None]:
# Put the columns in a fixed order (id, tweet text, emotion label) so the
# positional lookups in the following cell pick the intended fields.
column_order = ['tweet_id', 'content', 'sentiment']
df = df.reindex(columns=column_order)

In [None]:
# Separate the tweet text (column 1) from the emotion label (last column).
# The sampler expects a 2-D feature array, hence the single-column reshape.
X = df.iloc[:,1].values.reshape(-1, 1)
y = df.iloc[:,-1].values

# Balance the classes by randomly duplicating rows of the under-represented
# emotions. A fixed seed makes the resampled dataset identical on every
# "Restart & Run All"; without it the duplicated rows change on each run.
# NOTE(review): oversampling is done before any train/test split, so copies of
# the same tweet can land on both sides of a later split -- treat held-out
# scores as optimistic.
over = RandomOverSampler(random_state=42)
X, y = over.fit_resample(X, y)

In [None]:
# Encode the sentiment strings as integers.
# LabelEncoder numbers the classes from 0; the +1 shifts them to start at 1,
# so any later lookup from an integer label back to a class name must
# subtract 1 again.
label_encoder = LabelEncoder()
y_encoded = 1 + label_encoder.fit_transform(y)

# Flatten the single-column feature array back to one dimension
X_reshaped = X.ravel()

# Rebuild df from the resampled data: tweet text plus integer label
df = pd.DataFrame(dict(content=X_reshaped, sentiment=y_encoded))

In [None]:
# Preview the first rows of the rebuilt (content, sentiment) frame
df.head(n=5)

# Steps taken to improve the model's performance:
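1- Increase the model's capacity (stack two bidirectional LSTM layers)

2- Increase the model's capacity (add a dense hidden layer with dropout before the output)

3- Use a different optimizer (RMSprop)

4- Increase the number of training epochs (20)

5- Perform data preprocessing (tokenize the tweet text)

6- Perform data preprocessing (pad the sequences to a fixed length)

7- Perform data preprocessing (one-hot encode the sentiment labels)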


In [None]:
# Preprocess the data
# Fill missing values first, then cast to str: casting first would turn NaN
# into the literal string 'nan', leaving nothing for fillna to replace.
df['content'] = df['content'].fillna('').astype(str)

# Tokenize the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['content'])
sequences = tokenizer.texts_to_sequences(df['content'])
vocab_size = len(tokenizer.word_index) + 1  # word indices start at 1; 0 is the padding value

# Pad the sequences to have the same length
max_length = 100  # adjust as needed
padded_sequences = pad_sequences(sequences, maxlen=max_length)

# Convert sentiment to one-hot encoded vectors.
# to_categorical sizes the vectors as max(label) + 1 columns.
sentiment_vectors = tf.keras.utils.to_categorical(df['sentiment'])
num_classes = len(sentiment_vectors[0])

# Create the input and output data for the model
X = padded_sequences
y = sentiment_vectors

# Split the data into train and test sets.
# The rows are not in random order (the oversampler returns the original rows
# followed by the duplicated ones), so slicing off the last 20% would give a
# test set made up of resampled minority-class tweets only. Shuffle and
# stratify on the integer label instead, with a fixed seed for reproducibility.
train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=df['sentiment']
)
train_size = len(train_X)

# Specify the dimensionality of the embedding
embedding_dim = 100

# Define the model: embedding -> two stacked bidirectional LSTMs -> dense head
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True)))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(0.4))
model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))

# Compile the model with RMSprop optimizer
optimizer = RMSprop(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Sanity-check the untrained network on the held-out data. The model has not
# been fitted yet at this point, so these numbers are a chance-level baseline,
# not a result -- the labels say so explicitly.
test_loss, test_accuracy = model.evaluate(test_X, test_y)
print("Untrained baseline loss:", test_loss)
print("Untrained baseline accuracy:", test_accuracy)


In [None]:
# Train the model, scoring the held-out split after every epoch; the returned
# History object is kept for the metric summary that follows.
fit_options = dict(epochs=20, batch_size=64, validation_data=(test_X, test_y))
history = model.fit(train_X, train_y, **fit_options)

In [None]:
# Keep only the final-epoch value of every metric recorded during training
final_epoch = {name: values[-1] for name, values in history.history.items()}

# Round to two decimals for display
last_loss = round(final_epoch['loss'], 2)
last_accuracy = round(final_epoch['accuracy'], 2)
val_accuracy = round(final_epoch['val_accuracy'], 2)

print("Last Loss:", last_loss)
print("Validation Accuracy:", val_accuracy)
print("Last Accuracy:", last_accuracy)

In [None]:
# Preprocess the input text the same way as the training data
input_text = "Layin n bed with a headache  ughhhh...waitin on your call..."
input_sequence = tokenizer.texts_to_sequences([input_text])
input_sequence = pad_sequences(input_sequence, maxlen=max_length)
# Get the model's prediction (one row of class probabilities)
prediction = model.predict(input_sequence)
# Take the class names from the fitted encoder rather than a hand-typed list,
# so the names always match the order the labels were encoded in.
sentiment_labels = list(label_encoder.classes_)
# The integer labels were shifted by +1 when they were encoded, so the network
# has one more output than there are classes and output column 0 is never a
# real label. Indexing the name list with the raw argmax was therefore off by
# one (and out of range for the last class). Skip the unused leading
# column(s) so the argmax lines up with sentiment_labels.
label_offset = prediction.shape[-1] - len(sentiment_labels)
predicted_sentiment = sentiment_labels[np.argmax(prediction[0, label_offset:])]
# Print the output
print("Input Text:", input_text)
print("Predicted Sentiment:", predicted_sentiment)


In [None]:
# Imports kept local so this cell can be run on its own
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import RMSprop

# Tokenize the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['content'])
sequences = tokenizer.texts_to_sequences(df['content'])
vocab_size = len(tokenizer.word_index) + 1  # word indices start at 1; 0 is the padding value

# Pad the sequences to have the same length
max_length = 100  # adjust as needed
padded_sequences = pad_sequences(sequences, maxlen=max_length)

# Convert sentiment to one-hot encoded vectors.
# to_categorical sizes the vectors as max(label) + 1 columns.
sentiment_vectors = tf.keras.utils.to_categorical(df['sentiment'])
num_classes = len(sentiment_vectors[0])

# Create the input and output data for the model
X = padded_sequences
y = sentiment_vectors

# Split the data into train and test sets.
# The rows are not in random order (the oversampler returns the original rows
# followed by the duplicated ones), so slicing off the last 20% would give a
# test set made up of resampled minority-class tweets only. Shuffle and
# stratify on the integer label instead, with a fixed seed for reproducibility.
train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=df['sentiment']
)
train_size = len(train_X)

# Specify the dimensionality of the embedding
embedding_dim = 100

# Define the model: embedding -> two stacked bidirectional LSTMs -> dense head
T_model = tf.keras.Sequential()
T_model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length))
T_model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True)))
T_model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
T_model.add(tf.keras.layers.Dense(64, activation='relu'))
T_model.add(tf.keras.layers.Dropout(0.4))
T_model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))

# Compile the model with RMSprop optimizer
optimizer = RMSprop(learning_rate=0.001)
T_model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Training hyper-parameters for the second model
batch_size = 32
epochs = 20

# Train T_model on the training split (no validation data is passed here)
T_model.fit(x=train_X, y=train_y, batch_size=batch_size, epochs=epochs)


In [None]:
# Score the trained T_model on the held-out split without a progress bar.
# evaluate() returns the loss followed by the compiled metric (accuracy).
evaluation = T_model.evaluate(test_X, test_y, verbose=0)
test_loss, test_accuracy = evaluation
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)

In [None]:
# Preprocess the input text the same way as the training data
input_text = "Layin n bed with a headache  ughhhh...waitin on your call..."
input_sequence = tokenizer.texts_to_sequences([input_text])
input_sequence = pad_sequences(input_sequence, maxlen=max_length)
# Get the model's prediction (one row of class probabilities)
prediction = T_model.predict(input_sequence)
# Take the class names from the fitted encoder rather than a hand-typed list,
# so the names always match the order the labels were encoded in.
sentiment_labels = list(label_encoder.classes_)
# The integer labels were shifted by +1 when they were encoded, so the network
# has one more output than there are classes and output column 0 is never a
# real label. Indexing the name list with the raw argmax was therefore off by
# one (and out of range for the last class). Skip the unused leading
# column(s) so the argmax lines up with sentiment_labels.
label_offset = prediction.shape[-1] - len(sentiment_labels)
predicted_sentiment = sentiment_labels[np.argmax(prediction[0, label_offset:])]
# Print the output
print("Input Text:", input_text)
print("Predicted Sentiment:", predicted_sentiment)
