In [149]:
import pandas as pd

features = ["text", "sentiment"]

train_df = pd.read_csv("./Dataset/train.csv")[features]
test_df = pd.read_csv("./Dataset/test.csv")[features]

train_df, test_df = train_df.dropna(), test_df.dropna()


print(f"Train shape: {train_df.shape}")
print(f"Test shape: {test_df.shape}")

Train shape: (27480, 2)
Test shape: (3534, 2)


In [150]:
train_df.head(3)

Unnamed: 0,text,sentiment
0,"I`d have responded, if I were going",neutral
1,Sooo SAD I will miss you here in San Diego!!!,negative
2,my boss is bullying me...,negative


In [151]:
X_train, Y_train = train_df["text"], train_df["sentiment"]

X_test, Y_test = test_df["text"], test_df["sentiment"]


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


<h2>Preprocess Text Data</h2>


In [153]:
tokenizer = Tokenizer(num_words=10000, oov_token="<OOV>")

tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)

X_test_seq = tokenizer.texts_to_sequences(X_test)

In [154]:
max_length = 100

X_train_padded = pad_sequences(X_train_seq, maxlen=max_length, padding='post')

X_test_padded = pad_sequences(X_test_seq, maxlen=max_length, padding='post')

In [186]:
sentiment_mapping = {"negative": 0, "neutral": 1, "positive": 2}
ind_mapping = {0: "negative", 1: "neutral", 2: "positive"}

Y_train_enc = Y_train.map(sentiment_mapping)
Y_test_enc = Y_test.map(sentiment_mapping)

In [156]:
y_train_one_hot = tf.keras.utils.to_categorical(Y_train_enc, num_classes=3)
y_test_one_hot = tf.keras.utils.to_categorical(Y_test_enc, num_classes=3)

In [172]:

model = keras.Sequential([
    Embedding(input_dim=10000, output_dim=32, input_length=max_length),
    Bidirectional(SimpleRNN(64, activation='relu')),
    Dense(32, activation='relu'),
    Dense(3, activation='softmax')
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


model.summary()

In [173]:
history = model.fit(X_train_padded, y_train_one_hot, epochs=5, batch_size=32, validation_data=(X_test_padded, y_test_one_hot))

Epoch 1/5
[1m859/859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 103ms/step - accuracy: 0.5059 - loss: 0.9685 - val_accuracy: 0.6907 - val_loss: 0.7205
Epoch 2/5
[1m859/859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 52ms/step - accuracy: 0.7513 - loss: 0.6218 - val_accuracy: 0.7083 - val_loss: 0.6908
Epoch 3/5
[1m859/859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 36ms/step - accuracy: 0.8085 - loss: 0.4876 - val_accuracy: 0.7003 - val_loss: 0.7078
Epoch 4/5
[1m859/859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 44ms/step - accuracy: 0.8514 - loss: 0.3933 - val_accuracy: 0.7187 - val_loss: 0.7608
Epoch 5/5
[1m859/859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 60ms/step - accuracy: 0.8829 - loss: 0.3211 - val_accuracy: 0.7057 - val_loss: 0.8295


In [174]:
loss, accuracy = model.evaluate(X_test_padded, y_test_one_hot)

print(f"Test Accuracy: {accuracy:.4f}")

[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.7010 - loss: 0.8372
Test Accuracy: 0.7057


In [193]:
test_sentence = ["This movie was amazing", "I've phoned you, but you didn't answer", "i hate when someone say that :<"]

test_sequence = tokenizer.texts_to_sequences(test_sentence)

test_padded = pad_sequences(test_sequence, maxlen=max_length, padding='post')

predictions = model.predict(test_padded)

for i ,prediction in enumerate(predictions):
    predictedClassInd = np.argmax(prediction)
    print(f"{test_sentence[i]} => {ind_mapping[predictedClassInd]}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
This movie was amazing => positive
I've phoned you, but you didn't answer => neutral
i hate when someone say that :< => negative
