In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, SimpleRNN
from tensorflow.keras.preprocessing.sequence import pad_sequences
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

In [3]:
df = pd.read_csv('sentiment_analysis.csv')
df.head()

Unnamed: 0,Year,Month,Day,Time of Tweet,text,sentiment,Platform
0,2018,8,18,morning,What a great day!!! Looks like dream.,positive,Twitter
1,2018,8,18,noon,"I feel sorry, I miss you here in the sea beach",positive,Facebook
2,2017,8,18,night,Don't angry me,negative,Facebook
3,2022,6,8,morning,We attend in the class just for listening teachers reading on slide. Just Nonsence,negative,Facebook
4,2022,6,8,noon,"Those who want to go, let them go",negative,Instagram


In [6]:
df.shape

(499, 7)

In [17]:
df = df.iloc[0:, 4:6]
df.head()

Unnamed: 0,text,sentiment
0,What a great day!!! Looks like dream.,positive
1,"I feel sorry, I miss you here in the sea beach",positive
2,Don't angry me,negative
3,We attend in the class just for listening teachers reading on slide. Just Nonsence,negative
4,"Those who want to go, let them go",negative


In [76]:
df.sentiment.value_counts()

neutral     199
positive    166
negative    134
Name: sentiment, dtype: int64

In [114]:
voc_length = 10000
max_length = 100
max_dim = 50

In [115]:
texts = [text for text in df.text]
sentiments = [sent for sent in df.sentiment]

In [118]:
embedding_docs = [one_hot(words, voc_length) for words in texts]

In [119]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
sentiments = le.fit_transform(sentiments)

In [151]:
unique_ele, counts = np.unique(sentiments, return_counts=True)
unique_ele, counts

(array([0, 1, 2], dtype=int64), array([134, 199, 166], dtype=int64))

In [120]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(embedding_docs, sentiments, test_size=0.2, random_state=42)
len(x_train), len(x_test), len(y_train), len(y_test)

(399, 100, 399, 100)

In [123]:
x_train = pad_sequences(x_train, maxlen=max_len, padding='pre')
x_test = pad_sequences(x_test, maxlen=max_len, padding='pre')

In [130]:
model = Sequential()
model.add(Embedding(input_dim=voc_length, output_dim=max_dim))
model.add(SimpleRNN(128, activation='relu'))
model.add(Dense(3, activation='softmax'))
model.build(input_shape=(None, max_len))

In [131]:
model.summary()

In [126]:
loss = tf.keras.losses.SparseCategoricalCrossentropy()

In [132]:
model.compile(optimizer='Adam', loss=loss, metrics=['accuracy'])

In [133]:
from tensorflow.keras.callbacks import EarlyStopping
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

In [134]:
history = model.fit(x_train, y_train, callbacks=early_stop, validation_split=0.2, epochs=10, batch_size=32)

Epoch 1/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 111ms/step - accuracy: 0.3414 - loss: 1.0984 - val_accuracy: 0.5625 - val_loss: 1.0764
Epoch 2/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 49ms/step - accuracy: 0.5336 - loss: 1.0600 - val_accuracy: 0.4750 - val_loss: 1.0270
Epoch 3/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step - accuracy: 0.4656 - loss: 0.9964 - val_accuracy: 0.5875 - val_loss: 1.0247
Epoch 4/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 49ms/step - accuracy: 0.6992 - loss: 0.9318 - val_accuracy: 0.5875 - val_loss: 0.9882
Epoch 5/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 57ms/step - accuracy: 0.8962 - loss: 0.8516 - val_accuracy: 0.6250 - val_loss: 0.9639
Epoch 6/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 49ms/step - accuracy: 0.8897 - loss: 0.6714 - val_accuracy: 0.6375 - val_loss: 0.8805
Epoch 7/10
[1m10/10[0m [32m━━━

In [154]:
def preprocess(text, voc_length=10000, max_length=100):
    one_hot_seq = one_hot(text, voc_length)
    embedded_sent = pad_sequences([one_hot_seq], maxlen=max_length, padding='pre')
    return embedded_sent

In [156]:
def predict(text):
    embeds = preprocess(text)
    prediction = model.predict(embeds)
    predicted_class = np.argmax(prediction)
    label_mapping = {0:'Negative', 1: 'Positive', 2:'Neutral'}
    predicted_sentiment = label_mapping[predicted_class]
    return f"Predicted Sentiment ===> {predicted_sentiment}"

In [169]:
example1 = 'congratulations ! you guys finish a month early than we do. booo'
predict(example1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step


'Predicted Sentiment ===> Neutral'