In [2]:
import pandas as pd
df=pd.read_csv("twitter_training.csv",header=None,
names=["tweet_id","entity","sentiment","text"])

In [3]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 74682 entries, 0 to 74681
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   tweet_id   74682 non-null  int64 
 1   entity     74682 non-null  object
 2   sentiment  74682 non-null  object
 3   text       73996 non-null  object
dtypes: int64(1), object(3)
memory usage: 2.3+ MB


In [5]:
df.isnull().sum().sum()

np.int64(686)

In [6]:
df.head()

Unnamed: 0,tweet_id,entity,sentiment,text
0,2401,Borderlands,Positive,im getting on borderlands and i will murder yo...
1,2401,Borderlands,Positive,I am coming to the borders and I will kill you...
2,2401,Borderlands,Positive,im getting on borderlands and i will kill you ...
3,2401,Borderlands,Positive,im coming on borderlands and i will murder you...
4,2401,Borderlands,Positive,im getting on borderlands 2 and i will murder ...


In [7]:
df.fillna(0,inplace=True)

In [25]:

valid = ["Negative", "Neutral", "Positive"]
df= df[df["sentiment"].isin(valid)]

In [26]:
sent_map = {"Negative":0,"Neutral":1,"Positive":2}
df["label"]= df["sentiment"].map(sent_map)

In [27]:
X_train, X_test, y_train, y_test = train_test_split(
    df["text"], df["label"], test_size=0.2, random_state=42
)

In [28]:
X_train = X_train.astype(str)
X_test = X_test.astype(str)

In [29]:
X_train = X_train.fillna("").astype(str)
X_test = X_test.fillna("").astype(str)

In [30]:
tokenizer = Tokenizer(num_words=20000, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)


In [31]:
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

In [32]:
max_len = 40
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len, padding="post")
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len, padding="post")

In [33]:
df.head()

Unnamed: 0,tweet_id,entity,sentiment,text,label
0,2401,Borderlands,Positive,im getting on borderlands and i will murder yo...,2
1,2401,Borderlands,Positive,I am coming to the borders and I will kill you...,2
2,2401,Borderlands,Positive,im getting on borderlands and i will kill you ...,2
3,2401,Borderlands,Positive,im coming on borderlands and i will murder you...,2
4,2401,Borderlands,Positive,im getting on borderlands 2 and i will murder ...,2


In [34]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

model = Sequential([
    Embedding(input_dim=20000, output_dim=64, input_length=max_len),
    SimpleRNN(64, return_sequences=False),
    Dense(32, activation="relu"),
    Dense(3, activation="softmax")
])



In [35]:
y_train = y_train.astype(int)
y_test = y_test.astype(int)


In [36]:
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"]
)

In [37]:

# Explicitly build the model before printing the summary
model.build(input_shape=(None, max_len))
model.summary()

In [40]:
history = model.fit(
    X_train_pad, y_train,
    validation_split=0.2,
    epochs=5,
    batch_size=64
)


loss, acc = model.evaluate(X_test_pad, y_test)
print("Test Accuracy:", acc)

Epoch 1/5
[1m617/617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 19ms/step - accuracy: 0.9612 - loss: 0.0863 - val_accuracy: 0.8297 - val_loss: 0.6400
Epoch 2/5
[1m617/617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 23ms/step - accuracy: 0.9649 - loss: 0.0780 - val_accuracy: 0.8307 - val_loss: 0.6595
Epoch 3/5
[1m617/617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 22ms/step - accuracy: 0.9624 - loss: 0.0832 - val_accuracy: 0.8141 - val_loss: 0.6462
Epoch 4/5
[1m617/617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 23ms/step - accuracy: 0.9665 - loss: 0.0732 - val_accuracy: 0.8241 - val_loss: 0.6357
Epoch 5/5
[1m617/617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 23ms/step - accuracy: 0.9658 - loss: 0.0769 - val_accuracy: 0.8328 - val_loss: 0.6565
[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.8313 - loss: 0.6491
Test Accuracy: 0.8312667012214661


In [42]:
import numpy as np

In [43]:
from sklearn.metrics import confusion_matrix, classification_report

y_pred = model.predict(X_test_pad)
y_pred_labels = np.argmax(y_pred, axis=1)

[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step


In [44]:
print(classification_report(y_test, y_pred_labels))

              precision    recall  f1-score   support

           0       0.83      0.82      0.83      4509
           1       0.85      0.80      0.82      3650
           2       0.82      0.87      0.84      4180

    accuracy                           0.83     12339
   macro avg       0.83      0.83      0.83     12339
weighted avg       0.83      0.83      0.83     12339



In [45]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense,LSTM


In [46]:
model = Sequential([
    Embedding(input_dim=20000, output_dim=64, input_length=max_len),
    LSTM(64, return_sequences=False),
    Dense(32, activation="relu"),
    Dense(3, activation="softmax")
])



In [47]:
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"]
)

In [48]:
# Explicitly build the model before printing the summary
model.build(input_shape=(None, max_len))
model.summary()

history = model.fit(
    X_train_pad, y_train,
    validation_split=0.2,
    epochs=5,
    batch_size=64
)

loss, acc = model.evaluate(X_test_pad, y_test)
print("Test Accuracy:", acc)

Epoch 1/5
[1m617/617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 35ms/step - accuracy: 0.5589 - loss: 0.8891 - val_accuracy: 0.7296 - val_loss: 0.7101
Epoch 2/5
[1m617/617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 36ms/step - accuracy: 0.8033 - loss: 0.5065 - val_accuracy: 0.8196 - val_loss: 0.4628
Epoch 3/5
[1m617/617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 43ms/step - accuracy: 0.8796 - loss: 0.3227 - val_accuracy: 0.8445 - val_loss: 0.4254
Epoch 4/5
[1m617/617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 36ms/step - accuracy: 0.9044 - loss: 0.2424 - val_accuracy: 0.8596 - val_loss: 0.3810
Epoch 5/5
[1m617/617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 37ms/step - accuracy: 0.9171 - loss: 0.2032 - val_accuracy: 0.8606 - val_loss: 0.3988
[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.8594 - loss: 0.3999
Test Accuracy: 0.8593889474868774


In [50]:
from sklearn.metrics import confusion_matrix, classification_report

y_pred = model.predict(X_test_pad)
y_pred_labels = np.argmax(y_pred, axis=1)

[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step


In [51]:
print(classification_report(y_test, y_pred_labels))

              precision    recall  f1-score   support

           0       0.86      0.89      0.88      4509
           1       0.89      0.79      0.84      3650
           2       0.84      0.88      0.86      4180

    accuracy                           0.86     12339
   macro avg       0.86      0.86      0.86     12339
weighted avg       0.86      0.86      0.86     12339



In [52]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense,LSTM,Bidirectional

In [53]:
model = Sequential([
    Embedding(input_dim=20000, output_dim=64, input_length=max_len),
    Bidirectional(LSTM(64, return_sequences=False)),
    Dense(32, activation="relu"),
    Dense(3, activation="softmax")
])



In [54]:
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"]
)

In [55]:
# Explicitly build the model before printing the summary
model.build(input_shape=(None, max_len))
model.summary()

history = model.fit(
    X_train_pad, y_train,
    validation_split=0.2,
    epochs=5,
    batch_size=64
)

loss, acc = model.evaluate(X_test_pad, y_test)
print("Test Accuracy:", acc)

Epoch 1/5
[1m617/617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 63ms/step - accuracy: 0.7004 - loss: 0.6866 - val_accuracy: 0.8119 - val_loss: 0.4936
Epoch 2/5
[1m617/617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 65ms/step - accuracy: 0.8742 - loss: 0.3264 - val_accuracy: 0.8602 - val_loss: 0.3690
Epoch 3/5
[1m617/617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 58ms/step - accuracy: 0.9188 - loss: 0.2066 - val_accuracy: 0.8801 - val_loss: 0.3274
Epoch 4/5
[1m617/617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 58ms/step - accuracy: 0.9370 - loss: 0.1541 - val_accuracy: 0.8856 - val_loss: 0.3345
Epoch 5/5
[1m617/617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 58ms/step - accuracy: 0.9465 - loss: 0.1266 - val_accuracy: 0.8857 - val_loss: 0.3490
[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - accuracy: 0.8809 - loss: 0.3384
Test Accuracy: 0.8809465765953064


In [56]:
from sklearn.metrics import confusion_matrix, classification_report

y_pred = model.predict(X_test_pad)
y_pred_labels = np.argmax(y_pred, axis=1)

[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 26ms/step


In [57]:
print(classification_report(y_test, y_pred_labels))

              precision    recall  f1-score   support

           0       0.86      0.92      0.89      4509
           1       0.90      0.86      0.88      3650
           2       0.90      0.86      0.88      4180

    accuracy                           0.88     12339
   macro avg       0.88      0.88      0.88     12339
weighted avg       0.88      0.88      0.88     12339



In [58]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense,LSTM,Bidirectional,GRU # great recurrent unit

In [59]:
model = Sequential([
    Embedding(input_dim=20000, output_dim=64, input_length=max_len),
    GRU(64, return_sequences=False),
    Dense(32, activation="relu"),
    Dense(3, activation="softmax")
])




In [60]:
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"]
)

In [61]:
# Explicitly build the model before printing the summary
model.build(input_shape=(None, max_len))
model.summary()

history = model.fit(
    X_train_pad, y_train,
    validation_split=0.2,
    epochs=5,
    batch_size=64
)

loss, acc = model.evaluate(X_test_pad, y_test)
print("Test Accuracy:", acc)

Epoch 1/5
[1m617/617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 64ms/step - accuracy: 0.5156 - loss: 0.9338 - val_accuracy: 0.7169 - val_loss: 0.6636
Epoch 2/5
[1m617/617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 67ms/step - accuracy: 0.8256 - loss: 0.4444 - val_accuracy: 0.8434 - val_loss: 0.3999
Epoch 3/5
[1m617/617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 51ms/step - accuracy: 0.9015 - loss: 0.2494 - val_accuracy: 0.8661 - val_loss: 0.3618
Epoch 4/5
[1m617/617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 34ms/step - accuracy: 0.9269 - loss: 0.1780 - val_accuracy: 0.8720 - val_loss: 0.3572
Epoch 5/5
[1m617/617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 30ms/step - accuracy: 0.9400 - loss: 0.1414 - val_accuracy: 0.8755 - val_loss: 0.3725
[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.8761 - loss: 0.3602
Test Accuracy: 0.8760839700698853


In [62]:
from sklearn.metrics import confusion_matrix, classification_report

y_pred = model.predict(X_test_pad)
y_pred_labels = np.argmax(y_pred, axis=1)

[1m386/386[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step


In [63]:
print(classification_report(y_test, y_pred_labels))

              precision    recall  f1-score   support

           0       0.85      0.91      0.88      4509
           1       0.87      0.87      0.87      3650
           2       0.91      0.85      0.88      4180

    accuracy                           0.88     12339
   macro avg       0.88      0.87      0.88     12339
weighted avg       0.88      0.88      0.88     12339

