In [76]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf

In [77]:
# Read the raw tweet-sentiment CSV from its machine-specific location and show it.
# NOTE(review): the column names shown after loading ('2401', 'Borderlands', 'Positive', ...)
# look like a data record, so the file presumably has no header row and its first
# record is being consumed as the header -- confirm against the CSV.
csv_path = r'D:\College\pccoe\5th sem\CV\Practicals\Final_practicals\something-main\something-main\twitter_training.csv'
train_df = pd.read_csv(csv_path)
train_df

Unnamed: 0,2401,Borderlands,Positive,"im getting on borderlands and i will murder you all ,"
0,2401,Borderlands,Positive,I am coming to the borders and I will kill you...
1,2401,Borderlands,Positive,im getting on borderlands and i will kill you ...
2,2401,Borderlands,Positive,im coming on borderlands and i will murder you...
3,2401,Borderlands,Positive,im getting on borderlands 2 and i will murder ...
4,2401,Borderlands,Positive,im getting into borderlands and i can murder y...
...,...,...,...,...
74676,9200,Nvidia,Positive,Just realized that the Windows partition of my...
74677,9200,Nvidia,Positive,Just realized that my Mac window partition is ...
74678,9200,Nvidia,Positive,Just realized the windows partition of my Mac ...
74679,9200,Nvidia,Positive,Just realized between the windows partition of...


In [78]:
# Discard the two leading columns (labelled '2401' and 'Borderlands' after loading).
unused_columns = ['2401', 'Borderlands']
train_df = train_df.drop(unused_columns, axis=1)

In [79]:
# Give the two remaining columns readable names: the class label, then the tweet text.
column_names = ['Sentiment', 'statement']
train_df.columns = column_names

In [80]:
# Display the frame again to check the result of the column drop and rename.
train_df

Unnamed: 0,Sentiment,statement
0,Positive,I am coming to the borders and I will kill you...
1,Positive,im getting on borderlands and i will kill you ...
2,Positive,im coming on borderlands and i will murder you...
3,Positive,im getting on borderlands 2 and i will murder ...
4,Positive,im getting into borderlands and i can murder y...
...,...,...
74676,Positive,Just realized that the Windows partition of my...
74677,Positive,Just realized that my Mac window partition is ...
74678,Positive,Just realized the windows partition of my Mac ...
74679,Positive,Just realized between the windows partition of...


In [81]:
# Class balance check: number of rows for each sentiment label.
sentiment_column = train_df['Sentiment']
sentiment_column.value_counts()

Sentiment
Negative      22542
Positive      20831
Neutral       18318
Irrelevant    12990
Name: count, dtype: int64

In [82]:
# Make every tweet a Python string so the tokenizer downstream only ever receives text.
# Missing tweets are replaced with '' first: astype(str) on its own turns NaN into the
# literal string 'nan', which would then be tokenised as if it were a real word.
train_df['statement'] = train_df['statement'].fillna('').astype(str)

In [98]:
# Integer class id for each sentiment name, numbered in the order listed here.
sentiment_names = ('Positive', 'Negative', 'Neutral', 'Irrelevant')
labels = {name: class_id for class_id, name in enumerate(sentiment_names)}

In [84]:
# Replace each sentiment name with its integer id from `labels`
# (a name that is not a key of `labels` raises KeyError).
train_df['Sentiment'] = train_df['Sentiment'].map(lambda sentiment: labels[sentiment])

In [85]:
# Choose the train/test rows first so the tokenizer vocabulary can be learned from the
# training tweets only. Splitting the row positions with the same test_size and
# random_state picks the same rows as splitting the padded array directly would.
row_positions = np.arange(len(train_df))
train_rows, test_rows = train_test_split(row_positions, test_size=0.2, random_state=42)

# Vocabulary is capped through num_words; fitting on the training rows alone keeps
# held-out tweets from influencing which words make it into the vocabulary.
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(train_df['statement'].iloc[train_rows])

# Encode every tweet as a list of word ids, then bring each list to exactly 100 ids
# (zeros are appended to short ones because of padding='post').
seq = tokenizer.texts_to_sequences(train_df['statement'])
pad_seq = pad_sequences(seq, maxlen=100, padding='post')

# Slice features and integer labels with the row positions chosen above.
X_train, X_test = pad_seq[train_rows], pad_seq[test_rows]
y_train = train_df['Sentiment'].iloc[train_rows]
y_test = train_df['Sentiment'].iloc[test_rows]

In [86]:
# Number of embedding rows: one per word id the tokenizer can emit plus row 0 for padding,
# capped at 10000 to match the num_words limit given to the Tokenizer.
vocab_size = min(len(tokenizer.word_index) + 1, 10000)
num_classes = 4  # one output unit per sentiment class in `labels`

# Width of the model input, read from the padded training data so the two cannot drift
# apart. It is declared with an explicit Input rather than Embedding's `input_length`
# argument, which recent Keras releases deprecate.
sequence_length = X_train.shape[1]

# Word ids -> 128-d embeddings -> two stacked bidirectional LSTMs (the first returns the
# full sequence so the second can consume it) -> dense head -> class probabilities.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(sequence_length,)),
    tf.keras.layers.Embedding(vocab_size, 128),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])



In [87]:
# Adam optimiser with sparse categorical cross-entropy (the targets are integer class
# ids, not one-hot vectors); accuracy is the metric reported during training.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [88]:
# Train for 10 epochs in mini-batches of 32, scoring the held-out split after each epoch;
# the per-epoch metrics are kept in `history`.
held_out = (X_test, y_test)
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_data=held_out,
)

Epoch 1/10
[1m1867/1867[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 61ms/step - accuracy: 0.5505 - loss: 1.0478 - val_accuracy: 0.7534 - val_loss: 0.6499
Epoch 2/10
[1m1867/1867[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 61ms/step - accuracy: 0.8207 - loss: 0.4853 - val_accuracy: 0.8129 - val_loss: 0.5006
Epoch 3/10
[1m1867/1867[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 61ms/step - accuracy: 0.8800 - loss: 0.3217 - val_accuracy: 0.8281 - val_loss: 0.4685
Epoch 4/10
[1m1867/1867[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 62ms/step - accuracy: 0.9092 - loss: 0.2371 - val_accuracy: 0.8437 - val_loss: 0.4575
Epoch 5/10
[1m1867/1867[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 63ms/step - accuracy: 0.9259 - loss: 0.1899 - val_accuracy: 0.8530 - val_loss: 0.4456
Epoch 6/10
[1m1867/1867[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 64ms/step - accuracy: 0.9384 - loss: 0.1565 - val_accuracy: 0.8534 - val_loss: 0.474

In [124]:
# Classify one hand-written sentence, encoding it with the same tokenizer and the same
# padding settings that were used for the training data.
texts = ['well you know what nothing i dont know what i am going to do']
seq_pred = tokenizer.texts_to_sequences(texts)
pad_seq_pred = pad_sequences(seq_pred, padding='post', maxlen=100)

# One row of model outputs per input text; reduce each row to its best class.
preds = model.predict(pad_seq_pred)
label = np.argmax(preds, axis=1)  # position of the largest output in each row
conf = np.max(preds, axis=1)      # value of that largest output


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step


In [125]:
# Show the top output value, the predicted class id, and the name-to-id table needed to
# translate that id back into a sentiment name.
print(f'{conf} {label} {labels}')

[0.7550553] [0] {'Positive': 0, 'Negative': 1, 'Neutral': 2, 'Irrelevant': 3}
