In [1]:
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Dropout,Embedding,LSTM,Bidirectional,SpatialDropout1D

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.preprocessing import LabelEncoder


In [2]:
df=pd.read_csv('/content/drive/MyDrive/Sentiment.csv')

In [3]:
df.head(5)

Unnamed: 0,id,candidate,candidate_confidence,relevant_yn,relevant_yn_confidence,sentiment,sentiment_confidence,subject_matter,subject_matter_confidence,candidate_gold,...,relevant_yn_gold,retweet_count,sentiment_gold,subject_matter_gold,text,tweet_coord,tweet_created,tweet_id,tweet_location,user_timezone
0,1,No candidate mentioned,1.0,yes,1.0,Neutral,0.6578,None of the above,1.0,,...,,5,,,RT @NancyLeeGrahn: How did everyone feel about...,,2015-08-07 09:54:46 -0700,629697200650592256,,Quito
1,2,Scott Walker,1.0,yes,1.0,Positive,0.6333,None of the above,1.0,,...,,26,,,RT @ScottWalker: Didn't catch the full #GOPdeb...,,2015-08-07 09:54:46 -0700,629697199560069120,,
2,3,No candidate mentioned,1.0,yes,1.0,Neutral,0.6629,None of the above,0.6629,,...,,27,,,RT @TJMShow: No mention of Tamir Rice and the ...,,2015-08-07 09:54:46 -0700,629697199312482304,,
3,4,No candidate mentioned,1.0,yes,1.0,Positive,1.0,None of the above,0.7039,,...,,138,,,RT @RobGeorge: That Carly Fiorina is trending ...,,2015-08-07 09:54:45 -0700,629697197118861312,Texas,Central Time (US & Canada)
4,5,Donald Trump,1.0,yes,1.0,Positive,0.7045,None of the above,1.0,,...,,156,,,RT @DanScavino: #GOPDebate w/ @realDonaldTrump...,,2015-08-07 09:54:45 -0700,629697196967903232,,Arizona


In [4]:
df=df[['text','sentiment']]

In [5]:
df.head(5)

Unnamed: 0,text,sentiment
0,RT @NancyLeeGrahn: How did everyone feel about...,Neutral
1,RT @ScottWalker: Didn't catch the full #GOPdeb...,Positive
2,RT @TJMShow: No mention of Tamir Rice and the ...,Neutral
3,RT @RobGeorge: That Carly Fiorina is trending ...,Positive
4,RT @DanScavino: #GOPDebate w/ @realDonaldTrump...,Positive


In [6]:
df=df[df.sentiment!='Neutral']

In [7]:
import re

In [8]:
df['text'] = df['text'].apply(lambda x: x.lower())
df['text'] = df['text'].apply((lambda x: re.sub('[^a-zA-z0-9\s]','',x)))

print(df[ df['sentiment'] == 'Positive'].size)
print(df[ df['sentiment'] == 'Negative'].size)

4472
16986


In [9]:
from keras.utils import to_categorical
encoder = LabelEncoder()
y = encoder.fit_transform(df['sentiment'])
y = to_categorical(y)


In [11]:
max_features = 2000
tokenizer = Tokenizer(num_words=max_features, split=' ')
tokenizer.fit_on_texts(df['text'].values)
X = tokenizer.texts_to_sequences(df['text'].values)
X = pad_sequences(X)

In [12]:
x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [13]:

model = Sequential()
model.add(Embedding(max_features, 128, input_length=X.shape[1]))
model.add(SpatialDropout1D(0.4))
model.add(Dense(196,activation='relu'))
model.add(Dropout(0.2))
model.add(LSTM(196, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(32,activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(2,activation='softmax'))
model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])




In [14]:
model.fit(x_train, y_train, epochs = 5, batch_size=32, verbose = 2)

Epoch 1/5
269/269 - 31s - 115ms/step - accuracy: 0.8068 - loss: 0.4593
Epoch 2/5
269/269 - 38s - 142ms/step - accuracy: 0.8629 - loss: 0.3343
Epoch 3/5
269/269 - 41s - 154ms/step - accuracy: 0.8812 - loss: 0.2944
Epoch 4/5
269/269 - 41s - 151ms/step - accuracy: 0.8948 - loss: 0.2681
Epoch 5/5
269/269 - 41s - 153ms/step - accuracy: 0.9017 - loss: 0.2468


<keras.src.callbacks.history.History at 0x7c5a8e51fe50>

In [20]:
model.summary()

In [16]:
import numpy as np

In [17]:
pred=model.predict(x_test)
pred = np.argmax(pred, axis=1)
y_test_binary = np.argmax(y_test, axis=1)  # Convert y_test to binary format
print(accuracy_score(y_test_binary,pred))

[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 62ms/step
0.8378378378378378


In [18]:
twt = ['team do not work hard, project is very bad']
twt = tokenizer.texts_to_sequences(twt)
twt = pad_sequences(twt, maxlen=28, dtype='int32', value=0)
sentiment = model.predict(twt,batch_size=1,verbose = 2)[0]
if(np.argmax(sentiment) == 0):
    print("negative")
elif (np.argmax(sentiment) == 1):
    print("positive")

1/1 - 0s - 69ms/step
negative


In [19]:
twt2=['interstaller movie is great and amazing']
twt2 = tokenizer.texts_to_sequences(twt2)
twt2 = pad_sequences(twt2, maxlen=28, dtype='int32', value=0)
sentiment = model.predict(twt2,batch_size=1,verbose = 2)[0]
if(np.argmax(sentiment) == 0):
    print("negative")
elif (np.argmax(sentiment) == 1):
    print("positive")

1/1 - 0s - 23ms/step
positive
