# Importing Libraries..

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import re

### Load Dataset and check Dataset

In [2]:
df = pd.read_csv(r'C:\Users\Admin\Downloads\Sentiment.csv')
df.head()

Unnamed: 0,id,candidate,candidate_confidence,relevant_yn,relevant_yn_confidence,sentiment,sentiment_confidence,subject_matter,subject_matter_confidence,candidate_gold,...,relevant_yn_gold,retweet_count,sentiment_gold,subject_matter_gold,text,tweet_coord,tweet_created,tweet_id,tweet_location,user_timezone
0,1,No candidate mentioned,1.0,yes,1.0,Neutral,0.6578,None of the above,1.0,,...,,5,,,RT @NancyLeeGrahn: How did everyone feel about...,,2015-08-07 09:54:46 -0700,629697200650592256,,Quito
1,2,Scott Walker,1.0,yes,1.0,Positive,0.6333,None of the above,1.0,,...,,26,,,RT @ScottWalker: Didn't catch the full #GOPdeb...,,2015-08-07 09:54:46 -0700,629697199560069120,,
2,3,No candidate mentioned,1.0,yes,1.0,Neutral,0.6629,None of the above,0.6629,,...,,27,,,RT @TJMShow: No mention of Tamir Rice and the ...,,2015-08-07 09:54:46 -0700,629697199312482304,,
3,4,No candidate mentioned,1.0,yes,1.0,Positive,1.0,None of the above,0.7039,,...,,138,,,RT @RobGeorge: That Carly Fiorina is trending ...,,2015-08-07 09:54:45 -0700,629697197118861312,Texas,Central Time (US & Canada)
4,5,Donald Trump,1.0,yes,1.0,Positive,0.7045,None of the above,1.0,,...,,156,,,RT @DanScavino: #GOPDebate w/ @realDonaldTrump...,,2015-08-07 09:54:45 -0700,629697196967903232,,Arizona


In [3]:
df["sentiment"].unique()

array(['Neutral', 'Positive', 'Negative'], dtype=object)

In [4]:
df.columns

Index(['id', 'candidate', 'candidate_confidence', 'relevant_yn',
       'relevant_yn_confidence', 'sentiment', 'sentiment_confidence',
       'subject_matter', 'subject_matter_confidence', 'candidate_gold', 'name',
       'relevant_yn_gold', 'retweet_count', 'sentiment_gold',
       'subject_matter_gold', 'text', 'tweet_coord', 'tweet_created',
       'tweet_id', 'tweet_location', 'user_timezone'],
      dtype='object')

In [5]:
df.shape

(13871, 21)

In [6]:
df["text"].head()

0    RT @NancyLeeGrahn: How did everyone feel about...
1    RT @ScottWalker: Didn't catch the full #GOPdeb...
2    RT @TJMShow: No mention of Tamir Rice and the ...
3    RT @RobGeorge: That Carly Fiorina is trending ...
4    RT @DanScavino: #GOPDebate w/ @realDonaldTrump...
Name: text, dtype: object

In [7]:
df = df[["text" , "sentiment"]]
df.head()

Unnamed: 0,text,sentiment
0,RT @NancyLeeGrahn: How did everyone feel about...,Neutral
1,RT @ScottWalker: Didn't catch the full #GOPdeb...,Positive
2,RT @TJMShow: No mention of Tamir Rice and the ...,Neutral
3,RT @RobGeorge: That Carly Fiorina is trending ...,Positive
4,RT @DanScavino: #GOPDebate w/ @realDonaldTrump...,Positive


In [8]:
df = df[df.sentiment != 'Neutral']
df['sentiment'].unique()

array(['Positive', 'Negative'], dtype=object)

# Q1. Print no.of pos and neg comments

In [9]:
df['sentiment'].value_counts()

Negative    8493
Positive    2236
Name: sentiment, dtype: int64

In [10]:
max_features= 2000
tokenizer = keras.preprocessing.text.Tokenizer(num_words=max_features , split=' ')
tokenizer.fit_on_texts(df['text'].values)
X = tokenizer.texts_to_sequences(df['text'].values)
X = keras.preprocessing.sequence.pad_sequences(X)

X.shape

(10729, 29)

### splitting the data

In [18]:
y = pd.get_dummies(df['sentiment']).values
validation_size = 1500
train_x , test_x , train_y , test_y = train_test_split(X , y , test_size = 0.2 , random_state = 42 , shuffle = True)
X_valid , y_valid = test_x[:validation_size] , test_y[:validation_size]
test_x , test_y = test_x[validation_size:] , test_y[validation_size:]
train_x.shape , X_valid.shape , test_x.shape

((8583, 29), (1500, 29), (646, 29))

# Q2. Building the LSTM model


In [19]:
embed_dim = 128
lstm_out = 196

model = keras.models.Sequential([
    keras.layers.Embedding(max_features , embed_dim , input_length = X.shape[1]),
    keras.layers.SpatialDropout1D(0.3),
    keras.layers.LSTM(lstm_out , dropout = 0.2 , recurrent_dropout = 0.2),
    keras.layers.Dense(2 , activation = 'softmax')
])

model.compile(loss = 'categorical_crossentropy' , optimizer = 'adam' , metrics = ['accuracy'])
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 29, 128)           256000    
                                                                 
 spatial_dropout1d_1 (Spatia  (None, 29, 128)          0         
 lDropout1D)                                                     
                                                                 
 lstm_1 (LSTM)               (None, 196)               254800    
                                                                 
 dense_1 (Dense)             (None, 2)                 394       
                                                                 
Total params: 511,194
Trainable params: 511,194
Non-trainable params: 0
_________________________________________________________________


### fitting the model

In [None]:
batch_size = 32
model.fit(train_x , train_y , batch_size = batch_size , epochs = 10 ,verbose = 2,  validation_data=(X_valid , y_valid))

Epoch 1/10
269/269 - 28s - loss: 0.4148 - accuracy: 0.8230 - val_loss: 0.3649 - val_accuracy: 0.8407 - 28s/epoch - 104ms/step
Epoch 2/10
269/269 - 27s - loss: 0.3016 - accuracy: 0.8696 - val_loss: 0.3531 - val_accuracy: 0.8547 - 27s/epoch - 101ms/step
Epoch 3/10
269/269 - 27s - loss: 0.2682 - accuracy: 0.8880 - val_loss: 0.3721 - val_accuracy: 0.8593 - 27s/epoch - 101ms/step
Epoch 4/10
269/269 - 27s - loss: 0.2390 - accuracy: 0.9012 - val_loss: 0.3658 - val_accuracy: 0.8533 - 27s/epoch - 100ms/step
Epoch 5/10
269/269 - 27s - loss: 0.2157 - accuracy: 0.9098 - val_loss: 0.4155 - val_accuracy: 0.8500 - 27s/epoch - 100ms/step
Epoch 6/10
269/269 - 27s - loss: 0.1925 - accuracy: 0.9200 - val_loss: 0.4609 - val_accuracy: 0.8493 - 27s/epoch - 101ms/step
Epoch 7/10
269/269 - 28s - loss: 0.1735 - accuracy: 0.9292 - val_loss: 0.4581 - val_accuracy: 0.8333 - 28s/epoch - 103ms/step
Epoch 8/10
269/269 - 27s - loss: 0.1616 - accuracy: 0.9316 - val_loss: 0.4865 - val_accuracy: 0.8420 - 27s/epoch - 100

### evaluate the model

In [None]:
score , accuracy = model.evaluate(test_x , test_y , verbose = 2 , batch_size = batch_size)
print("score : %.2f"%score)
print("accuracy : %.2f"%accuracy)

# Q3. Checking positive or negative

### test a predicted tweet

#### Testcase 1 : 'He is a great leader.'

In [None]:
twt = ['He is a great leader.']
twt = tokenizer.texts_to_sequences(twt)
twt = keras.preprocessing.sequence.pad_sequences(twt , maxlen= 29 , dtype = 'int32' , value = 0)
print(twt)

In [16]:
sentiment = model.predict(twt , batch_size = None , verbose = 2)[0]

1/1 - 0s - 318ms/epoch - 318ms/step


In [17]:
if(np.argmax(sentiment) == 0):
    print("negative")
elif (np.argmax(sentiment) == 1):
    print("positive")

positive


#### Testcase 2 : 'He is a terrible leader'

In [21]:
twt = ['He is a terrible leader']
twt = tokenizer.texts_to_sequences(twt)
twt = keras.preprocessing.sequence.pad_sequences(twt , maxlen= 30 , dtype = 'int32' , value = 0)
print(twt)

[[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0   37    6    8
  1009  347]]


In [22]:
sentiment = model.predict(twt , batch_size = None , verbose = 2)[0]

1/1 - 0s - 319ms/epoch - 319ms/step


In [23]:
if(np.argmax(sentiment) == 0):
    print("negative")
elif (np.argmax(sentiment) == 1):
    print("positive")

negative
