In [3]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

import re

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from keras.utils import to_categorical

In [6]:
data = pd.read_csv('/Sentiment.csv')
# Keeping only the neccessary columns
data = data[['text','sentiment']]

data['text'] = data['text'].apply(lambda x: x.lower())
data['text'] = data['text'].apply((lambda x: re.sub('[^a-zA-z0-9\s]', '', x)))

for idx, row in data.iterrows():
    row[0] = row[0].replace('rt', ' ')

max_fatures = 2000
tokenizer = Tokenizer(num_words=max_fatures, split=' ')
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values)

X = pad_sequences(X)

embed_dim = 128
lstm_out = 196
def createmodel():
    model = Sequential()
    model.add(Embedding(max_fatures, embed_dim,input_length = X.shape[1]))
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(3,activation='softmax'))
    model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
    return model
# print(model.summary())

labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data['sentiment'])
y = to_categorical(integer_encoded)
X_train, X_test, Y_train, Y_test = train_test_split(X,y, test_size = 0.33, random_state = 42)

batch_size = 32
model = createmodel()
model.fit(X_train, Y_train, epochs = 1, batch_size=batch_size, verbose = 2)
score,acc = model.evaluate(X_test,Y_test,verbose=2,batch_size=batch_size)
print(score)
print(acc)
print(model.metrics_names)



  row[0] = row[0].replace('rt', ' ')
  row[0] = row[0].replace('rt', ' ')


291/291 - 14s - 47ms/step - accuracy: 0.6415 - loss: 0.8345
144/144 - 2s - 16ms/step - accuracy: 0.6680 - loss: 0.7621
0.7620928287506104
0.6679772734642029
['loss', 'compile_metrics']


In [8]:
from keras.models import load_model
model.save('sentimentAnalysis.h5')
model= load_model('sentimentAnalysis.h5')



In [9]:
print(integer_encoded)
print(data['sentiment'])

[1 2 1 ... 2 0 2]
0         Neutral
1        Positive
2         Neutral
3        Positive
4        Positive
           ...   
13866    Negative
13867    Positive
13868    Positive
13869    Negative
13870    Positive
Name: sentiment, Length: 13871, dtype: object


In [10]:
sentence = ['A lot of good things are happening. We are respected again throughout the world, and that is a great thing.@realDonaldTrump']
sentence = tokenizer.texts_to_sequences(sentence)
sentence = pad_sequences(sentence, maxlen=28, dtype='int32', value=0)
sentiment_probs = model.predict(sentence, batch_size=1, verbose=2)[0]
sentiment = np.argmax(sentiment_probs)

print(sentiment_probs)
if sentiment == 0:
    print("Neutral")
elif sentiment < 0:
    print("Negative")
elif sentiment > 0:
    print("Positive")
else:
    print("Cannot be determined")

1/1 - 0s - 180ms/step
[0.6395856  0.18788488 0.17252955]
Neutral


In [17]:
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV
from tensorflow import keras
from tensorflow.keras.layers import Dense
def createmodel(optimizer='adam'):
    """
    Creates and returns a compiled Keras sequential model.
    """
    model = keras.Sequential([
        Dense(12, input_shape=(X_train.shape[1],), activation='relu'),
        Dense(8, activation='relu'),
        Dense(3, activation='softmax')

    ])
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model
model = KerasClassifier(model=createmodel,verbose=2)
batch_size= [10, 20, 40]
epochs = [1, 2]
param_grid= {'batch_size':batch_size, 'epochs':epochs, 'model__optimizer': ['adam', 'rmsprop']}
grid  = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result= grid.fit(X_train,Y_train)

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 3ms/step - accuracy: 0.4896 - loss: 18.5119
186/186 - 0s - 1ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 1s - 2ms/step - accuracy: 0.5079 - loss: 14.3789
186/186 - 0s - 1ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 1s - 2ms/step - accuracy: 0.4420 - loss: 31.6930
186/186 - 0s - 1ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 3ms/step - accuracy: 0.4757 - loss: 26.1203
186/186 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 1s - 2ms/step - accuracy: 0.4590 - loss: 16.0081
186/186 - 0s - 1ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 1s - 2ms/step - accuracy: 0.5108 - loss: 10.3299
186/186 - 0s - 1ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 1s - 2ms/step - accuracy: 0.4687 - loss: 18.3284
186/186 - 0s - 1ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 1s - 2ms/step - accuracy: 0.4639 - loss: 34.1965
186/186 - 0s - 1ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 1s - 2ms/step - accuracy: 0.4796 - loss: 34.0712
186/186 - 0s - 1ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 1s - 2ms/step - accuracy: 0.4985 - loss: 14.1566
186/186 - 0s - 1ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 3ms/step - accuracy: 0.4594 - loss: 29.7622
Epoch 2/2
744/744 - 1s - 1ms/step - accuracy: 0.4840 - loss: 4.2249
186/186 - 0s - 1ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 3ms/step - accuracy: 0.4716 - loss: 21.5341
Epoch 2/2
744/744 - 1s - 926us/step - accuracy: 0.4891 - loss: 2.3584
186/186 - 0s - 1ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 1s - 2ms/step - accuracy: 0.4762 - loss: 14.7298
Epoch 2/2
744/744 - 1s - 2ms/step - accuracy: 0.5543 - loss: 2.0021
186/186 - 0s - 1ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 2s - 2ms/step - accuracy: 0.5088 - loss: 11.4029
Epoch 2/2
744/744 - 1s - 1ms/step - accuracy: 0.5999 - loss: 1.3034
186/186 - 0s - 1ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 1s - 2ms/step - accuracy: 0.4551 - loss: 24.9368
Epoch 2/2
744/744 - 1s - 914us/step - accuracy: 0.5076 - loss: 2.1318
186/186 - 0s - 1ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 1s - 2ms/step - accuracy: 0.4728 - loss: 18.0777
Epoch 2/2
744/744 - 1s - 2ms/step - accuracy: 0.5960 - loss: 1.1575
186/186 - 0s - 1ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 1s - 2ms/step - accuracy: 0.5617 - loss: 4.3324
Epoch 2/2
744/744 - 1s - 853us/step - accuracy: 0.6127 - loss: 1.0123
186/186 - 0s - 1ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 1s - 2ms/step - accuracy: 0.5472 - loss: 17.1588
Epoch 2/2
744/744 - 1s - 1ms/step - accuracy: 0.6080 - loss: 1.0547
186/186 - 0s - 1ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 1s - 2ms/step - accuracy: 0.4834 - loss: 18.8299
Epoch 2/2
744/744 - 1s - 874us/step - accuracy: 0.5469 - loss: 1.4768
186/186 - 0s - 1ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


744/744 - 1s - 2ms/step - accuracy: 0.4820 - loss: 33.8734
Epoch 2/2
744/744 - 1s - 2ms/step - accuracy: 0.4826 - loss: 4.3282
186/186 - 0s - 1ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 3ms/step - accuracy: 0.4985 - loss: 19.5677
93/93 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 3ms/step - accuracy: 0.4853 - loss: 7.2797
93/93 - 0s - 1ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 3ms/step - accuracy: 0.3928 - loss: 40.3274
93/93 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 2s - 5ms/step - accuracy: 0.4260 - loss: 33.9026
93/93 - 0s - 1ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 3ms/step - accuracy: 0.4689 - loss: 35.4898
93/93 - 0s - 1ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 2ms/step - accuracy: 0.4692 - loss: 18.9590
93/93 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 2ms/step - accuracy: 0.4280 - loss: 50.9838
93/93 - 0s - 1ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 2ms/step - accuracy: 0.4540 - loss: 34.3334
93/93 - 0s - 1ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 2ms/step - accuracy: 0.4249 - loss: 48.4991
93/93 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 2ms/step - accuracy: 0.5654 - loss: 8.4460
93/93 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 3ms/step - accuracy: 0.4282 - loss: 24.0221
Epoch 2/2
372/372 - 1s - 2ms/step - accuracy: 0.5192 - loss: 3.2557
93/93 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 4ms/step - accuracy: 0.3483 - loss: 68.2727
Epoch 2/2
372/372 - 1s - 2ms/step - accuracy: 0.4508 - loss: 7.3331
93/93 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 3ms/step - accuracy: 0.4447 - loss: 13.0932
Epoch 2/2
372/372 - 0s - 936us/step - accuracy: 0.5939 - loss: 1.3272
93/93 - 0s - 1ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 3ms/step - accuracy: 0.4040 - loss: 21.9582
Epoch 2/2
372/372 - 0s - 927us/step - accuracy: 0.5426 - loss: 1.9927
93/93 - 0s - 1ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 3ms/step - accuracy: 0.4650 - loss: 33.5278
Epoch 2/2
372/372 - 0s - 885us/step - accuracy: 0.4453 - loss: 4.6885
93/93 - 0s - 1ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 2ms/step - accuracy: 0.4734 - loss: 19.9561
Epoch 2/2
372/372 - 0s - 845us/step - accuracy: 0.5958 - loss: 1.4936
93/93 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 2ms/step - accuracy: 0.4679 - loss: 12.9609
Epoch 2/2
372/372 - 1s - 2ms/step - accuracy: 0.5398 - loss: 2.0123
93/93 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 2s - 5ms/step - accuracy: 0.3713 - loss: 49.7955
Epoch 2/2
372/372 - 1s - 2ms/step - accuracy: 0.6082 - loss: 1.0540
93/93 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 2ms/step - accuracy: 0.5397 - loss: 25.4522
Epoch 2/2
372/372 - 1s - 2ms/step - accuracy: 0.6034 - loss: 1.3859
93/93 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


372/372 - 1s - 4ms/step - accuracy: 0.4980 - loss: 23.3958
Epoch 2/2
372/372 - 0s - 1ms/step - accuracy: 0.6009 - loss: 1.5940
93/93 - 0s - 1ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 5ms/step - accuracy: 0.3886 - loss: 76.0058
47/47 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 5ms/step - accuracy: 0.4902 - loss: 40.3662
47/47 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 5ms/step - accuracy: 0.4857 - loss: 35.2290
47/47 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 7ms/step - accuracy: 0.3705 - loss: 28.7402
47/47 - 0s - 3ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 6ms/step - accuracy: 0.4522 - loss: 56.5523
47/47 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 4ms/step - accuracy: 0.4592 - loss: 30.6096
47/47 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 4ms/step - accuracy: 0.4883 - loss: 49.0265
47/47 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 4ms/step - accuracy: 0.4426 - loss: 61.0824
47/47 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 4ms/step - accuracy: 0.4760 - loss: 98.4072
47/47 - 0s - 2ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 4ms/step - accuracy: 0.5098 - loss: 15.0382
47/47 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 5ms/step - accuracy: 0.3900 - loss: 62.8059
Epoch 2/2
186/186 - 0s - 2ms/step - accuracy: 0.5406 - loss: 7.6450
47/47 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 5ms/step - accuracy: 0.3649 - loss: 129.1848
Epoch 2/2
186/186 - 0s - 2ms/step - accuracy: 0.4459 - loss: 19.3941
47/47 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 7ms/step - accuracy: 0.4317 - loss: 53.5686
Epoch 2/2
186/186 - 0s - 2ms/step - accuracy: 0.5471 - loss: 5.9216
47/47 - 0s - 3ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 2s - 10ms/step - accuracy: 0.4523 - loss: 55.8012
Epoch 2/2
186/186 - 0s - 2ms/step - accuracy: 0.4344 - loss: 11.6328
47/47 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 5ms/step - accuracy: 0.4500 - loss: 48.4251
Epoch 2/2
186/186 - 0s - 2ms/step - accuracy: 0.4873 - loss: 11.2700
47/47 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 4ms/step - accuracy: 0.4479 - loss: 63.0977
Epoch 2/2
186/186 - 0s - 2ms/step - accuracy: 0.5303 - loss: 6.3124
47/47 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 4ms/step - accuracy: 0.4407 - loss: 38.4247
Epoch 2/2
186/186 - 0s - 910us/step - accuracy: 0.4743 - loss: 2.8511
47/47 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 4ms/step - accuracy: 0.4434 - loss: 42.5129
Epoch 2/2
186/186 - 0s - 894us/step - accuracy: 0.5031 - loss: 2.9818
47/47 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 4ms/step - accuracy: 0.3829 - loss: 89.4135
Epoch 2/2
186/186 - 0s - 2ms/step - accuracy: 0.5755 - loss: 3.4016
47/47 - 0s - 2ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


186/186 - 1s - 4ms/step - accuracy: 0.4730 - loss: 38.0597
Epoch 2/2
186/186 - 0s - 2ms/step - accuracy: 0.4577 - loss: 7.4547
47/47 - 0s - 3ms/step
Epoch 1/2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


465/465 - 1s - 3ms/step - accuracy: 0.4606 - loss: 28.4229
Epoch 2/2
465/465 - 1s - 2ms/step - accuracy: 0.5559 - loss: 2.3039
Best: 0.598733 using {'batch_size': 20, 'epochs': 2, 'model__optimizer': 'rmsprop'}
