In [2]:
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
import re

from sklearn.preprocessing import LabelEncoder

data = pd.read_csv('Sentiment.csv')
# Keeping only the neccessary columns
data = data[['text','sentiment']]

data['text'] = data['text'].apply(lambda x: x.lower())
data['text'] = data['text'].apply((lambda x: re.sub('[^a-zA-z0-9\s]', '', x)))

for idx, row in data.iterrows():
    row[0] = row[0].replace('rt', ' ')

max_fatures = 2000
tokenizer = Tokenizer(num_words=max_fatures, split=' ')
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values)

X = pad_sequences(X)

embed_dim = 128
lstm_out = 196
def createmodel():
    model = Sequential()
    model.add(Embedding(max_fatures, embed_dim,input_length = X.shape[1]))
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(3,activation='softmax'))
    model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
    return model
# print(model.summary())

labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data['sentiment'])
y = to_categorical(integer_encoded)
X_train, X_test, Y_train, Y_test = train_test_split(X,y, test_size = 0.33, random_state = 42)

batch_size = 32
model = createmodel()
model.fit(X_train, Y_train, epochs = 1, batch_size=batch_size, verbose = 2)
score,acc = model.evaluate(X_test,Y_test,verbose=2,batch_size=batch_size)
print(score)
print(acc)
print(model.metrics_names)

291/291 - 44s - loss: 0.8179 - accuracy: 0.6444 - 44s/epoch - 151ms/step
144/144 - 4s - loss: 0.7465 - accuracy: 0.6726 - 4s/epoch - 31ms/step
0.7465280294418335
0.6725644469261169
['loss', 'accuracy']


In [3]:
model.save('sentiment_model.h5')

In [4]:
from keras.models import load_model
import numpy as np

loaded_model = load_model('sentiment_model.h5')

new_text = ["A lot of good things are happening. We are respected again throughout the world, and that's a great thing.@realDonaldTrump"]
new_text = tokenizer.texts_to_sequences(new_text)
new_text = pad_sequences(new_text, maxlen=X.shape[1], dtype='int32', value=0)
sentiment_prob = loaded_model.predict(new_text, batch_size=1, verbose=2)[0]

sentiment_classes = ['Negative', 'Neutral', 'Positive']
sentiment_pred = sentiment_classes[np.argmax(sentiment_prob)]

print("Predicted sentiment: ", sentiment_pred)
print("Predicted probabilities: ", sentiment_prob)

1/1 - 1s - 728ms/epoch - 728ms/step
Predicted sentiment:  Negative
Predicted probabilities:  [0.49771252 0.16007847 0.342209  ]


In [None]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from keras.optimizers import Adam

def create_model(units=196, dropout=0.2, learning_rate=0.001):
    model = Sequential()
    model.add(Embedding(max_fatures, embed_dim,input_length = X.shape[1]))
    model.add(LSTM(units, dropout=dropout, recurrent_dropout=dropout))
    model.add(Dense(3, activation='softmax'))
    optimizer = Adam(lr=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

model = KerasClassifier(build_fn=create_model, verbose=2)

units = [64, 128, 196]
dropout = [0.1, 0.2, 0.3]
learning_rate = [0.001, 0.01, 0.1]
epochs = [1]
batch_size = [32]

param_grid = dict(units=units, dropout=dropout, learning_rate=learning_rate, epochs=epochs, batch_size=batch_size)
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, verbose=2)
grid_result = grid.fit(X_train, Y_train)

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Fitting 3 folds for each of 27 candidates, totalling 81 fits


  model = KerasClassifier(build_fn=create_model, verbose=2)
  super().__init__(name, **kwargs)


194/194 - 16s - loss: 0.8528 - accuracy: 0.6302 - 16s/epoch - 82ms/step
97/97 - 2s - loss: 0.7606 - accuracy: 0.6682 - 2s/epoch - 21ms/step
[CV] END batch_size=32, dropout=0.1, epochs=1, learning_rate=0.001, units=64; total time=  18.6s


  super().__init__(name, **kwargs)


194/194 - 16s - loss: 0.8435 - accuracy: 0.6302 - 16s/epoch - 81ms/step
97/97 - 2s - loss: 0.7815 - accuracy: 0.6779 - 2s/epoch - 20ms/step
[CV] END batch_size=32, dropout=0.1, epochs=1, learning_rate=0.001, units=64; total time=  18.8s


  super().__init__(name, **kwargs)


194/194 - 15s - loss: 0.8548 - accuracy: 0.6281 - 15s/epoch - 78ms/step
97/97 - 2s - loss: 0.7832 - accuracy: 0.6713 - 2s/epoch - 19ms/step
[CV] END batch_size=32, dropout=0.1, epochs=1, learning_rate=0.001, units=64; total time=  17.4s


  super().__init__(name, **kwargs)


194/194 - 21s - loss: 0.8500 - accuracy: 0.6349 - 21s/epoch - 108ms/step
97/97 - 2s - loss: 0.7643 - accuracy: 0.6675 - 2s/epoch - 22ms/step
[CV] END batch_size=32, dropout=0.1, epochs=1, learning_rate=0.001, units=128; total time=  23.7s


  super().__init__(name, **kwargs)


194/194 - 21s - loss: 0.8436 - accuracy: 0.6413 - 21s/epoch - 108ms/step
97/97 - 2s - loss: 0.7686 - accuracy: 0.6640 - 2s/epoch - 25ms/step
[CV] END batch_size=32, dropout=0.1, epochs=1, learning_rate=0.001, units=128; total time=  23.9s


  super().__init__(name, **kwargs)


194/194 - 21s - loss: 0.8464 - accuracy: 0.6375 - 21s/epoch - 108ms/step
97/97 - 2s - loss: 0.7730 - accuracy: 0.6748 - 2s/epoch - 23ms/step
[CV] END batch_size=32, dropout=0.1, epochs=1, learning_rate=0.001, units=128; total time=  23.8s


  super().__init__(name, **kwargs)


194/194 - 32s - loss: 0.8639 - accuracy: 0.6287 - 32s/epoch - 167ms/step
97/97 - 3s - loss: 0.7784 - accuracy: 0.6682 - 3s/epoch - 32ms/step
[CV] END batch_size=32, dropout=0.1, epochs=1, learning_rate=0.001, units=196; total time=  36.1s


  super().__init__(name, **kwargs)


194/194 - 35s - loss: 0.8520 - accuracy: 0.6334 - 35s/epoch - 178ms/step
97/97 - 5s - loss: 0.7746 - accuracy: 0.6646 - 5s/epoch - 56ms/step
[CV] END batch_size=32, dropout=0.1, epochs=1, learning_rate=0.001, units=196; total time=  40.6s


  super().__init__(name, **kwargs)


194/194 - 30s - loss: 0.8518 - accuracy: 0.6317 - 30s/epoch - 157ms/step
97/97 - 3s - loss: 0.7771 - accuracy: 0.6558 - 3s/epoch - 31ms/step
[CV] END batch_size=32, dropout=0.1, epochs=1, learning_rate=0.001, units=196; total time=  34.0s


  super().__init__(name, **kwargs)


194/194 - 16s - loss: 0.8360 - accuracy: 0.6404 - 16s/epoch - 84ms/step
97/97 - 2s - loss: 0.7879 - accuracy: 0.6566 - 2s/epoch - 21ms/step
[CV] END batch_size=32, dropout=0.1, epochs=1, learning_rate=0.01, units=64; total time=  18.7s


  super().__init__(name, **kwargs)
