In [4]:
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
import re

from sklearn.preprocessing import LabelEncoder

data = pd.read_csv('Sentiment.csv')
# Keeping only the neccessary columns
data = data[['text','sentiment']]

data['text'] = data['text'].apply(lambda x: x.lower())
data['text'] = data['text'].apply((lambda x: re.sub('[^a-zA-Z0-9\s]', '', x)))

for idx, row in data.iterrows():
    row[0] = row[0].replace('rt', ' ')

max_fatures = 2000
tokenizer = Tokenizer(num_words=max_fatures, split=' ')
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values)

X = pad_sequences(X)

embed_dim = 128
lstm_out = 196
def createmodel():
    model = Sequential()
    model.add(Embedding(max_fatures, embed_dim,input_length = X.shape[1]))
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(3,activation='softmax'))
    model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
    return model
# print(model.summary())

labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data['sentiment'])
y = to_categorical(integer_encoded)
X_train, X_test, Y_train, Y_test = train_test_split(X,y, test_size = 0.33, random_state = 42)

batch_size = 32
model = createmodel()
model.fit(X_train, Y_train, epochs = 15, batch_size=batch_size, verbose = 2)
model.save('sentiment.h5')
score,acc = model.evaluate(X_test,Y_test,verbose=2,batch_size=batch_size)
print(score)
print(acc)
print(model.metrics_names)


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/15
 - 29s - loss: 0.8294 - accuracy: 0.6440
Epoch 2/15
 - 26s - loss: 0.6822 - accuracy: 0.7113
Epoch 3/15
 - 27s - loss: 0.6219 - accuracy: 0.7387
Epoch 4/15
 - 27s - loss: 0.5829 - accuracy: 0.7536
Epoch 5/15
 - 27s - loss: 0.5378 - accuracy: 0.7807
Epoch 6/15
 - 27s - loss: 0.5022 - accuracy: 0.7934
Epoch 7/15
 - 26s - loss: 0.4654 - accuracy: 0.8115
Epoch 8/15
 - 27s - loss: 0.4396 - accuracy: 0.8217
Epoch 9/15
 - 27s - loss: 0.4088 - accuracy: 0.8375
Epoch 10/15
 - 26s - loss: 0.3790 - accuracy: 0.8482
Epoch 11/15
 - 26s - loss: 0.3613 - accuracy: 0.8504
Epoch 12/15
 - 27s - loss: 0.3373 - accuracy: 0.8652
Epoch 13/15
 - 27s - loss: 0.3151 - accuracy: 0.8757
Epoch 14/15
 - 27s - loss: 0.3027 - accuracy: 0.8760
Epoch 15/15
 - 27s - loss: 0.2897 - accuracy: 0.8821
1.3964386600862688
0.6515945792198181
['loss', 'accuracy']


In [5]:
  
from keras.models import load_model
from keras_preprocessing.sequence import pad_sequences
from keras_preprocessing.text import Tokenizer
import numpy as np

model = load_model('sentiment.h5')
test_data = ["A lot of good things are happening. We are respected again throughout the world, and that's a great thing.@realDonaldTrump"]


tokenizer = Tokenizer(split=' ')
y=tokenizer.fit_on_texts(test_data)
X = tokenizer.texts_to_sequences(test_data)
print(X)

max_len = 28
X = pad_sequences(X, maxlen=max_len)
print(X)

class_names = ['Positive','Neutral','Negative']

preds = model.predict(X)
print(preds)
classes = model.predict_classes(X)
print(classes)
print(class_names[classes[0]])

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


[[1, 3, 4, 5, 6, 2, 7, 8, 2, 9, 10, 11, 12, 13, 14, 15, 1, 16, 17, 18]]
[[ 0  0  0  0  0  0  0  0  1  3  4  5  6  2  7  8  2  9 10 11 12 13 14 15
   1 16 17 18]]
[[0.8443336  0.04547098 0.11019548]]
[0]
Positive


In [6]:
from keras.wrappers.scikit_learn import KerasClassifier

model = KerasClassifier(build_fn=createmodel,verbose=2)
batch_size= [10, 20, 40]
epochs = [1, 2]
param_grid= {'batch_size':batch_size, 'epochs':epochs}
from sklearn.model_selection import GridSearchCV
grid  = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result= grid.fit(X_train,Y_train)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/1
 - 47s - loss: 0.8314 - accuracy: 0.6422
Epoch 1/1
 - 48s - loss: 0.8300 - accuracy: 0.6419
Epoch 1/1
 - 48s - loss: 0.8274 - accuracy: 0.6446
Epoch 1/1
 - 47s - loss: 0.8297 - accuracy: 0.6426
Epoch 1/1
 - 48s - loss: 0.8252 - accuracy: 0.6488
Epoch 1/2
 - 47s - loss: 0.8352 - accuracy: 0.6438
Epoch 2/2
 - 45s - loss: 0.6873 - accuracy: 0.7129
Epoch 1/2
 - 47s - loss: 0.8254 - accuracy: 0.6470
Epoch 2/2
 - 48s - loss: 0.6853 - accuracy: 0.7160
Epoch 1/2
 - 48s - loss: 0.8332 - accuracy: 0.6394
Epoch 2/2
 - 48s - loss: 0.6812 - accuracy: 0.7119
Epoch 1/2
 - 46s - loss: 0.8346 - accuracy: 0.6413
Epoch 2/2
 - 46s - loss: 0.6839 - accuracy: 0.7095
Epoch 1/2
 - 46s - loss: 0.8286 - accuracy: 0.6457
Epoch 2/2
 - 48s - loss: 0.6805 - accuracy: 0.7135
Epoch 1/1
 - 25s - loss: 0.8396 - accuracy: 0.6412
Epoch 1/1
 - 24s - loss: 0.8334 - accuracy: 0.6406
Epoch 1/1
 - 25s - loss: 0.8354 - accuracy: 0.6381
Epoch 1/1
 - 25s - loss: 0.8408 - accuracy: 0.6363
Epoch 1/1
 - 25s - loss: 0.8318