In [41]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

from tensorflow.python.keras.preprocessing.text import Tokenizer
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, GRU
from keras.layers.embeddings import Embedding
from keras.layers.convolutional import Conv1D
from keras.layers.pooling import GlobalMaxPooling1D
from keras.layers import Flatten

In [42]:
dataset = pd.read_json('dataset/financialData.json')
X = dataset['title']
y = np.asarray([1 if s >= 0 else 0 for s in dataset['sentiment']])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [43]:
tokenizer_obj =  Tokenizer()

tokenizer_obj.fit_on_texts(dataset['title'])

max_length = max([len(s.split()) for s in dataset['title']])

vocab_size = len(tokenizer_obj.word_index) + 1

X_train_tokens = tokenizer_obj.texts_to_sequences(X_train)
X_test_tokens = tokenizer_obj.texts_to_sequences(X_test)

X_train_pad = pad_sequences(X_train_tokens, maxlen=max_length, padding='post')
X_test_pad = pad_sequences(X_test_tokens, maxlen=max_length, padding='post')

In [44]:
EMBEDDING_DIM = 100
model = Sequential()
model.add(Embedding(vocab_size, EMBEDDING_DIM, input_length=max_length))
model.add(LSTM(128))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [45]:
model.fit(X_train_pad, y_train, batch_size=256, epochs=25, verbose=2, validation_data=(X_test_pad, y_test))

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 1920 samples, validate on 946 samples
Epoch 1/25
 - 1s - loss: 0.6707 - accuracy: 0.6344 - val_loss: 0.6580 - val_accuracy: 0.6374
Epoch 2/25
 - 1s - loss: 0.6581 - accuracy: 0.6344 - val_loss: 0.6562 - val_accuracy: 0.6374
Epoch 3/25
 - 1s - loss: 0.6568 - accuracy: 0.6344 - val_loss: 0.6533 - val_accuracy: 0.6374
Epoch 4/25
 - 1s - loss: 0.6512 - accuracy: 0.6344 - val_loss: 0.6432 - val_accuracy: 0.6374
Epoch 5/25
 - 1s - loss: 0.5227 - accuracy: 0.7156 - val_loss: 0.5761 - val_accuracy: 0.7664
Epoch 6/25
 - 1s - loss: 0.2910 - accuracy: 0.8917 - val_loss: 0.6852 - val_accuracy: 0.7791
Epoch 7/25
 - 1s - loss: 0.1789 - accuracy: 0.9438 - val_loss: 0.5347 - val_accuracy: 0.7664
Epoch 8/25
 - 1s - loss: 0.1191 - accuracy: 0.9661 - val_loss: 0.7099 - val_accuracy: 0.7791
Epoch 9/25
 - 1s - loss: 0.0813 - accuracy: 0.9776 - val_loss: 0.6400 - val_accuracy: 0.7822
Epoch 10/25
 - 1s - loss: 0.0593 - accuracy: 0.9818 - val_loss: 0.8204 - val_accuracy: 0.7759
Epoch 11/25
 - 1s - lo

<keras.callbacks.callbacks.History at 0x1d97e010f60>

In [46]:
score = model.evaluate(X_test_pad, y_test, verbose=1)
print("Test Score:", score[0])
print("Test Accuracy:", score[1])

Test Score: 0.9919893032148575
Test Accuracy: 0.7843551635742188
