In [205]:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential
import pandas as pd
import numpy as np


In [206]:
# Read CSV
df = pd.read_csv("stock_data.csv")
# Remap the label -1 to 0 (presumably the raw labels are -1/1 -- confirm against the CSV)
# so the targets lie in {0, 1}, matching the sigmoid + binary_crossentropy model below.
# Assign the result back instead of calling replace(..., inplace=True) on the column
# selection: the in-place call on df['Sentiment'] does not update df under pandas
# Copy-on-Write.
df['Sentiment'] = df['Sentiment'].replace(-1, 0)
sentiment = df.Sentiment
text = df.Text

In [207]:
# Build the vocabulary from the texts and encode each text as a list of word indices
from tensorflow.keras.preprocessing.text import Tokenizer

vocab_size = 1000
tokenizer = Tokenizer(
    oov_token = 'OOV',       # placeholder used for words outside the kept vocabulary
    num_words = vocab_size,  # limits the vocabulary used when encoding
)
tokenizer.fit_on_texts(text)
sequences = tokenizer.texts_to_sequences(text)
word_index = tokenizer.word_index # word -> integer index mapping learned from `text`

In [208]:
# Bring every encoded sentence to a common length so they stack into one 2-D array.
# The library defaults are spelled out: no fixed maxlen, padding added at the front.
from tensorflow.keras.preprocessing.sequence import pad_sequences
padded_sequences = pad_sequences(sequences, maxlen = None, padding = 'pre')


In [209]:
# Define the sentiment classifier: Embedding -> LSTM -> Dropout -> sigmoid output
num_features = 128
sentiment_model = Sequential([
    # Map each word index to a num_features-dimensional vector
    tf.keras.layers.Embedding(vocab_size, num_features, input_length = len(padded_sequences[0])),
    tf.keras.layers.LSTM(num_features),
    tf.keras.layers.Dropout(0.3),
    # Single sigmoid unit, paired with the binary_crossentropy loss below
    tf.keras.layers.Dense(1, activation = 'sigmoid'),
])
sentiment_model.compile(
    loss = 'binary_crossentropy',
    optimizer = 'adam',
    metrics = ['accuracy'],
)
sentiment_model.summary()



Model: "sequential_24"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_19 (Embedding)    (None, 34, 128)           128000    
                                                                 
 lstm_16 (LSTM)              (None, 128)               131584    
                                                                 
 dropout_7 (Dropout)         (None, 128)               0         
                                                                 
 dense_9 (Dense)             (None, 1)                 129       
                                                                 
Total params: 259,713
Trainable params: 259,713
Non-trainable params: 0
_________________________________________________________________


In [210]:
# Train Test Split
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for evaluation. random_state pins the shuffle so the
# same rows land in the hold-out set on every run; without it the split (and every
# metric computed on it) changes each time the notebook is executed.
X_train, X_test, y_train, y_test = train_test_split(
    np.array(padded_sequences),
    np.array(sentiment),
    test_size=0.2,
    random_state=42,
)

In [211]:
# Fit for 5 epochs in mini-batches of 64, passing the held-out split as validation data
sentiment_model.fit(
    X_train,
    y_train,
    batch_size = 64,
    epochs = 5,
    validation_data = (X_test, y_test),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f7af124eb20>

In [217]:
# Evaluate the trained model on the held-out split
from sklearn import metrics
from sklearn.metrics import accuracy_score, confusion_matrix
y_prob = sentiment_model.predict(X_test)

# Turn the sigmoid outputs (first column of y_prob) into hard 0/1 labels in one
# vectorized step. A strict > 0.5 threshold gives the same labels as round() on each
# value, including an exact 0.5, which both map to 0.
y_pred = (y_prob[:, 0] > 0.5).astype(int)
# scikit-learn metrics take (y_true, y_pred) in that order; keep both calls consistent.
print(accuracy_score(y_test, y_pred))
confusion_matrix(y_test, y_pred)

0.7799827437446074


array([[276, 138],
       [117, 628]])