<a href="https://colab.research.google.com/github/ExpressGradient/sentinel_prime/blob/main/sentinel_prime_seq.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import math
from tensorflow import keras

In [None]:
# Read the dataset and swap every -1 in the frame for 0 in one chained call.
# NOTE(review): presumably -1 is the negative-class label and 0/1 targets are
# what the sigmoid + binary_crossentropy model further down needs -- confirm.
df = pd.read_csv('drive/My Drive/stock_data.csv').replace(-1, 0)
ds = df.to_numpy()

# Ordered (unshuffled) split: the first 80% of the rows are used for training,
# the remaining rows are held out for testing.
train_ds, test_ds = np.split(ds, [math.floor(0.8 * len(ds))])

# Column 0 feeds the text vectorizer, column 1 is used as the target.
train_x, train_y = train_ds[:, 0], train_ds[:, 1]

In [None]:
max_length = 30

# Turn raw strings into integer token-id sequences: the vocabulary is capped
# at 10000 entries and every output sequence has exactly max_length ids.
text_vectorizer = keras.layers.TextVectorization(
    output_mode='int',
    output_sequence_length=max_length,
    max_tokens=10000,
)

# Build the vocabulary from the training inputs only.
text_vectorizer.adapt(train_x)

In [None]:
# token -> 1-D numpy vector parsed from the pre-trained GloVe text file.
embeddings_index = {}

# Pin the encoding: without it open() falls back to the platform's locale
# encoding, so the same file can fail to decode (or decode differently)
# outside an environment whose default happens to be UTF-8.
with open('drive/My Drive/twitter_glove_200d.txt', encoding='utf-8') as f:
  for line in f:
    # Each line is "<token> <v1> <v2> ...": split off the token once, then
    # parse the rest of the line as whitespace-separated floats.
    word, coefs = line.split(maxsplit=1)
    coefs = np.fromstring(coefs, sep=" ")
    embeddings_index[word] = coefs

# Number of tokens loaded (shown as the cell output).
len(embeddings_index)

1193515

In [None]:
embedding_dim = 200
max_tokens = 10000

# Token -> integer id, in the order used by the fitted vectorizer.
vocabulary = text_vectorizer.get_vocabulary()
word_index = dict(zip(vocabulary, range(len(vocabulary))))

# Row i holds the pre-trained vector for token id i; rows for tokens that are
# missing from embeddings_index stay all-zero.
embedding_matrix = np.zeros((max_tokens, embedding_dim))

for word, i in word_index.items():
  # Skip ids that do not fit in the matrix *before* doing the lookup.
  # Previously the "is not None" check sat outside the range guard, so an
  # out-of-range id would reuse the previous word's vector and index past the
  # end of embedding_matrix (or hit an undefined name on the first iteration).
  if i >= max_tokens:
    continue
  embedding_vector = embeddings_index.get(word)
  if embedding_vector is not None:
    embedding_matrix[i] = embedding_vector

In [None]:
# Assemble the classifier one layer at a time.
model = keras.Sequential()

# Variable-length sequences of integer token ids.
model.add(keras.Input(shape=(None,), dtype='int64'))

# Embedding table initialised from embedding_matrix and kept frozen;
# mask_zero=True marks id 0 as a masked value for the layers that follow.
model.add(
    keras.layers.Embedding(
        max_tokens,
        embedding_dim,
        embeddings_initializer=keras.initializers.Constant(embedding_matrix),
        trainable=False,
        mask_zero=True,
    )
)

# Bidirectional LSTM (32 units per direction), dropout, then a single
# sigmoid unit as the output.
model.add(keras.layers.Bidirectional(keras.layers.LSTM(32)))
model.add(keras.layers.Dropout(0.3))
model.add(keras.layers.Dense(1, activation='sigmoid'))

In [None]:
# Binary classification setup: cross-entropy loss on the sigmoid output,
# RMSprop optimizer, accuracy reported during training and evaluation.
model.compile(
    loss="binary_crossentropy",
    optimizer="rmsprop",
    metrics=["accuracy"],
)

In [None]:
# Convert the training inputs to token ids and cast the targets to float32.
vectorized_train_x = text_vectorizer(train_x)
train_labels = train_y.astype('float32')

# Train for 5 epochs, with 20% of the training data used for validation.
model.fit(
    x=vectorized_train_x,
    y=train_labels,
    validation_split=0.2,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f9877aaf350>

In [None]:
# Held-out rows: column 0 is the model input, column 1 the target.
test_x = test_ds[:, 0]
test_y = test_ds[:, 1]
vectorized_test_x = text_vectorizer(test_x)
test_labels = test_y.astype('float32')

# Returns [loss, accuracy] for the held-out rows.
model.evaluate(x=vectorized_test_x, y=test_labels)



[0.6821937561035156, 0.6298533082008362]

In [None]:
# Score a single hand-written example with the trained model.
sample_texts = np.array(['$AAPL share price falls as new CEO makes a racist comment'])
model.predict(x=text_vectorizer(sample_texts))

array([[0.66246927]], dtype=float32)