In [None]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore") 

# For processing
import math
import random
import datetime as dt
import matplotlib.dates as mdates

# For visualization
import matplotlib.pyplot as plt

# Libraries for model training
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split


2023-10-22 21:54:00.649240: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
ticker = "MSFT"

# Load the price history for the selected ticker; 'Date' is parsed to datetime
# so that later cells can filter on it.
df = pd.read_csv(f'data/stocks/{ticker}.csv', parse_dates=['Date'])

# Row-to-row change of the closing price (NaN on the first row, which has no predecessor).
df["Difference"] = df["Close"].diff()

# Binary label: 1 when the close rose versus the previous row, 0 otherwise.
# Vectorized comparison instead of a per-row Python lambda; NaN > 0 is False,
# so the first row is labelled 0 exactly as before.
df["Movement"] = (df["Difference"] > 0).astype("int64")
df.head()

In [None]:
# Restrict a price frame to the rows that fall strictly inside a date window
def specific_data(company, df, start = dt.datetime(1990,1,1), end = dt.datetime(2024,1,1)):
    """Return the rows of ``df`` whose 'Date' lies strictly between ``start`` and ``end``.

    Parameters
    ----------
    company : unused; kept so existing callers keep working.
    df : DataFrame with a 'Date' column comparable to ``datetime`` values.
    start, end : exclusive lower / upper bounds of the window.

    Returns
    -------
    DataFrame containing only the rows inside the window (original index kept).
    """
    dates = df['Date']
    after_start = dates > start
    before_end = dates < end
    return df[after_start & before_end]

# Keep only the rows inside specific_data's default date window
df = specific_data(company=ticker, df=df)

df.head()

In [None]:
# Everything except 'Date' is passed to the scalers
# (assumes the remaining columns are all numeric — TODO confirm against the CSV).
without_date = df.drop(columns="Date")

# shuffle=False: the first 80% of the rows become the training set, the rest the test set.
df_train, df_test = train_test_split(without_date, test_size=0.2, shuffle=False)

# Separate scaler for the 'Close' column alone, fitted on the training rows only.
close_scaler = MinMaxScaler()
close_scaler.fit(df_train["Close"].to_numpy().reshape(-1, 1))
print(f"{close_scaler.scale_}, {close_scaler.min_}, ")

# Scaler for all columns: fitted on the training rows, then reused as-is on the test rows.
scaler = MinMaxScaler()
scaled_train = scaler.fit_transform(df_train)
scaled_test = scaler.transform(df_test)

print(scaled_train.shape)
print(scaled_test.shape)

In [None]:
past = 8  # number of consecutive previous rows that make up one input sample


def make_windows(scaled, past):
    """Slice a scaled 2-D array into look-back windows and their targets.

    For every row index ``i >= past`` the sample is ``scaled[i - past:i]`` and
    the target is the last column of row ``i``.

    Parameters
    ----------
    scaled : 2-D array of shape (rows, features).
    past : number of preceding rows in each window.

    Returns
    -------
    X : array of shape (rows - past, past, features, 1); the trailing axis of
        size 1 is the channel axis expected by Conv2D.
    y : array of shape (rows - past,).
    Both are empty (with the correct rank) when ``scaled`` has ``past`` rows or fewer.
    """
    scaled = np.asarray(scaled)
    n_samples = max(len(scaled) - past, 0)
    n_features = scaled.shape[1]
    X = np.empty((n_samples, past, n_features, 1), dtype=scaled.dtype)
    y = np.empty(n_samples, dtype=scaled.dtype)
    for k in range(n_samples):
        X[k, :, :, 0] = scaled[k:k + past]   # rows k .. k+past-1
        y[k] = scaled[k + past, -1]          # last column of the row right after the window
    return X, y


# Same windowing for both splits
X_train, y_train = make_windows(scaled_train, past)
X_test, y_test = make_windows(scaled_test, past)

# Separate print calls: a trailing "print(a), print(b)" expression would make the
# notebook echo the tuple (None, None) as the cell result.
print("Training set size:-")
print(X_train.shape)
print(y_train.shape)
print("Testing set size:-")
print(X_test.shape)
print(y_test.shape)

In [None]:
# NOTE: the one-hot encoding below is wrapped in a string literal, so it is NOT executed;
# y_Train and y_Test are therefore not defined by this cell.
"""
y_Train = tf.keras.utils.to_categorical(
    y_train, num_classes=2, dtype='float32'
)
y_Test = tf.keras.utils.to_categorical(
    y_test, num_classes=2, dtype='float32'
)
"""

In [None]:
# Build the whole network in one Sequential stack: a convolution block, two
# fully connected blocks, then a flattened single-unit output.
model = Sequential([
    # Convolution block; the input shape is one sample of X_train (all axes but the batch axis)
    Conv2D(filters=2048, kernel_size=(3,3), activation="relu",
           input_shape=X_train.shape[1:]),
    MaxPooling2D((2,2)),
    Dropout(0.2),   # dropout to reduce overfitting

    # First fully connected layer with 64 units
    # NOTE(review): Flatten comes after the Dense layers, so these receive the
    # un-flattened pooling output — confirm that this ordering is intended.
    Dense(units=64, activation="relu"),
    Dropout(0.2),

    # Second fully connected layer with 64 units
    Dense(units=64, activation="relu"),
    Dropout(0.2),

    # Flatten, then a dense output layer with one unit
    Flatten(),
    Dense(1),
])

model.summary()


In [None]:
# Mean-squared-error loss, optimized with Adam
model.compile(optimizer='adam', loss='mse')

In [None]:
# Checkpoint callback: with save_best_only=True only the best model seen so far is kept on disk
checkpoints = ModelCheckpoint(filepath = 'my_weights.h5', save_best_only = True)
# Early stopping: halt after 15 epochs without val_loss improvement and restore the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Train the convolutional model.
# The targets are y_train: the previously used name y_Train is only mentioned inside a
# disabled string literal, so it is undefined on a fresh kernel (NameError), and the
# single-unit output layer matches the 1-D y_train array.
# The returned History object is kept so the loss curves can be inspected afterwards
# (and so the cell does not echo the object's repr).
history = model.fit(X_train, y_train,
                    validation_split=0.2,
                    epochs=24,
                    batch_size=8,
                    verbose=1,
                    callbacks= [checkpoints, early_stopping])

In [None]:
# Run the trained model over both splits (train first, then test)
train_predict, test_predict = [model.predict(inputs) for inputs in (X_train, X_test)]

In [None]:
def to_decision(predictions):
    """Turn raw model outputs into 0/1 movement decisions.

    A single-column (or 1-D) output is thresholded at 0.5, because argmax over an
    axis of length 1 is always 0. An output with two or more columns is treated as
    per-class scores and reduced with argmax.
    """
    predictions = np.asarray(predictions)
    if predictions.ndim > 1 and predictions.shape[1] > 1:
        return np.argmax(predictions, axis=1)
    return (predictions.reshape(-1) > 0.5).astype(int)


def hit_rate(movement, decision):
    """Return the fraction of positions where ``decision`` equals ``movement``.

    Returns NaN for empty input instead of dividing by zero.
    Raises ValueError when the two arrays differ in length.
    """
    movement = np.asarray(movement)
    decision = np.asarray(decision)
    if len(movement) != len(decision):
        raise ValueError(
            f"length mismatch: {len(movement)} labels vs {len(decision)} decisions"
        )
    if len(movement) == 0:
        return float("nan")
    return float(np.mean(movement == decision))


# Ground-truth labels; the first `past` rows are skipped because they have no
# complete look-back window and therefore no prediction.
train_movement=np.array(df_train["Movement"][past:])
test_movement=np.array(df_test["Movement"][past:])
print(train_movement.shape)
print(test_movement.shape)

train_decision = to_decision(train_predict)
test_decision = to_decision(test_predict)
print(train_decision.shape)
print(test_decision.shape)

# Accuracy on the training split
print(f"{hit_rate(train_movement, train_decision)}")

# Accuracy on the test split (compares the *test* arrays; the earlier version
# re-used the train arrays here and so reported train accuracy twice).
print(f"{hit_rate(test_movement, test_decision)}")
