In [2]:
# importing all libraries
import math
import random
import datetime as dt
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import tensorflow as tf
import keras.callbacks
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore") 

In [73]:
# Defining ticker and reading data

ticker = "DIS"

RUN_NAME = f"{ticker} dense classification"

df = pd.read_csv(f'data/stocks/{ticker}.csv', parse_dates=['Date'])
df["Difference"] = df["Close"].diff()
df["Movement"] = df.apply(lambda x: 1 if x["Difference"] > 0 else 0, axis=1)
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Difference,Movement
0,1962-01-02,0.092908,0.096026,0.092908,0.092908,0.035517,817400,,0
1,1962-01-03,0.092908,0.094467,0.092908,0.094155,0.035994,778500,0.001247,1
2,1962-01-04,0.094155,0.094467,0.093532,0.094155,0.035994,934200,0.0,0
3,1962-01-05,0.094155,0.094779,0.093844,0.094467,0.036113,934200,0.000312,1
4,1962-01-08,0.094467,0.095714,0.092285,0.094155,0.035994,1245600,-0.000312,0


In [74]:
# Defining a function that will contain stocks data, cutting data before 1990
def specific_data(company, df, start = dt.datetime(1990,1,1), end = dt.datetime(2024,1,1)):
    date_filtered_data = df[(df['Date'] > start) & (df['Date'] < end)]
    return date_filtered_data

df = specific_data(ticker, df)

df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Difference,Movement
7041,1990-01-02,9.207334,9.515615,9.207334,9.495063,5.506028,6983400,0.287729,1
7042,1990-01-03,9.495063,9.577271,9.464234,9.515615,5.517947,8077000,0.020552,1
7043,1990-01-04,9.515615,9.608099,9.412854,9.505339,5.511988,7721800,-0.010276,0
7044,1990-01-05,9.505339,9.597823,9.495063,9.536166,5.529861,3961800,0.030828,1
7045,1990-01-08,9.536166,9.680031,9.52589,9.608099,5.64258,6647700,0.071933,1


In [75]:
# Splitting to train and split set and scale all data 

scaler = MinMaxScaler()
close_scaler = MinMaxScaler()
without_date = df.drop( "Date", axis="columns")

df_train, df_test = train_test_split(without_date, test_size=0.2, shuffle=False)

close_scaler.fit(np.array(df_train["Close"]).reshape(-1,1))
print(f"{close_scaler.scale_}, {close_scaler.min_}, ")
scaled_train = scaler.fit_transform(df_train)
scaled_test = scaler.transform(df_test)

print(scaled_train.shape)
print(scaled_test.shape)

[0.01313076], [-0.09431765], 
(6097, 8)
(1525, 8)


In [76]:
# preprocess data for LSTM with a sliding window

past = 14

# Prepare sequences for LSTM
X_train, y_train = [], []
for i in range(past, len(scaled_train)):
    X_train.append(scaled_train[i - past:i])
    y_train.append(scaled_train[i][-1])
X_train, y_train = np.array(X_train), np.array(y_train)

# Similarly prepare sequences for the test set
X_test, y_test = [], []
for i in range(past, len(scaled_test)):
    X_test.append(scaled_test[i - past:i])
    y_test.append(scaled_test[i][-1])
X_test, y_test = np.array(X_test), np.array(y_test)

X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], X_test.shape[2], 1))
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], X_train.shape[2], 1))

print("Training set size:-")
print(X_train.shape), print(y_train.shape)
print("Testing set size:-")
print(X_test.shape), print(y_test.shape)

Training set size:-
(6083, 14, 8, 1)
(6083,)
Testing set size:-
(1511, 14, 8, 1)
(1511,)


(None, None)

In [77]:
"""
y_Train = tf.keras.utils.to_categorical(
    y_train, num_classes=2, dtype='float32'
)
y_Test = tf.keras.utils.to_categorical(
    y_test, num_classes=2, dtype='float32'
)
"""
# Logging for Tensorboard

logger = keras.callbacks.TensorBoard(
        log_dir=f"logs/{RUN_NAME}",
        write_graph=True,
        histogram_freq=64
    )

In [78]:
# Initialize a sequential model
model = Sequential()

model.add(Dense(units=64, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.2))

model.add(Dense(units=64, activation="relu"))
model.add(Dropout(0.2))

model.add(Dense(units=64, activation="relu"))
model.add(Dropout(0.2))

model.add(Flatten())
model.add(Dense(1))

model.summary()


Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_28 (Dense)            (None, 14, 64)            576       
                                                                 
 dropout_21 (Dropout)        (None, 14, 64)            0         
                                                                 
 dense_29 (Dense)            (None, 14, 64)            4160      
                                                                 
 dropout_22 (Dropout)        (None, 14, 64)            0         
                                                                 
 dense_30 (Dense)            (None, 14, 64)            4160      
                                                                 
 dropout_23 (Dropout)        (None, 14, 64)            0         
                                                                 
 flatten_7 (Flatten)         (None, 896)              

In [79]:
# Compiling model
model.compile(loss='mse',optimizer='adam')

In [80]:
# Defining our callbacks
checkpoints = ModelCheckpoint(filepath = 'my_weights.h5', save_best_only = True)
# Defining our early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Training our lstm model
model.fit(X_train, y_train,
          validation_split=0.2,
          epochs=24,
          batch_size=64,
          verbose=1,
          callbacks= [checkpoints, early_stopping, logger])

Epoch 1/24
Epoch 2/24
Epoch 3/24
Epoch 4/24
Epoch 5/24
Epoch 6/24
Epoch 7/24
Epoch 8/24
Epoch 9/24
Epoch 10/24
Epoch 11/24
Epoch 12/24
Epoch 13/24
Epoch 14/24
Epoch 15/24
Epoch 16/24
Epoch 17/24
Epoch 18/24
Epoch 19/24


<keras.src.callbacks.History at 0x2310a3a0050>

In [81]:
# Prediction
train_predict=model.predict(X_train)
test_predict=model.predict(X_test)



In [82]:
# Checking the direction of movement

train_movement=np.array(df_train["Movement"][past:])
test_movement=np.array(df_test["Movement"][past:])
print(train_movement.shape)
print(test_movement.shape)

train_decision = np.argmax(train_predict, axis=1)
test_decision = np.argmax(test_predict, axis=1)
print(train_decision.shape)
print(test_decision.shape)

counter = 0
good = 0
for i in range(0, len(train_movement)):
    if train_movement[i] == train_decision[i]:
        good +=1
    counter+=1
print(f"{good/counter}")

counter = 0
good = 0
for i in range(0, len(test_movement)):
    if train_movement[i] == train_decision[i]:
        good +=1
    counter+=1
print(f"{good/counter}")


(6083,)
(1511,)
(6083,)
(1511,)
0.5096169653131678
0.5433487756452681
