In [1]:
# Seed value
# Apparently you may use different seed values at each stage
seed_value= 0

# 1. Set the `PYTHONHASHSEED` environment variable at a fixed value
import os
os.environ['PYTHONHASHSEED']=str(seed_value)

# 2. Set the `python` built-in pseudo-random generator at a fixed value
import random
random.seed(seed_value)

# 3. Set the `numpy` pseudo-random generator at a fixed value
import numpy as np
np.random.seed(seed_value)

# 4. Set the `tensorflow` pseudo-random generator at a fixed value
import tensorflow as tf
tf.random.set_seed(seed_value)
# for later versions: 
# tf.compat.v1.set_random_seed(seed_value)

# # 5. Configure a new global `tensorflow` session
# from keras import backend as K
# # session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
# # sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
# # K.set_session(sess)
# # for later versions:
# session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
# tf.compat.v1.set_random_seed(seed_value)
# sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
# K.set_session(sess)

from pandas import read_csv
import pandas as pd
import keras
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
from tqdm import tqdm
# from numba import jit

In [2]:
#This version does (samples, features, timestamps)
def reshaper(data,  n_timestamps, n_features):
    n_samples = data.shape[0]

    result = np.empty((n_samples, n_features, n_timestamps))
    for i in range(n_samples):
        c=0
        for j in range (n_features):
            for k in range(n_timestamps):
                result[i][j][k]=data[i][c]
                c+=1
    
    return result

# #This version does (samples, timestamps, features)
# def reshaper(data,  n_timestamps, n_features):
#     n_samples = data.shape[0]

#     result = np.empty((n_samples, n_features, n_timestamps))
#     for i in range(n_samples):
#         c=0
#         for j in range (n_features):
#             for k in range(n_timestamps):
#                 result[i][k][j]=data[i][c]
#                 c+=1
    
#     return result

def differentiate_column(column):
    return column.diff().fillna(0)  # Using .diff() for differencing and filling NaNs with 0

def supervisedReframer(df, steps_back=1, steps_forward=1):
    new_df = pd.DataFrame()
    new_df['Date'] = pd.to_datetime(df['Date'], origin='1899-12-30', unit='D')

    # for col in df.columns:
    #     if col != "Date":
    #         df[col] =  differentiate_column(df[col])

    shifted_cols = {}
    for col in df.columns:
        if col != "Date":
            for shift in range(steps_back, -1, -1):
                if shift != 0:
                    new_col_name = f"{col} (t-{shift})"
                else:
                    new_col_name = f"{col}"
                shifted_cols[new_col_name] = df[col].shift(+shift).copy()

    
    target_col = {}
    for steps in range(1, steps_forward+1):
        new_col_name = f"Target (t+{steps})"
        target_col[new_col_name] = df['Adj Close'].shift(-steps).copy()

    # Concatenate shifted columns
    shifted_df = pd.DataFrame(shifted_cols)
    new_df = pd.concat([new_df, shifted_df], axis=1)

    # Concatenate target column
    target_df = pd.DataFrame(target_col)
    new_df = pd.concat([new_df, target_df], axis=1)
    
    new_df.dropna(inplace=True)

    return new_df

def TrainValTestSplit(dataframe, backwards_steps=1,forward_steps=1, n_features=1):
    forward_steps+=1
    df=dataframe.copy()
    df['Year'] = df['Date'].dt.year.copy()

    # Determine the years for training, validation, and testing
    training_years = df['Year'].unique()[:-2]  # All years except the last two
    validation_year = df['Year'].unique()[-2]  # Second to last year
    testing_year = df['Year'].unique()[-1]  # Last year

    # Select columns for X and y
    X_columns = df.columns[1:-forward_steps]
    y_columns = df.columns[-forward_steps:-1]
    
    # Filter the data based on the years
    X_train = df[df['Year'].isin(training_years)][X_columns].values
    y_train = df[df['Year'].isin(training_years)][y_columns].values
    
    X_val = df[df['Year'] == validation_year][X_columns].values
    y_val = df[df['Year'] == validation_year][y_columns].values
    
    X_test = df[df['Year'] == testing_year][X_columns].values
    y_test = df[df['Year'] == testing_year][y_columns].values
    
    scaler = MinMaxScaler()
    
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.fit_transform(X_val)
    X_test = scaler.fit_transform(X_test)
    
    y_train = scaler.fit_transform(y_train)
    y_val = scaler.fit_transform(y_val)
    y_test = scaler.fit_transform(y_test)

    X_train = reshaper(X_train, backwards_steps+1, n_features)
    X_val = reshaper(X_val, backwards_steps+1, n_features)
    X_test = reshaper(X_test, backwards_steps+1, n_features)

    # (Samples, timestamps, features version)!!!!
    # y_train = y_train.reshape((y_train.shape[0], forward_steps-1, 1))
    # y_val = y_val.reshape((y_val.shape[0], forward_steps-1, 1))
    # y_test = y_test.reshape((y_test.shape[0], forward_steps-1, 1))

    # (Samples, features, timestamps) version!
    y_train = y_train.reshape((y_train.shape[0], 1, forward_steps-1))
    y_val = y_val.reshape((y_val.shape[0], 1, forward_steps-1))
    y_test = y_test.reshape((y_test.shape[0], 1, forward_steps-1))
    
    return X_train, y_train, X_val, y_val, X_test, y_test

In [5]:
n_steps_back = 10 # Ako je 0, to znaci da koristis danas da predvidis sutra
n_steps_forward = 10 #Ako je 0, jbg ne predvidjas ista

# load dataset
dataset = read_csv('CSV Files with Calculated Indicators/JEX.csv', low_memory=False, header=0, index_col=None)

df = dataset

# df = pd.DataFrame({
#     'Date': dataset['Date'], 
#     'Adj Close': dataset['Adj Close']})

n_features = df.shape[1]-1

moved_df = supervisedReframer(df, n_steps_back, n_steps_forward).copy()

X_train,y_train,X_val,y_val,X_test,y_test = TrainValTestSplit(moved_df,n_steps_back,n_steps_forward,n_features)

memory_usage = moved_df.memory_usage(deep=True).sum() / (1024 * 1024)  # Convert bytes to megabytes
print("Memory usage of DataFrame: {:.2f} MB".format(memory_usage))

ValueError: could not convert string to float: '#DIV/0!'

In [None]:
### Create the model
inputs = keras.layers.Input(shape=(X_train.shape[1], X_train.shape[2]))
x = keras.layers.LSTM(50, return_sequences=True)(inputs)
x = keras.layers.LSTM(25, return_sequences=True)(x)
x = keras.layers.LSTM(10)(x)
outputs = keras.layers.Dense(n_steps_forward, activation='linear')(x)

# optimizer = keras.optimizers.Adam(learning_rate=0.1)
# model.compile(optimizer=optimizer, loss="mse")

model = keras.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam', loss="mse")
model.summary()

early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',  # Monitor validation loss
    patience=5,           # Number of epochs to wait before stopping
    restore_best_weights=True  # Restore weights to the best observed during training
)

# Train the model
history = model.fit(
    X_train, y_train,
    epochs = 2,
    batch_size = 48,
    validation_data=(X_val, y_val),
    verbose=1,
    shuffle=False
    # callbacks=[early_stopping]
)

y_test_reshaped = y_test.reshape((y_test.shape[0], y_test.shape[2]))

# estimate the R2 on the test set
print("LSTM coefficient of determination of the prediction: ", r2_score(y_test_reshaped, model.predict(X_test)))

X_test_skimmed = X_test[::n_steps_forward]
y_test_skimmed = y_test_reshaped[::n_steps_forward]

predictions = model.predict(X_test_skimmed)

y_test_skimmed = y_test_skimmed.flatten()

predictions = predictions.ravel()

graph_df = pd.DataFrame({
    'True': y_test_skimmed, 
    'Predictions': predictions})

last_n_dates = df['Date'].tail(len(graph_df)).tolist()
graph_df['Date'] = last_n_dates

graph_df['Date'] = pd.to_datetime(graph_df['Date'], origin='1899-12-30', unit='D')

graph_df['Predictions'] = graph_df['Predictions'].shift(-1)

#Plotting
x_min = graph_df['Date'].iloc[0]  # Replace with your desired minimum x-axis value
x_max = graph_df['Date'].iloc[-1]  # Replace with your desired maximum x-axis value
y_min = min(graph_df['True'].min(), graph_df['Predictions'].min())  # Replace with your desired minimum y-axis value
y_max = max(graph_df['True'].max(), graph_df['Predictions'].max())  # Replace with your desired maximum y-axis value

# Assuming your DataFrame is named df
# Replace 'Date', 'Close', and 'Predicted' with your actual column names
plt.figure(figsize=(10, 6))
plt.plot(graph_df['Date'], graph_df['True'], label='Close', linewidth=1)
plt.plot(graph_df['Date'], graph_df['Predictions'], label='Predictions', linestyle='dashed', color='red', linewidth=1)

# Customize the plot
plt.title('Adj Close vs. Predicted Prices')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()

# Set x-axis and y-axis value ranges
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)

plt.grid(False)
plt.xticks(rotation=0)

# Show the plot
plt.show()

In [None]:
# Initialize an empty dictionary to store the history
combined_history = {}

# Check if there is existing history
if 'history' in locals():
    # If history exists, add it to the combined history
    for key in history.history.keys():
        combined_history[key] = history.history[key]

# Train the model again, assuming `model` is already defined
new_history = model.fit(
    X_train, y_train,  # Use your new training data
    epochs=1,
    batch_size=32,
    validation_data=(X_val, y_val),  # Use your new validation data
    verbose=1,
    shuffle=False
    # callbacks=[early_stopping]
)

# Update the combined history with the new history
for key in new_history.history.keys():
    if key in combined_history:
        combined_history[key].extend(new_history.history[key])
    else:
        combined_history[key] = new_history.history[key]

# estimate the R2 on the test set
print("LSTM coefficient of determination of the prediction: ", r2_score(y_test_reshaped, model.predict(X_test)))

# fit model
plt.plot(combined_history['loss'], label='train')
plt.plot(combined_history['val_loss'], label='validation')
plt.legend()
plt.show()

X_test_skimmed = X_test[::n_steps_forward]
y_test_skimmed = y_test_reshaped[::n_steps_forward]

predictions = model.predict(X_test_skimmed)

y_test_skimmed = y_test_skimmed.flatten()

predictions = predictions.ravel()

graph_df = pd.DataFrame({
    'True': y_test_skimmed, 
    'Predictions': predictions})

last_n_dates = df['Date'].tail(len(graph_df)).tolist()
graph_df['Date'] = last_n_dates

graph_df['Date'] = pd.to_datetime(graph_df['Date'], origin='1899-12-30', unit='D')

graph_df['Predictions'] = graph_df['Predictions'].shift(-1)

#Plotting
x_min = graph_df['Date'].iloc[0]  # Replace with your desired minimum x-axis value
x_max = graph_df['Date'].iloc[-1]  # Replace with your desired maximum x-axis value
y_min = min(graph_df['True'].min(), graph_df['Predictions'].min())  # Replace with your desired minimum y-axis value
y_max = max(graph_df['True'].max(), graph_df['Predictions'].max())  # Replace with your desired maximum y-axis value

# Assuming your DataFrame is named df
# Replace 'Date', 'Close', and 'Predicted' with your actual column names
plt.figure(figsize=(10, 6))
plt.plot(graph_df['Date'], graph_df['True'], label='Close', linewidth=1)
plt.plot(graph_df['Date'], graph_df['Predictions'], label='Predictions', linestyle='dashed', color='red', linewidth=1)

# Customize the plot
plt.title('Adj Close vs. Predicted Prices')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()

# Set x-axis and y-axis value ranges
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)

plt.grid(False)
plt.xticks(rotation=0)

# Show the plot
plt.show()
