In [None]:
!pip install yfinance
!pip install pandas_ta
!pip install scikit-learn
!pip install matplotlib
!pip install plotly
!pip install "notebook>=5.3" "ipywidgets>=7.5"
!pip install bokeh

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python

import numpy as np
import pandas as pd
import yfinance as yf
import pandas_ta as ta
import tensorflow as tf
import keras
import matplotlib.pyplot as plt

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [None]:
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler

# Load data
df = yf.Ticker('^GSPC').history(period='max', interval='1d')
df = df[['Open', 'High', 'Low', 'Close']].dropna()  # Select relevant columns and drop NA
print(df.tail())
num_features = len(df.columns)

# Prepare target column
df['Target'] = df['Close'].shift(-1)
df.dropna(inplace=True)

# Scale data
scaler = MinMaxScaler(feature_range=(0, 1))
df_scaled = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)

# Display the scaled data
df_scaled.tail()


In [None]:
X = []
backcandles = 30

df = np.array(df_scaled)

for i in range (num_features):
  X.append([])
  for j in range (backcandles, df.shape[0]):
    X[i].append(df[j-backcandles:j, i])

X = np.moveaxis(X, 0, 2)
#print(X[:10])

X, yi = np.array(X), np.array(df[backcandles:, -1])
y = np.reshape(yi, (len(yi), 1))
print(X)
print(y)
print(X.shape)
print(y.shape)

In [107]:
split_ratio = 0.8
split_index = int(len(X) * split_ratio)
print(split_index)

X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

19472


In [None]:
from keras import optimizers
from keras.callbacks import History
from keras.models import Model
from keras.layers import Dense, Dropout, LSTM, Input, Activation, concatenate
from keras.callbacks import EarlyStopping

lstm_input = Input(shape=(backcandles, num_features), name='lstm_input')
inputs = LSTM(64, name='first_layer', activation='relu')(lstm_input)
output = Dense(1, activation='linear', name='output')(inputs)
model = Model(inputs=lstm_input, outputs=output)
adam = optimizers.Adam()
model.compile(optimizer=adam,loss='mse',metrics=['mape'])

early_stopping = EarlyStopping(monitor='mape', patience=5, restore_best_weights=True)
model.fit(x=X_train,y=y_train,batch_size=15,epochs=32,callbacks=[early_stopping])

In [112]:
from sklearn.metrics import mean_absolute_percentage_error,max_error, mean_squared_error, r2_score
import numpy as np

y_pred = model.predict(X_test)

# Reshape y_test and y_pred to 1D if necessary
y_test = np.squeeze(y_test)
y_pred = np.squeeze(y_pred)

# Calculate performance metrics
mape = mean_absolute_percentage_error(y_test, y_pred) * 100  # Convert to percentage
mse = mean_squared_error(y_test, y_pred)
max_error = max_error(y_test, y_pred)

# Replace zeros in y_test to avoid division by zero
y_test_safe = np.where(y_test == 0, 1e-6, y_test)

# Compute the percentage deviations
percentage_deviation = np.abs((y_test - y_pred) / y_test_safe) * 100

# Maximum Percentage Deviation (MPD)
mpd = np.max(percentage_deviation)

# Find the index of the maximum percentage deviation
max_dev_index = np.argmax(percentage_deviation)

# Print the metrics
print(f"MAPE: {mape:.4f}%")
print(f"MPD(Maximum Percentage Deviation): {mpd:.4f}%")
print(f"MSE: {mse:.8f}")

# Print the true and predicted values at the point of maximum deviation
print(f"\nPoint of Maximum Deviation:")
print(f"True Value: {y_test[max_dev_index]}")
print(f"Predicted Value: {y_pred[max_dev_index]}\n")

for i in range(10):
    print(y_test[i],y_pred[i])



[1m153/153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
MAPE: 1.8874%
MPD(Maximum Percentage Deviation): 14.9129%
MSE: 0.00023081

Point of Maximum Deviation:
True Value: 0.14879055581583456
Predicted Value: 0.17097948491573334

0.1994160268885231 0.19916414
0.20194154831242828 0.20015682
0.2024295591791803 0.20043907
0.20165893125067177 0.20131251
0.20327085140212234 0.20243801
0.20311969439875846 0.20267718
0.201581687935922 0.20328435
0.2009178687976089 0.2038109
0.20101151954717178 0.20334521
0.20268425552057023 0.20257404


In [106]:
from bokeh.plotting import figure, show, output_notebook
import numpy as np

# Enable Bokeh output in the notebook
output_notebook()

x = np.arange(len(y_test))   # Sample numbers from 1 to 100

# Create a figure
p = figure(title="Test vs Predicted Values (Interactive)",
           x_axis_label='Sample',
           y_axis_label='Value',
           width=800,
           height=400)

# Add Test line
p.line(x, y_test, legend_label="Test", line_width=2, color="black")
# Add Predicted line
p.line(x, y_pred, legend_label="Predicted", line_width=2, color="red")

# Show the plot
show(p)

4869
