In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
import tabulate as tb
import matplotlib.pyplot as plt
import seaborn as sns
import joblib 
from tensorflow.keras.models import load_model
from stock_predictor import StockPredictor

In [None]:
# --- Training hyperparameters ------------------------------------------------
# These are handed to StockPredictor.create_new as seq_length / epochs /
# batch_size further down in the notebook.
SEQUENCE_LEN = 60
EPOCHS = 30
BATCH_SIZE = 32

# --- Model inputs and output -------------------------------------------------
# TARGET is the column being predicted; FEATURES are the columns that make up
# each input window.
TARGET = 'Close'
FEATURES = [
    'Open', 'High', 'Low', 'Close', 'Volume',
    'rsi', 'MACD_12_26_9', 'MACDh_12_26_9', 'MACDs_12_26_9',
    'BBL_20_2.0_2.0', 'BBM_20_2.0_2.0', 'BBU_20_2.0_2.0', 'BBB_20_2.0_2.0', 'BBP_20_2.0_2.0',
    'ema_10', 'ema_20', 'ema_50', 'atr', 'stoch_k', 'stoch_d',
]

# --- Training window (both bounds inclusive) ---------------------------------
# A shorter window can be tried by moving the start date, e.g. '2022-12-31'.
BEGIN_DATE_TRAINING = '2000-12-31'
END_DATE_TRAINING = '2024-12-31'

# --- Data ---------------------------------------------------------------------
# Full price history indexed by the parsed 'Date' column; rows after
# END_DATE_TRAINING stay in `df` and are excluded only from `df_train`.
df = pd.read_csv(
    "/kaggle/input/googl-2000-now/GOOGL.csv",
    index_col='Date',
    parse_dates=['Date'],
)
on_or_after_begin = df.index >= BEGIN_DATE_TRAINING
on_or_before_end = df.index <= END_DATE_TRAINING
df_train = df[on_or_after_begin & on_or_before_end]


In [None]:
def increment_date(df, date, max_date, num_of_days):
    """Find the first date present in ``df.index`` at or after an offset.

    The search starts ``num_of_days`` calendar days after ``date`` and moves
    forward one day at a time until it hits a date contained in ``df.index``.

    Args:
        df: Object whose ``index`` holds the available dates.
        date: Starting date; anything ``pd.to_datetime`` accepts.
        max_date: Last date (inclusive) that may be returned.
        num_of_days: Calendar-day offset applied to ``date`` before searching.

    Returns:
        The first matching timestamp, or ``None`` when no date in
        ``df.index`` lies between the offset date and ``max_date``.
    """
    one_day = pd.Timedelta(days=1)
    available_dates = df.index
    candidate = pd.to_datetime(date) + pd.Timedelta(days=num_of_days)

    while candidate <= max_date:
        if candidate not in available_dates:
            # Not a date we have data for; try the following calendar day.
            candidate = candidate + one_day
            continue
        return candidate

    return None

In [None]:
# Train a fresh predictor on the training slice and persist it right away
# (scaler and model file names are relative to the working directory).
sp = StockPredictor.create_new(
    df_train=df_train,
    scaler_features_path='scaler_features.save',
    scaler_target_path='scaler_target.save',
    model_path='lstm_stock_model.h5',
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    seq_length=SEQUENCE_LEN,
    features=FEATURES,
    target=TARGET
)
sp.save()

# Walk-forward evaluation over every date after the training window:
# predict the next available date's TARGET from the most recent window,
# record prediction vs. actual, then update the model with the actual value.
training_end = pd.to_datetime(END_DATE_TRAINING)
max_date = df.index.max()
cur_date = training_end

# Calendar days covered by the test period; loop-invariant, used only for
# the progress message below.
idx_all = (max_date - training_end).days

plt_data = []

while True:
    # Next date that actually exists in the data; None once we run past the end.
    next_day = increment_date(df, cur_date, max_date, 1)
    if next_day is None:
        break

    # If the current date has no row (e.g. the training end date is not in
    # the data), advance to the next available date before predicting.
    if cur_date not in df.index:
        cur_date = next_day
        continue

    # Input window: the last seq_length feature rows observed up to and
    # including cur_date, so the window advances together with the date.
    # The previous version sliced sp.df_base, which this loop never extends,
    # so each step would have been predicted from the same rows.
    # NOTE(review): confirm sp.predict expects unscaled feature rows like the
    # ones in df, and that update_model_online does not grow df_base itself.
    last_window = df.loc[df.index <= cur_date, FEATURES].iloc[-sp.seq_length:]
    if len(last_window) < sp.seq_length:
        print(f"Not enough data for window on {cur_date.date()}, skipping.")
        break

    # Store a plain float so the 'Predicted' column is numeric whether
    # sp.predict returns a scalar or a one-element array.
    prediction = float(np.ravel(sp.predict(last_window))[0])

    # increment_date only returns dates that are present in df.index, so the
    # lookup needs no extra membership check.
    actual_value = df.loc[next_day, TARGET]

    idx_cur = (next_day - training_end).days
    if idx_cur % 10 == 0:
        print(f"Percent of test data completed: {(idx_cur * 100 / idx_all):.2f}%")
    sp.update_model_online(last_window, actual_value)

    plt_data.append({
        'Date': next_day,
        'Predicted': prediction,
        'Actual': actual_value
    })

    cur_date = next_day

# Explicit columns keep the frame well-formed even when no test rows exist.
predictions = pd.DataFrame(plt_data, columns=['Date', 'Predicted', 'Actual'])

In [None]:
# Plot actual vs. predicted prices over the test period and report how often
# the day-to-day direction of the predictions matches the actual direction.

# Flatten every prediction to a plain float so plotting and np.diff work on a
# 1-D numeric array even if individual entries are one-element arrays.
predicted_prices = np.array(
    [np.ravel(p)[0] for p in predictions['Predicted']], dtype=float
)
actual_prices = predictions['Actual'].values
dates_test = predictions['Date'].values

fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(predictions['Date'], actual_prices, label='Actual Prices', color='blue')
ax.plot(predictions['Date'], predicted_prices, label='Predicted Prices', color='red')

# Marks the boundary between training data and walk-forward predictions.
ax.axvline(pd.to_datetime(END_DATE_TRAINING), color='green', linestyle='--', label='End of Training Data')

ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.set_title('Stock Price Prediction vs Actual Prices')
ax.legend()
ax.grid()
plt.show()


# Calculate accuracy of movement prediction: sign of the change between
# consecutive actual values vs. sign of the change between consecutive
# predicted values.
# NOTE(review): this compares prediction-to-prediction changes; if the intent
# is "did the model call the direction from the last known price", compare
# predicted_prices[1:] - actual_prices[:-1] instead.
movement_actual = np.sign(np.diff(actual_prices))
movement_predicted = np.sign(np.diff(predicted_prices))
if movement_actual.size:
    accuracy = np.mean(movement_actual == movement_predicted)
else:
    # Fewer than two test rows: no movement to score (avoids a mean-of-empty warning).
    accuracy = float('nan')
print(f"Movement Prediction Accuracy: {accuracy * 100:.2f}%")
