# Imports

In [None]:
import sys
import os
from google.colab import drive
from google.colab import files
from dotenv import load_dotenv
import json
import warnings
import pickle

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

warnings.filterwarnings("ignore")
%matplotlib inline

# Bootstrap

In [None]:
# Seed NumPy's global RNG so any NumPy-based randomness in this notebook is repeatable.
np.random.seed(31071967)

# Find and load the .env file from the current or parent directories
load_dotenv()

drive.mount('/content/drive')

# Fail early with a clear message instead of trying to open "None/src/config.json".
project_path = os.getenv('PROJECT_PATH')
if not project_path:
    raise RuntimeError(
        "PROJECT_PATH is not set; define it in the environment or in the .env file."
    )

# The `with` block closes the file on exit, so no explicit close() is needed.
with open(f"{project_path}/src/config.json", 'r', encoding='utf-8') as f:
    project_config = json.load(f)

# Drop annotation-only keys so that only real settings remain in the config dict.
project_config.pop('_comment', None)
project_config.pop('_note', None)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Show the configured TKL value; later cells use it to locate the saved model and data files.
project_config['TKL']

'NVDA'

# Run prediction

In [None]:
def make_datasets(df, X_cols, y_col, look_back_days=None):
    """Build sliding-window sequences and a chronological 80/20 train/test split.

    Sample ``k`` pairs the ``look_back_days`` rows that precede row
    ``k + look_back_days`` (columns ``X_cols``) with that row's ``y_col`` value.
    Rows are addressed by position, so ``df`` does not need a default
    ``0..n-1`` index.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns in ``X_cols``, the column ``y_col`` and a
        ``'Date'`` column.
    X_cols : list of str
        Feature column names (a list, so the selection stays two-dimensional).
    y_col : str
        Target column name.
    look_back_days : int, optional
        Window length. When omitted, ``project_config['LOOK_BACK_DAYS']`` is used.

    Returns
    -------
    X_train, X_test : numpy.ndarray
        float32 arrays of shape ``(samples, look_back_days, len(X_cols))``.
    y_train, y_test : numpy.ndarray
        float32 target vectors aligned with the X arrays.
    dates_test : numpy.ndarray
        The last ``len(y_test)`` values of ``df['Date']`` (parsed as datetimes).

    Raises
    ------
    ValueError
        If ``look_back_days`` is smaller than 1 or ``df`` has too few rows to
        form a single window.
    """
    if look_back_days is None:
        look_back_days = project_config['LOOK_BACK_DAYS']
    look_back_days = int(look_back_days)
    if look_back_days < 1:
        raise ValueError(f"look_back_days must be >= 1, got {look_back_days}")

    n_rows = len(df)
    if n_rows <= look_back_days:
        raise ValueError(
            f"Need more than {look_back_days} rows to build a window, got {n_rows}"
        )

    # Pull the columns out once as positional arrays; this avoids label-based
    # .loc lookups, which only work when the index is exactly 0..n-1.
    features = df[X_cols].to_numpy(dtype=np.float32)
    targets = df[y_col].to_numpy(dtype=np.float32)

    # Window for target row i covers rows i-look_back_days .. i-1.
    X = np.stack(
        [features[i - look_back_days:i] for i in range(look_back_days, n_rows)]
    )
    y = targets[look_back_days:]

    # Chronological split: first 80% of the samples train, the rest test.
    split = int(0.8 * len(X))
    X_train, X_test = X[:split], X[split:]
    y_train, y_test = y[:split], y[split:]

    dates = pd.to_datetime(df['Date'])
    # Explicit start position: a plain [-0:] slice would return every date
    # when the test split is empty.
    dates_test = dates.iloc[n_rows - len(y_test):].values

    return X_train, X_test, y_train, y_test, dates_test


In [None]:
def predict_next_days(df, model, model_name, model_features, future_days=10):
  """Print a recursive multi-day forecast, one line per business day.

  The last window returned by ``make_datasets`` seeds the forecast. Each
  prediction is written into feature slot 0 of a copy of the newest row, the
  window is shifted by one step, and the model is called again.

  NOTE: a later cell of this notebook defines a function with the same name,
  which replaces this one once that cell has run.

  Parameters
  ----------
  df : pandas.DataFrame
      Frame passed to ``make_datasets``; its last row's ``Date`` is the
      starting point for the printed dates.
  model : object
      Anything with a ``predict(batch)`` method that accepts an array shaped
      like one sample of ``X_test``.
  model_name : str
      Currently unused; kept so existing calls keep working.
  model_features : list of str
      Feature columns handed to ``make_datasets``.
  future_days : int, default 10
      Number of steps to forecast.

  Returns
  -------
  None
      Results are printed as ``YYYY-MM-DD: value``.

  Notes
  -----
  Reads the notebook-level ``df_orig`` frame (column ``y_next_orig``) to map
  predictions back to the original scale.
  """
  _, X_test, _, _, _ = make_datasets(df, model_features, 'y_next')

  # NOTE(review): windows built by make_datasets end at row i-1, so this seed
  # window does not contain the last row of df - confirm this matches how the
  # model was trained.
  future_rolling_block = X_test[-1:].copy()
  last_historical_day = pd.to_datetime(df.iloc[-1].Date)
  future_predictions = []

  for _ in range(future_days):
    # Take a plain Python float: assigning or formatting a 1-element array as
    # a scalar is deprecated in recent NumPy releases.
    next_pred_day = float(np.ravel(model.predict(future_rolling_block))[0])
    future_predictions.append(next_pred_day)

    new_input_row = future_rolling_block[0, -1, :].copy() # Copy last row of features
    # Assumes feature 0 is the scaled target 'y' - TODO confirm for multi-feature models.
    new_input_row[0] = next_pred_day
    future_rolling_block = np.roll(future_rolling_block, -1, axis=1) # Shift time window
    future_rolling_block[0, -1, :] = new_input_row # Place the new feature vector at the end

  # Undo the scaling once, after the loop. Previously this expression was
  # evaluated inside the loop and its result was discarded, so the name used
  # below was never defined.
  # Presumably the inverse of a min-max scaling fitted on y_next_orig - verify.
  min_y_next_orig = df_orig['y_next_orig'].min()
  max_y_next_orig = df_orig['y_next_orig'].max()
  unscaled_future_predictions = (
      np.array(future_predictions) * (max_y_next_orig - min_y_next_orig) + min_y_next_orig
  )

  #print(f"Using {winning_model_name} and {winning_model_features} to predict the next {future_days} days:")
  #print(f"-------------------------------------------")
  for i, p in enumerate(unscaled_future_predictions, start=1):
    # BDay skips weekends only; exchange holidays are not excluded.
    prediction_date = last_historical_day + pd.offsets.BDay(i)
    print(f"{prediction_date.strftime('%Y-%m-%d')}: {float(p):.2f}")

In [None]:
# import pandas as pd
# from pandas.tseries.offsets import CustomBusinessDay
# from pandas.tseries.holiday import USFederalHolidayCalendar

# # 1. Define a custom business day that skips US Holidays
# # You can replace USFederalHolidayCalendar with other calendars or your own list of dates
# bday_us = CustomBusinessDay(calendar=USFederalHolidayCalendar())

# # 2. Use a loop or date_range (more efficient)
# start_date = pd.to_datetime(last_historical_day)

# # Generate a list of the next 'n' valid business days automatically
# future_dates = pd.bdate_range(start=start_date, periods=len(unscaled_future_predictions)+1, freq=bday_us)[1:]

# for date, prediction in zip(future_dates, unscaled_future_predictions):
#     print(f"{date.strftime('%Y-%m-%d')}: {float(prediction):.2f}")

In [None]:
def predict_next_days(df, model, model_name, model_features, future_days=10):
  """Print a recursive multi-day forecast, one line per business day.

  The last window returned by ``make_datasets`` seeds the forecast. Each
  prediction is written into feature slot 0 of a copy of the newest row, the
  window is shifted by one step, and the model is called again.

  Parameters
  ----------
  df : pandas.DataFrame
      Frame passed to ``make_datasets``; its last row's ``Date`` is the
      starting point for the printed dates.
  model : object
      Anything with a ``predict(batch)`` method that accepts an array shaped
      like one sample of ``X_test``.
  model_name : str
      Currently unused; kept so existing calls keep working.
  model_features : list of str
      Feature columns handed to ``make_datasets``.
  future_days : int, default 10
      Number of steps to forecast. With 0 nothing is printed.

  Returns
  -------
  None
      Results are printed as ``YYYY-MM-DD: value``.

  Notes
  -----
  Reads the notebook-level ``df_orig`` frame (column ``y_next_orig``) to map
  predictions back to the original scale.
  """
  _, X_test, _, _, _ = make_datasets(df, model_features, 'y_next')

  # NOTE(review): windows built by make_datasets end at row i-1, so this seed
  # window does not contain the last row of df - confirm this matches how the
  # model was trained.
  future_rolling_block = X_test[-1:].copy()
  last_historical_day = pd.to_datetime(df.iloc[-1].Date)
  future_predictions = []

  for _ in range(future_days):
    # Take a plain Python float: assigning or formatting a 1-element array as
    # a scalar is deprecated in recent NumPy releases.
    next_pred_day = float(np.ravel(model.predict(future_rolling_block))[0])
    future_predictions.append(next_pred_day)

    new_input_row = future_rolling_block[0, -1, :].copy() # Copy last row of features
    # Assumes feature 0 is the scaled target 'y' - TODO confirm for multi-feature models.
    new_input_row[0] = next_pred_day
    future_rolling_block = np.roll(future_rolling_block, -1, axis=1) # Shift time window
    future_rolling_block[0, -1, :] = new_input_row # Place the new feature vector at the end

  # Re-scale the predicted values once, after the loop: the bounds do not
  # change between iterations, and doing it here keeps the result defined
  # even when future_days is 0.
  # Presumably the inverse of a min-max scaling fitted on y_next_orig - verify.
  min_y_next_orig = df_orig['y_next_orig'].min()
  max_y_next_orig = df_orig['y_next_orig'].max()
  unscaled_future_predictions = (
      np.array(future_predictions) * (max_y_next_orig - min_y_next_orig) + min_y_next_orig
  )

  #print(f"Using {winning_model_name} and {winning_model_features} to predict the next {future_days} days:")
  #print(f"-------------------------------------------")
  for i, p in enumerate(unscaled_future_predictions, start=1):
    # BDay skips weekends only; exchange holidays are not excluded.
    prediction_date = last_historical_day + pd.offsets.BDay(i)
    print(f"{prediction_date.strftime('%Y-%m-%d')}: {float(p):.2f}")


In [None]:
from pathlib import Path
import ast

# Directory holding the saved model and pickled frames. The two parts are
# concatenated as strings, exactly as configured (no separator is inserted).
pickles_path = Path(f"{os.getenv('PROJECT_PATH')}{project_config['pickles_directory']}")
folder = pickles_path  # alias kept for any cell that still refers to `folder`

# Glob once; sorting makes the choice deterministic when several models match.
model_candidates = sorted(folder.glob(f"{project_config['TKL']}.model*.keras"))

if model_candidates:

  model_path = model_candidates[0]
  df_path = folder / f"{project_config['TKL']}.df.pkl"
  df_orig_path = folder / f"{project_config['TKL']}.df_orig.pkl"

  # Report a missing data file by name instead of failing with a bare IndexError.
  for required_path in (df_path, df_orig_path):
    if not required_path.exists():
      raise FileNotFoundError(f"Required data file not found: {required_path}")

  # File name layout: <TKL>.<tag>.<model name>.<features list literal>.keras
  fname = model_path.name  # extract filename only
  base = fname.removesuffix(".keras")
  tkl_name, tag, model_and_features = base.split(".", maxsplit=2)
  best_model_name, features_str = model_and_features.rsplit(".", maxsplit=1)
  # literal_eval only parses Python literals, so the file name cannot run code.
  best_model_features = ast.literal_eval(features_str)

  print(f"TKL: {tkl_name}")
  print(f"Model: {best_model_name}")
  print(f"Features: {best_model_features}")

  # Imported here so TensorFlow is only loaded when there is a model to open.
  import tensorflow as tf

  # SECURITY: unpickling can execute arbitrary code - only load files you trust.
  df = pd.read_pickle(df_path)
  # predict_next_days reads df_orig from the notebook namespace, so it must
  # be assigned before the call below.
  df_orig = pd.read_pickle(df_orig_path)
  best_model = tf.keras.models.load_model(model_path)

  display(df.tail(1), df_orig.tail(1))
  best_model.summary()

  predict_next_days(df, best_model, best_model_name, best_model_features, future_days=10)

else:
  # Previously the cell finished silently when no model file matched.
  print(f"No saved model matching '{project_config['TKL']}.model*.keras' found in {folder}")

TKL: NVDA
Model: GRU
Features: ['y']


Unnamed: 0,Date,y_next,y
4823,2025-12-19,0.874145,0.874145


Ticker,Date,y_next_orig,y_orig
4823,2025-12-19,180.990005,180.990005


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 378ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
2025-12-22: 165.03
2025-12-23: 162.21
2025-12-24: 159.07
2025-12-25: 155.99
2025-12-26: 153.05
2025-12-29: 150.27
2025-12-30: 147.64
2025-12-31: 145.14
2026-01-01: 142.78
2026-01-02: 140.53
