# Imports

In [1]:
import sys
import os
from google.colab import drive
from google.colab import files
from dotenv import load_dotenv
import json
import warnings
import pickle

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# NOTE(review): this silences every warning for the whole session, including
# deprecation warnings raised by the libraries used below — consider narrowing
# the filter to specific categories or modules.
warnings.filterwarnings("ignore")
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Bootstrap

In [2]:
# Seed NumPy's global random generator so random draws are repeatable.
np.random.seed(31071967)

# Find and load the .env file from the current or parent directories
load_dotenv()

drive.mount('/content/drive')

# PROJECT_PATH locates the project root. Fail early with a clear message when
# it is missing instead of attempting to open "None/src/config.json".
project_path = os.getenv('PROJECT_PATH')
if not project_path:
    raise EnvironmentError(
        "PROJECT_PATH is not set; define it in the environment or in a .env file."
    )

# Load the project configuration. The `with` block closes the file on exit,
# so no explicit close() call is needed.
with open(f"{project_path}/src/config.json", 'r') as f:
    project_config = json.load(f)

# Drop the annotation-only keys if they are present.
for annotation_key in ('_comment', '_note'):
    project_config.pop(annotation_key, None)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Download TKL price history from Yahoo Finance

In [3]:
# Maps each output column name to the symbol requested from Yahoo Finance.
# Only the target series "y" is defined; its symbol comes from the project config.
tickers_yf = dict(y=f"{project_config['TKL']}")

# Column names in the order date, then target.
# NOTE(review): not referenced by the other visible cells — confirm it is still needed.
desired_order = ["Date", "y"]

In [4]:
import yfinance as yf
import pandas as pd
from pandas_datareader import data as pdr

from datetime import date, timedelta
# Download range: ends yesterday and reaches HISTORY_DEPTH calendar days back.
# NOTE(review): yfinance documents `end` as exclusive, so the newest row returned
# is from before `end_date` — confirm this is intended.
end_date = date.today() - timedelta(days=1)
start_date = end_date - timedelta(days=int(project_config["HISTORY_DEPTH"]))

# TNYA uses a fixed start date instead of the HISTORY_DEPTH-based one.
if project_config['TKL'] == 'TNYA':
    start_date = pd.to_datetime("30.07.2022", format="%d.%m.%Y")

# ---- DOWNLOAD FROM YAHOO FINANCE ----
# Only the "Close" column of the download is kept.
ts_yf = yf.download(
    tickers=list(tickers_yf.values()),
    start=start_date,
    end=end_date,
    auto_adjust=True
)["Close"]

# rename columns to readable names (symbol -> key of tickers_yf)
rename_map = {v: k for k, v in tickers_yf.items()}
ts_yf = ts_yf.rename(columns=rename_map)

# Fill gaps in the price series: carry the last known value forward, then
# back-fill anything still missing at the very start.
# DataFrame.ffill()/bfill() replace the deprecated fillna(method=...) form.
ts_yf = ts_yf.ffill().bfill()

# Turn the index into a regular column.
ts_yf = ts_yf.reset_index()

print(f"\n\nDataset for y={project_config['TKL']}")
display(ts_yf.head(1))
display(ts_yf.tail(1))
ts_yf.info()

# Working copy used by the following cells.
df = ts_yf.copy()

[*********************100%***********************]  1 of 1 completed



Dataset for y=NVDA





Ticker,Date,y
0,2006-10-23,0.48256


Ticker,Date,y
4820,2025-12-19,180.990005


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4821 entries, 0 to 4820
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    4821 non-null   datetime64[ns]
 1   y       4821 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 75.5 KB


In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import os

# -----------------------
# Parameters
# -----------------------
STEP_DAYS = 10      # rows between the starts of consecutive windows
WINDOW_DAYS = 260   # rows plotted in each image
FUTURE_DAYS = 66    # rows after a window used to label it
BUY_TH = 0.10       # return above this -> BUY
SELL_TH = -0.10     # return below this -> SELL

# Output tree root: <PROJECT_PATH><images_directory><TKL>_graphs/
OUT_ROOT_DIR = f"{os.getenv('PROJECT_PATH')}{project_config['images_directory']}{project_config['TKL']}_graphs/"
LABELS = ["BUY", "SELL", "KEEP"]

# train/ and valid/ each get one sub-folder per label.
labelled_dirs = [
    os.path.join(OUT_ROOT_DIR, split_name, label_name)
    for split_name in ("train", "valid")
    for label_name in LABELS
]
for labelled_dir in labelled_dirs:
    os.makedirs(labelled_dir, exist_ok=True)

# pred/ is created on its own, without label sub-folders.
os.makedirs(os.path.join(OUT_ROOT_DIR, "pred"), exist_ok=True)

# -----------------------
# Prepare data
# -----------------------
# Work on a date-sorted copy with a fresh 0..n-1 index so that positional
# slicing follows chronological order.
df = (
    df.copy()
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .sort_values('Date')
    .reset_index(drop=True)
)

# Start row of every window, STEP_DAYS apart, stopping early enough that a full
# window and a full look-ahead period still fit inside the data.
indices = [*range(0, len(df) - WINDOW_DAYS - FUTURE_DAYS, STEP_DAYS)]
total_windows = len(indices)

# Chronological split points: windows before train_split_idx are training data
# and the very last window is the prediction sample.
pred_idx = total_windows - 1
train_split_idx = int(total_windows * 0.8)

results = []

# -----------------------
# Main loop
# -----------------------
# For each window: derive a BUY/SELL/KEEP label from the FUTURE_DAYS rows that
# follow it, draw the window's prices as an axis-free line chart, and save the
# PNG into the folder for its split (train / valid / pred).
for i, start_idx in enumerate(indices):
    graph_df = df.iloc[start_idx : start_idx + WINDOW_DAYS]
    future_df = df.iloc[start_idx + WINDOW_DAYS : start_idx + WINDOW_DAYS + FUTURE_DAYS]

    # First and last calendar date shown in the image (used in the file name).
    # NOTE: these reuse the names of the download range set in the download cell.
    start_date = graph_df['Date'].iloc[0].date()
    end_date = graph_df['Date'].iloc[-1].date()

    # Recommendation logic: relative change from the window's last price to the
    # highest price of the look-ahead period.
    # NOTE(review): because the maximum is used, SELL is only assigned when every
    # look-ahead price is more than 10% below the last price — confirm intended.
    last_price = graph_df['y'].iloc[-1]
    future_max = future_df['y'].max()
    ret = (future_max - last_price) / last_price

    if ret > BUY_TH:
        rec = "BUY"
    elif ret < SELL_TH:
        rec = "SELL"
    else:
        rec = "KEEP"

    # Define Save Path
    fname = f"{project_config['TKL']}_{start_date}_{end_date}_{rec}.png"

    if i == pred_idx:
        # Save directly to pred/
        save_path = os.path.join(OUT_ROOT_DIR, "pred", fname)
        split_name = "pred"
    elif i < train_split_idx:
        # Save to train/LABEL/
        save_path = os.path.join(OUT_ROOT_DIR, "train", rec, fname)
        split_name = "train"
    else:
        # Save to valid/LABEL/
        save_path = os.path.join(OUT_ROOT_DIR, "valid", rec, fname)
        split_name = "valid"

    # -----------------------
    # Plotting
    # -----------------------
    # Plot raw values (not the Series) so the x axis is the position in the window.
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(graph_df['y'].values)
    ax.set_axis_off()
    fig.tight_layout()

    # Operate on this figure explicitly instead of pyplot's "current figure",
    # and close it so figures do not pile up in memory across iterations.
    fig.savefig(save_path, bbox_inches="tight", pad_inches=0)
    plt.close(fig)

    results.append({"split": split_name, "recommendation": rec, "file": fname})

if total_windows:
    print(f"Done! Predicted image saved to: {os.path.join(OUT_ROOT_DIR, 'pred')}")
else:
    # Too few rows for even one window plus its look-ahead period: nothing was
    # written, so do not report a saved prediction image.
    print(
        f"No images generated: need more than {WINDOW_DAYS + FUTURE_DAYS} rows, "
        f"got {len(df)}."
    )

Done! Predicted image saved to: /content/drive/MyDrive/Projects/GitHub/Stocks/images/NVDA_graphs/pred


In [6]:
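# NOTE: superseded draft of the generator in In [5], kept commented out for reference.
# Unlike In [5], it creates BUY/SELL/KEEP sub-folders under pred/ as well and saves
# the pred image into pred/<label>/.
# import pandas as pd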
# import matplotlib.pyplot as plt
# import os

# # -----------------------
# # Parameters
# # -----------------------
# STEP_DAYS = 10
# WINDOW_DAYS = 260
# FUTURE_DAYS = 66
# BUY_TH = 0.10     # +10%
# SELL_TH = -0.10   # -10%

# # Setup Directories
# OUT_ROOT_DIR = f"{os.getenv('PROJECT_PATH')}{project_config['images_directory']}{project_config['TKL']}_graphs/"
# LABELS = ["BUY", "SELL", "KEEP"]
# SPLITS = ["train", "valid", "pred"]

# # Create Grandparent/Parent folders
# for s in SPLITS:
#     for l in LABELS:
#         os.makedirs(os.path.join(OUT_ROOT_DIR, s, l), exist_ok=True)

# # -----------------------
# # Prepare data
# # -----------------------
# df = df.copy()
# df['Date'] = pd.to_datetime(df['Date'])
# df = df.sort_values('Date').reset_index(drop=True)

# # Calculate total possible windows
# indices = list(range(0, len(df) - WINDOW_DAYS - FUTURE_DAYS, STEP_DAYS))
# total_windows = len(indices)

# # Chronological Split Points
# train_split_idx = int(total_windows * 0.8)
# # The very last window is reserved for 'pred'
# pred_idx = total_windows - 1

# results = []

# # -----------------------
# # Main loop
# # -----------------------
# for i, start_idx in enumerate(indices):
#     graph_df = df.iloc[start_idx : start_idx + WINDOW_DAYS]
#     future_df = df.iloc[start_idx + WINDOW_DAYS : start_idx + WINDOW_DAYS + FUTURE_DAYS]

#     start_date = graph_df['Date'].iloc[0].date()
#     end_date = graph_df['Date'].iloc[-1].date()

#     # Recommendation logic
#     last_price = graph_df['y'].iloc[-1]
#     future_max = future_df['y'].max()
#     ret = (future_max - last_price) / last_price

#     if ret > BUY_TH:
#         rec = "BUY"
#     elif ret < SELL_TH:
#         rec = "SELL"
#     else:
#         rec = "KEEP"

#     # Determine Folder (Grandparent)
#     if i == pred_idx:
#         split_dir = "pred"
#     elif i < train_split_idx:
#         split_dir = "train"
#     else:
#         split_dir = "valid"

#     # -----------------------
#     # Plot (Pure Visuals)
#     # -----------------------
#     fig, ax = plt.subplots(figsize=(10, 5))
#     ax.plot(graph_df['y'].values) # use values to avoid index plotting

#     ax.set_axis_off() # Faster way to remove all spines/ticks
#     plt.tight_layout()

#     # Save to OUT_ROOT_DIR/split/label/filename.png
#     fname = f"{project_config['TKL']}_{start_date}_{end_date}_{rec}.png"
#     save_path = os.path.join(OUT_ROOT_DIR, split_dir, rec, fname)

#     plt.savefig(save_path, bbox_inches="tight", pad_inches=0)
#     plt.close()

#     results.append({
#         "split": split_dir,
#         "recommendation": rec,
#         "file": fname
#     })

# print(f"Dataset generated in {OUT_ROOT_DIR}")

In [7]:
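# NOTE: older superseded draft, kept commented out for reference. It would not run
# as written: the TEST_* directories and OUT_DIR are never defined in this cell
# (the VALID_* directories are defined but never created), and the sell label is
# spelled "SALE".
# import pandas as pd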
# import matplotlib.pyplot as plt
# import os

# # -----------------------
# # Parameters
# # -----------------------
# STEP_DAYS = 10
# WINDOW_DAYS = 260
# FUTURE_DAYS = 66

# BUY_TH = 0.10     # +10%
# SELL_TH = -0.10   # -10%

# OUT_ROOT_DIR = f"{os.getenv('PROJECT_PATH')}{project_config['images_directory']}{project_config['TKL']}_graphs/"

# TRAIN_DIR = f"{OUT_ROOT_DIR}train/"
# TRAIN_SELL_DIR = f"{TRAIN_DIR}SELL/"
# TRAIN_BUY_DIR = f"{TRAIN_DIR}BUY/"
# TRAIN_KEEP_DIR = f"{TRAIN_DIR}KEEP/"

# VALID_DIR = f"{OUT_ROOT_DIR}valid/"
# VALID_SELL_DIR = f"{VALID_DIR}SELL/"
# VALID_BUY_DIR = f"{VALID_DIR}BUY/"
# VALID_KEEP_DIR = f"{VALID_DIR}KEEP/"

# os.makedirs(OUT_ROOT_DIR, exist_ok=True)

# os.makedirs(TRAIN_DIR, exist_ok=True)
# os.makedirs(TRAIN_SELL_DIR, exist_ok=True)
# os.makedirs(TRAIN_BUY_DIR, exist_ok=True)
# os.makedirs(TRAIN_KEEP_DIR, exist_ok=True)

# os.makedirs(TEST_DIR, exist_ok=True)
# os.makedirs(TEST_SELL_DIR, exist_ok=True)
# os.makedirs(TEST_BUY_DIR, exist_ok=True)
# os.makedirs(TEST_KEEP_DIR, exist_ok=True)


# # -----------------------
# # Prepare data
# # -----------------------
# df = df.copy()
# df['Date'] = pd.to_datetime(df['Date'])
# df = df.sort_values('Date').reset_index(drop=True)

# results = []

# # -----------------------
# # Main loop
# # -----------------------
# for start_idx in range(0, len(df) - WINDOW_DAYS - FUTURE_DAYS, STEP_DAYS):

#     graph_df = df.iloc[start_idx:start_idx + WINDOW_DAYS]
#     future_df = df.iloc[start_idx + WINDOW_DAYS:
#                          start_idx + WINDOW_DAYS + FUTURE_DAYS]

#     start_date = graph_df['Date'].iloc[0].date()
#     end_date = graph_df['Date'].iloc[-1].date()

#     # -----------------------
#     # Recommendation logic
#     # -----------------------
#     last_price = graph_df['y'].iloc[-1]
#     future_price = future_df['y'].max()

#     ret = (future_price - last_price) / last_price

#     if ret > BUY_TH:
#         rec = "BUY"
#     elif ret < SELL_TH:
#         rec = "SALE"
#     else:
#         rec = "KEEP"

#     # -----------------------
#     # Plot (PURE SHAPE)
#     # -----------------------
#     fig, ax = plt.subplots(figsize=(10, 5))
#     ax.plot(graph_df['y'])

#     # remove all axis visuals
#     ax.set_xticks([])
#     ax.set_yticks([])
#     ax.set_xlabel("")
#     ax.set_ylabel("")
#     ax.grid(False)

#     # optional: remove frame completely
#     for spine in ax.spines.values():
#         spine.set_visible(False)

#     plt.tight_layout()

#     fname = f"{project_config['TKL']}_{start_date}_{end_date}_{rec}.png"
#     plt.savefig(os.path.join(OUT_DIR, fname), bbox_inches="tight", pad_inches=0)
#     plt.close()

#     # -----------------------
#     # Store metadata
#     # -----------------------
#     results.append({
#         "start_date": start_date,
#         "end_date": end_date,
#         "last_price": last_price,
#         "future_price": future_price,
#         "66d_return": ret,
#         "recommendation": rec,
#         "file": fname
#     })

# # -----------------------
# # Results DataFrame
# # -----------------------
# df_classificaiton = pd.DataFrame(results)
# df_classificaiton.to_csv(f"{OUT_DIR}/graph_classifications.csv", index=False)
# df_classificaiton

In [8]:
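# NOTE: earliest draft, kept commented out for reference. It labels each window by
# the single price `horizon` rows after the window's last row (not the maximum over
# the horizon) with a 5% threshold, and `df.index[i].strftime(...)` requires a
# datetime index rather than the integer index produced by reset_index().
# import pandas as pd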

# # Define the evaluation period (66 days) and threshold (5%)
# horizon = 66
# threshold = 0.05
# step = 22
# window_size = 260

# classifications = []

# # We stop 'horizon' days before the end of the dataframe to ensure we have future data to check
# for i in range(0, len(df) - window_size - horizon, step):
#     # Current window end price
#     p_end = df.iloc[i + window_size - 1]['y']

#     # Price 66 days into the future
#     p_future = df.iloc[i + window_size + horizon - 1]['y']

#     # Calculate return
#     pct_change = (p_future - p_end) / p_end

#     # Classify
#     if pct_change > threshold:
#         recommendation = "Buy"
#     elif pct_change < -threshold:
#         recommendation = "Sale"
#     else:
#         recommendation = "Keep"

#     # Store result mapping to the filename
#     start_date = df.index[i].strftime('%Y-%m-%d')
#     classifications.append({
#         'Graph_File': f'nvda_window_{start_date}.png',
#         'Recommendation_nextQ': recommendation
#     })

# # Save results to a CSV
# class_df = pd.DataFrame(classifications)
# class_df.to_csv('graph_classifications.csv', index=False)

# print(class_df.head(10))