# TCN Training Only (Clean)

This notebook is for **training only**.
It uses isolated `train_*` variables and a Sharpe-based checkpoint policy.

## 1) Connect to Colab VM and Sync Repo
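Run this section first: it clones or updates the repo on the Colab VM and clears stale outputs and caches so the later cells start from a clean state.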

In [None]:
# Fresh-start cleanup cell (run before importing project modules).
# Syncs the repo to origin/main, deletes old outputs/caches, and evicts any
# already-imported project modules so later imports pick up the fresh code.
import gc
import shutil
import subprocess
import sys
from pathlib import Path

TRAIN_REPO_URL = "https://github.com/Dave-DKings/tape_tcn_project.git"
TRAIN_REPO_DIR = Path("/content/adaptive_portfolio_rl")

# 1) Sync repo to latest main (clone on first run, then hard-reset to origin/main;
#    the hard reset discards any local edits to tracked files).
if not (TRAIN_REPO_DIR / ".git").exists():
    subprocess.run(["git", "clone", TRAIN_REPO_URL, str(TRAIN_REPO_DIR)], check=True)

subprocess.run(["git", "-C", str(TRAIN_REPO_DIR), "fetch", "origin"], check=True)
subprocess.run(["git", "-C", str(TRAIN_REPO_DIR), "reset", "--hard", "origin/main"], check=True)

# 2) Remove old experiment outputs/checkpoints/cached data
purge_paths = [
    TRAIN_REPO_DIR / "tcn_fusion_results",
    TRAIN_REPO_DIR / "tcn_results",
    TRAIN_REPO_DIR / "tcn_att_results",
    TRAIN_REPO_DIR / "output_logs",
    TRAIN_REPO_DIR / "data" / "phase1_preparation_artifacts",
    TRAIN_REPO_DIR / "data" / "master_features_NORMALIZED.csv",
    TRAIN_REPO_DIR / "data" / "daily_ohlcv_assets.csv",              # forces fresh OHLCV download
    TRAIN_REPO_DIR / "data" / "processed_daily_macro_features.csv",   # forces fresh macro cache build
]

deleted = []
for p in purge_paths:
    if p.is_dir():
        shutil.rmtree(p, ignore_errors=True)
    elif p.is_file():
        p.unlink(missing_ok=True)
    else:
        continue  # nothing on disk for this target
    # rmtree runs with ignore_errors=True, so confirm the path is really gone
    # before reporting it as deleted.
    if not p.exists():
        deleted.append(str(p))

# 3) Remove Python/Jupyter cache folders.
#    The matches are materialised first so the tree is not modified while it is
#    still being walked.
for cache_dir in list(TRAIN_REPO_DIR.rglob("__pycache__")):
    shutil.rmtree(cache_dir, ignore_errors=True)
for ckpt_dir in list(TRAIN_REPO_DIR.rglob(".ipynb_checkpoints")):
    shutil.rmtree(ckpt_dir, ignore_errors=True)

# 4) Clear loaded project modules from kernel memory.
#    The bare "src" package must be evicted too: if it stays cached, it keeps its
#    old submodule attributes, and `from src import <submodule>` would hand back
#    the stale module object instead of re-importing the freshly synced code.
for mod in list(sys.modules):
    if mod == "src" or mod.startswith(("src.", "src_")):
        del sys.modules[mod]
gc.collect()

print("✅ Fresh start complete")
print(f"Repo: {TRAIN_REPO_DIR}")
print(f"Deleted paths: {len(deleted)}")
for d in deleted:
    print(" -", d)

In [None]:
# Post-cleanup verification: list the repo root and report whether each purge
# target is still present on disk.
import os
from pathlib import Path  # imported here so the cell does not rely on an earlier cell

root = Path("/content/adaptive_portfolio_rl")
print("Exists:", root.exists())
print("CWD:", os.getcwd())

print("\nTop-level:")
if root.is_dir():
    for p in sorted(root.iterdir()):
        kind = "DIR " if p.is_dir() else "FILE"
        print(f" - [{kind}] {p.name}")
else:
    # iterdir() would raise on a missing directory; report it instead.
    print(" (repo directory not found - run the cleanup/sync cell first)")

# Quick check for outputs/caches you expected to be deleted
targets = [
    "tcn_fusion_results",
    "tcn_results",
    "tcn_att_results",
    "output_logs",
    "data/phase1_preparation_artifacts",
    "data/master_features_NORMALIZED.csv",
    "data/daily_ohlcv_assets.csv",
    "data/processed_daily_macro_features.csv",
]
print("\nTarget paths:")
for t in targets:
    p = root / t
    print(f" - {t}: {'EXISTS' if p.exists() else 'MISSING'}")


In [None]:
#!find /content/adaptive_portfolio_rl -maxdepth 3 | head -n 300

In [None]:
# Install project requirements in Colab VM.
# Imports are declared here so the cell runs without relying on names leaked
# from an earlier cell.
import subprocess
import sys
from pathlib import Path

REPO_DIR = Path("/content/adaptive_portfolio_rl")
REQ_FILE = REPO_DIR / "requirements.txt"

if not REQ_FILE.exists():
    raise FileNotFoundError(f"Missing requirements file: {REQ_FILE}")

# Invoke pip through the kernel's own interpreter so packages land in the
# environment this notebook is actually running in.
print("Using python:", sys.executable)
subprocess.run([sys.executable, "-m", "pip", "install", "--upgrade", "pip", "setuptools", "wheel"], check=True)
subprocess.run([sys.executable, "-m", "pip", "install", "-r", str(REQ_FILE)], check=True)

print("✅ Requirements installed")


In [None]:
# --- GPU sanity/setup for TensorFlow ---
import os
import tensorflow as tf

# 1) Confirm Colab sees an NVIDIA GPU
!nvidia-smi -L

# 2) Confirm TensorFlow sees GPU(s)
gpus = tf.config.list_physical_devices("GPU")
print("TF GPUs:", gpus)
if not gpus:
    raise RuntimeError("No GPU visible to TensorFlow. In Colab: Runtime -> Change runtime type -> GPU")

# 3) Safer GPU memory behavior
for g in gpus:
    tf.config.experimental.set_memory_growth(g, True)

# 4) Optional: speed boost on modern GPUs
tf.keras.mixed_precision.set_global_policy("mixed_float16")
print("Mixed precision policy:", tf.keras.mixed_precision.global_policy())

# 5) Quick proof op runs on GPU
with tf.device("/GPU:0"):
    a = tf.random.normal((4096, 4096))
    b = tf.random.normal((4096, 4096))
    c = tf.matmul(a, b)

print("Matmul device:", c.device)
print("Default GPU device name:", tf.test.gpu_device_name())

In [None]:
# Report the versions of the core numeric stack installed in this runtime.
import numpy, pandas, tensorflow

for pkg in (numpy, pandas, tensorflow):
    print(pkg.__name__, pkg.__version__)

## 2) Imports

In [None]:
import os, sys
from pathlib import Path

REPO_DIR = Path("/content/adaptive_portfolio_rl")

# Fail fast when the repo has not been cloned yet.
if not REPO_DIR.exists():
    raise FileNotFoundError(f"Repo not found: {REPO_DIR}")

# Run from the repo root so relative paths in the project resolve.
os.chdir(REPO_DIR)

# Make the repo root the first import location (added once only).
repo_dir_str = str(REPO_DIR)
if repo_dir_str not in sys.path:
    sys.path.insert(0, repo_dir_str)

print(f"cwd: {os.getcwd()}")
print(f"sys.path[0]: {sys.path[0]}")

In [None]:
from copy import deepcopy
from pathlib import Path

import pandas as pd

from src.config import get_active_config
from src.csv_logger import CSVLogger
from src.notebook_helpers.tcn_phase1 import prepare_phase1_dataset, run_experiment6_tape

## 3) Base Config and Dataset Prep

In [None]:
# ------------------------------------------------------------------
# Feature lock from CORE project pipeline (no metadata/manifest)
# ------------------------------------------------------------------
from src.data_utils import DataProcessor


def build_core_active_feature_columns(cfg):
    """Return the phase-1 feature columns that are active under ``cfg``.

    A ``DataProcessor`` is built from ``cfg`` and asked for its ``"phase1"``
    feature columns; repeated names are dropped while keeping first-seen order.
    """
    processor = DataProcessor(cfg)
    ordered_unique = {}
    for column in processor.get_feature_columns("phase1"):
        # dict keys keep insertion order, so this de-duplicates without reordering
        ordered_unique.setdefault(column, None)
    return list(ordered_unique)


def apply_core_feature_lock(cfg, active_feature_columns):
    """Restrict ``cfg`` to ``active_feature_columns`` via its disabled-feature list.

    The full candidate pool is obtained from a deep copy of ``cfg`` whose feature
    selection filter is switched off. Every candidate that is not in
    ``active_feature_columns`` is then written into
    ``cfg["feature_params"]["feature_selection"]["disabled_features"]`` and the
    filter is switched on. ``cfg`` is modified in place.

    Returns:
        ``(core_all_cols, disabled)``: the de-duplicated candidate columns in
        first-seen order, and the sorted list of columns that were disabled.
    """
    # Probe on a copy so turning the filter off never touches the caller's config.
    unfiltered_cfg = deepcopy(cfg)
    unfiltered_selection = unfiltered_cfg.setdefault("feature_params", {}).setdefault("feature_selection", {})
    unfiltered_selection.update(disable_features=False, disabled_features=[])

    core_all_cols = []
    seen = set()
    for column in DataProcessor(unfiltered_cfg).get_feature_columns("phase1"):
        if column not in seen:
            seen.add(column)
            core_all_cols.append(column)

    wanted = set(active_feature_columns)
    disabled = sorted(column for column in core_all_cols if column not in wanted)

    # Enforce "active only" on the real config: disabled = full pool - active.
    selection = cfg.setdefault("feature_params", {}).setdefault("feature_selection", {})
    selection["disable_features"] = True
    selection["disabled_features"] = disabled

    return core_all_cols, disabled


In [None]:
# Seed handed to the training run later in the notebook.
TRAIN_RANDOM_SEED = 42

# Private copy of the phase-1 config so overrides never touch the shared object.
train_config = deepcopy(get_active_config("phase1"))

# Optional: override analysis horizon
# train_config["ANALYSIS_END_DATE"] = "2025-09-01"

# Build active features from core project files/pipeline, then lock the config
# to exactly that set (apply_core_feature_lock mutates train_config in place).
train_active_feature_columns = build_core_active_feature_columns(train_config)
_, train_disabled_features = apply_core_feature_lock(train_config, train_active_feature_columns)

print("✅ Core feature lock applied")
print("   active_feature_columns:", len(train_active_feature_columns))
print("   disabled_features:", len(train_disabled_features))

# Force fresh dataset build and market data re-download.
# Drop any dataset left over from a previous run of this cell before rebuilding.
globals().pop("train_phase1_data", None)

# NOTE(review): artifacts are directed to <repo>/data_exports here, while the
# cleanup and export cells reference <repo>/data/phase1_preparation_artifacts.
# Confirm which directory is intended.
train_phase1_data = prepare_phase1_dataset(
    train_config,
    force_download=True,
    preparation_artifacts_dir="/content/adaptive_portfolio_rl/data_exports",
)


In [None]:
# Size of the train/test splits produced by the dataset preparation step.
train_frame = train_phase1_data.train_df
print("Train shape:", train_frame.shape)
print("Test shape:", train_phase1_data.test_df.shape)

cols = train_frame.columns
print("Total columns:", len(cols))

# Quick sanity check: columns whose suffix suggests a raw/unscaled duplicate.
dup_like = [name for name in cols if name.endswith(("_raw", "_unscaled"))]
print("Potential redundant raw/unscaled cols:", len(dup_like))
print(dup_like[:20])

In [None]:
# Bare expression: renders the training frame's column index as the cell output.
cols

In [None]:
# Cross-check the feature lock: nothing listed as disabled may still be in use.
used = set(train_phase1_data.data_processor.get_feature_columns("phase1"))
feature_selection_cfg = train_config["feature_params"]["feature_selection"]
disabled = set(feature_selection_cfg["disabled_features"])

still_active = sorted(disabled.intersection(used))  # should be []
print(f"Used feature count: {len(used)}")
print("Disabled that still in used:", still_active)
print("VIX_zscore used?", "VIX_zscore" in used)

In [None]:
# Trim master/train/test frames to the base market columns plus the features
# the data processor actually uses.
base_cols = ["Date", "Ticker", "Open", "High", "Low", "Close", "Volume"]

# `used` is a set, so iterating it directly gives an order that can differ
# between kernel sessions. Build the selection from the frame's own column order
# instead (base columns first), and de-duplicate so a feature that is also a
# base column is not selected twice.
available_cols = list(train_phase1_data.master_df.columns)
keep = list(dict.fromkeys(
    [c for c in base_cols if c in available_cols]
    + [c for c in available_cols if c in used]
))

train_phase1_data.master_df = train_phase1_data.master_df[keep].copy()
train_phase1_data.train_df = train_phase1_data.train_df[keep].copy()
train_phase1_data.test_df  = train_phase1_data.test_df[keep].copy()

## 4) Training Overrides (Sharpe-Only Checkpoint Policy)

This policy keeps only Sharpe-threshold high-watermark checkpointing (`>= 0.5`) and disables rare/step/periodic/TAPE checkpoint routes.

In [None]:
# ============================================================================
# NEXT RUN OVERRIDES (post-mortem tuned: KL stability + turnover control)
# ============================================================================
from copy import deepcopy

# Re-copy the config so the overrides are applied to a fresh object
# (or deepcopy(config) if that's your active object).
train_config = deepcopy(train_config)

# Short aliases into the nested config; writes through them update train_config.
tp = train_config["training_params"]
ap = train_config["agent_params"]
ppo, env = ap["ppo_params"], train_config["environment_params"]

# ----------------------------------------------------------------------------
# 1) Core run shape
# ----------------------------------------------------------------------------
tp.update({
    "max_total_timesteps": 100_000,
    "timesteps_per_ppo_update": 384,  # fallback
    "timesteps_per_ppo_update_schedule": [
        {"threshold": 0, "timesteps_per_update": 384},
        {"threshold": 50_000, "timesteps_per_update": 448},
    ],
    "batch_size_ppo_schedule": [
        {"threshold": 0, "batch_size": 96},
        {"threshold": 50_000, "batch_size": 112},
    ],
})

# ----------------------------------------------------------------------------
# 2) PPO stability (reduce aggressiveness)
# ----------------------------------------------------------------------------
ppo.update({
    "num_ppo_epochs": 1,
    "policy_clip": 0.08,
    "target_kl": 0.020,
    "kl_stop_multiplier": 1.25,
    "minibatches_before_kl_stop": 2,
    "max_grad_norm": 0.30,
    "actor_lr": 8e-6,
    "critic_lr": 1.2e-4,
    "entropy_coef": 0.0020,
})

tp["actor_lr_schedule"] = [
    {"threshold": 0, "lr": 8e-6},
    {"threshold": 30_000, "lr": 7e-6},
    {"threshold": 60_000, "lr": 6e-6},
]

# ----------------------------------------------------------------------------
# 3) RA-KL (less aggressive, prevent floor lock)
# ----------------------------------------------------------------------------
tp.update({
    "ra_kl_enabled": True,
    "ra_kl_target_ratio": 1.0,
    "ra_kl_ema_alpha": 0.25,
    "ra_kl_gain": 0.03,
    "ra_kl_deadband": 0.20,
    "ra_kl_max_change_fraction": 0.05,
    "ra_kl_min_target_kl": 0.016,
    "ra_kl_max_target_kl": 0.030,
})

# ----------------------------------------------------------------------------
# 4) Dirichlet + concentration controls
# ----------------------------------------------------------------------------
ap.update({
    "dirichlet_alpha_activation": "elu",
    "dirichlet_logit_temperature": 1.15,
    "dirichlet_alpha_cap": 35.0,
    "dirichlet_epsilon": {"max": 1.0, "min": 0.5},
})

env.update({
    "concentration_penalty_scalar": 3.0,
    "concentration_target_hhi": 0.12,
    "top_weight_penalty_scalar": 2.0,
    "action_realization_penalty_scalar": 0.5,
})

# ----------------------------------------------------------------------------
# 5) Turnover + execution smoothing
# ----------------------------------------------------------------------------
env.update({
    "target_turnover": 0.35,
    "turnover_penalty_scalar": 1.5,
    "transaction_cost_pct": 0.001,
})

tp.update({
    "action_execution_beta_curriculum": {
        0: 0.20,
        30_000: 0.35,
    },
    "evaluation_action_execution_beta": 0.35,
    "turnover_penalty_curriculum": {
        0: 1.5,
        10_000: 2.0,
        25_000: 2.5,
        40_000: 3.0,
    },
    "evaluation_turnover_penalty_scalar": 3.0,
})

# ----------------------------------------------------------------------------
# 6) Episode horizon curriculum (keep cap late)
# ----------------------------------------------------------------------------
tp.update({
    "use_episode_length_curriculum": True,
    "episode_length_curriculum_schedule": [
        {"threshold": 0, "limit": 252},
        {"threshold": 10_000, "limit": 504},
        {"threshold": 25_000, "limit": 756},
        {"threshold": 90_000, "limit": 1008},
    ],
})

# ----------------------------------------------------------------------------
# 7) Logging + checkpoints
# ----------------------------------------------------------------------------
tp.update({
    "log_step_diagnostics": True,
    "update_log_interval": 5,
    "alpha_diversity_log_interval": 1,
    "alpha_diversity_warning_after_updates": 120,
    "alpha_diversity_warning_std_threshold": 0.25,
})

# Checkpoint routes: only the high-watermark Sharpe route is enabled here.
tp.update({
    "high_watermark_checkpoint_enabled": True,
    "high_watermark_sharpe_threshold": 0.5,
    "step_sharpe_checkpoint_enabled": False,
    "periodic_checkpoint_every_steps": 0,
    "rare_checkpoint_params": {"enable": False},
    "tape_checkpoint_threshold": 999.0,
})

# Echo the key overrides so the cell output records what was applied.
print("✅ Applied next-run override (KL-stable + smoother execution + moderate turnover control)")
print("num_ppo_epochs:", ppo["num_ppo_epochs"])
print("target_kl:", ppo["target_kl"], "| kl_stop_multiplier:", ppo["kl_stop_multiplier"])
print("RA-KL:", {k: tp[k] for k in [
    "ra_kl_enabled", "ra_kl_gain", "ra_kl_deadband",
    "ra_kl_max_change_fraction", "ra_kl_min_target_kl", "ra_kl_max_target_kl"
]})
print("action_execution_beta_curriculum:", tp["action_execution_beta_curriculum"])
print("turnover_penalty_curriculum:", tp["turnover_penalty_curriculum"])
print("concentration:", env["concentration_penalty_scalar"], env["concentration_target_hhi"], env["top_weight_penalty_scalar"])

## 5) Run Training

In [None]:
# Master switch: set to False to skip the training run when re-executing the notebook.
RUN_TRAINING = True

if RUN_TRAINING:
    tp = train_config["training_params"]
    # Architecture comes from the config; .get() yields None when the key is absent.
    architecture = train_config["agent_params"].get("actor_critic_type")
    print("🚀 Starting training")
    print("Architecture:", architecture)
    print("max_total_timesteps:", tp["max_total_timesteps"])

    train_experiment6 = run_experiment6_tape(
        phase1_data=train_phase1_data,
        config=train_config,
        random_seed=TRAIN_RANDOM_SEED,
        csv_logger_cls=CSVLogger,
        use_covariance=True,
        architecture=architecture,
        timesteps_per_update=tp.get("timesteps_per_ppo_update", 384),
        max_total_timesteps=tp["max_total_timesteps"],
    )

    print("✅ Training complete")
    print("checkpoint_prefix:", train_experiment6.checkpoint_path)
else:
    print("ℹ️ RUN_TRAINING=False")

## 6) Inspect Latest Training Logs

In [None]:
# Load the most recently modified episodes CSV written by the training run.
TRAIN_RESULTS_ROOT = Path("/content/adaptive_portfolio_rl/tcn_fusion_results")
TRAIN_LOGS_DIR = TRAIN_RESULTS_ROOT / "logs"

# Newest first, ordered by file modification time.
episodes_files = sorted(TRAIN_LOGS_DIR.glob("*episodes*.csv"), key=lambda p: p.stat().st_mtime, reverse=True)
if not episodes_files:
    # Always (re)bind the result names: otherwise a later cell reading
    # train_episodes_df raises NameError on a fresh kernel, or silently shows
    # data left over from a previous run.
    train_episodes_path = None
    train_episodes_df = pd.DataFrame()
    print(f"No episodes CSV found in {TRAIN_LOGS_DIR}")
else:
    train_episodes_path = episodes_files[0]
    train_episodes_df = pd.read_csv(train_episodes_path)
    print("Episodes file:", train_episodes_path)
    print("Rows:", len(train_episodes_df))
    display(train_episodes_df.tail(20))

In [None]:
# Bare expression: renders the column index of the loaded episodes log as the cell output.
train_episodes_df.columns

## 7) Export Results Folder (Optional)
Creates a zip archive of the training results (and any prepared-data artifacts that exist) so it can be downloaded from the Colab VM.

In [None]:
# Bundle the results folder and any existing prepared-data artifacts into one
# zip on the Colab VM.
from pathlib import Path
import subprocess

EXPORT_RESULTS_ZIP = True
EXPORT_PATH = Path("/content/tcn_fusion_results_export2.zip")
ROOT = Path("/content/adaptive_portfolio_rl")

if EXPORT_RESULTS_ZIP:
    # NOTE(review): the dataset-prep cell passes
    # preparation_artifacts_dir=<repo>/data_exports, which is not listed here.
    # Confirm whether that directory should be exported as well.
    include_paths = [
        ROOT / "tcn_fusion_results",
        ROOT / "data" / "phase1_preparation_artifacts",
        ROOT / "data" / "master_features_NORMALIZED.csv",
    ]

    existing = [p for p in include_paths if p.exists()]
    if not existing:
        print("⚠️ Nothing to export.")
    else:
        # Remove any previous archive first so zip creates a new file instead of
        # updating the old one.
        if EXPORT_PATH.exists():
            EXPORT_PATH.unlink()

        # Archive entries are stored relative to the repo root.
        rel_items = [str(p.relative_to(ROOT)) for p in existing]
        # Argument list + cwd instead of a shell string: no quoting problems with
        # unusual path characters and no shell involved.
        subprocess.run(["zip", "-qr", str(EXPORT_PATH), *rel_items], cwd=ROOT, check=True)

        print(f"✅ Created: {EXPORT_PATH}")
        print("Included:")
        for p in rel_items:
            print(" -", p)
else:
    print("ℹ️ EXPORT_RESULTS_ZIP=False")


In [None]:
# Copy the exported zip from the Colab VM to Google Drive.
import shutil
from pathlib import Path

from google.colab import drive

drive.mount('/content/drive')

export_zip = Path("/content/tcn_fusion_results_export2.zip")
drive_target = Path("/content/drive/MyDrive") / export_zip.name

# shutil.copyfile raises if the zip is missing or the copy fails, so the success
# message below is only printed when the file was actually copied.
shutil.copyfile(export_zip, drive_target)
print(f"✅ Copied to Drive: {drive_target}")