# TCN Training Only (Clean)

This notebook is for **training only**.
It uses isolated `train_*` variables and a Sharpe-based checkpoint policy.

## 1) Connect to Colab VM and Sync Repo
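Run this first. It clones the repo if needed, hard-resets it to `origin/main`, and deletes previous experiment outputs, checkpoints, and cached data — export anything you want to keep before running it.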

In [None]:
# Fresh-start cleanup cell (run before importing project modules)
import gc
import shutil
import subprocess
import sys
from pathlib import Path

TRAIN_REPO_URL = "https://github.com/Dave-DKings/tape_tcn_project.git"
TRAIN_REPO_DIR = Path("/content/adaptive_portfolio_rl")

# 1) Sync repo to latest main.
# Clone only when no checkout exists yet; afterwards always fetch and hard-reset,
# which discards local changes to tracked files in the checkout.
if not (TRAIN_REPO_DIR / ".git").exists():
    subprocess.run(["git", "clone", TRAIN_REPO_URL, str(TRAIN_REPO_DIR)], check=True)

subprocess.run(["git", "-C", str(TRAIN_REPO_DIR), "fetch", "origin"], check=True)
subprocess.run(["git", "-C", str(TRAIN_REPO_DIR), "reset", "--hard", "origin/main"], check=True)

# Make the checkout importable so later cells can run `from src... import ...`
# on a fresh kernel without depending on the current working directory.
# NOTE(review): assumes the `src` package sits directly under TRAIN_REPO_DIR — confirm.
if str(TRAIN_REPO_DIR) not in sys.path:
    sys.path.insert(0, str(TRAIN_REPO_DIR))

# 2) Remove old experiment outputs/checkpoints/cached data
purge_paths = [
    TRAIN_REPO_DIR / "tcn_fusion_results",
    TRAIN_REPO_DIR / "tcn_results",
    TRAIN_REPO_DIR / "tcn_att_results",
    TRAIN_REPO_DIR / "output_logs",
    TRAIN_REPO_DIR / "data" / "phase1_preparation_artifacts",
    TRAIN_REPO_DIR / "data" / "master_features_NORMALIZED.csv",
    TRAIN_REPO_DIR / "data" / "daily_ohlcv_assets.csv",              # forces fresh OHLCV download
    TRAIN_REPO_DIR / "data" / "processed_daily_macro_features.csv",   # forces fresh macro cache build
]

# Only paths that actually existed (as a directory or a file) are recorded.
deleted = []
for p in purge_paths:
    if p.is_dir():
        shutil.rmtree(p, ignore_errors=True)
        deleted.append(str(p))
    elif p.is_file():
        p.unlink(missing_ok=True)
        deleted.append(str(p))

# 3) Remove Python/Jupyter cache folders.
# The matches are collected into a list first so that nothing is deleted while
# the recursive glob generator is still walking the tree.
for cache_dir in list(TRAIN_REPO_DIR.rglob("__pycache__")):
    shutil.rmtree(cache_dir, ignore_errors=True)
for ckpt_dir in list(TRAIN_REPO_DIR.rglob(".ipynb_checkpoints")):
    shutil.rmtree(ckpt_dir, ignore_errors=True)

# 4) Clear loaded project modules from kernel memory.
# The bare "src" package is dropped as well; otherwise its __init__ would never
# be re-executed and the stale package object would survive the reset.
for mod in list(sys.modules):
    if mod == "src" or mod.startswith(("src.", "src_")):
        del sys.modules[mod]
gc.collect()

print("‚úÖ Fresh start complete")
print(f"Repo: {TRAIN_REPO_DIR}")
print(f"Deleted paths: {len(deleted)}")
for d in deleted:
    print(" -", d)

## 2) Imports

In [None]:
from copy import deepcopy
from pathlib import Path

import pandas as pd

from src.config import get_active_config
from src.csv_logger import CSVLogger
from src.notebook_helpers.tcn_phase1 import prepare_phase1_dataset, run_experiment6_tape

## 3) Base Config and Dataset Prep

In [None]:
# ------------------------------------------------------------------
# Feature lock from CORE project pipeline (no metadata/manifest)
# ------------------------------------------------------------------
from src.data_utils import DataProcessor


def build_core_active_feature_columns(cfg):
    """Return the phase-1 feature columns from ``DataProcessor``, without duplicates.

    Args:
        cfg: Configuration object passed straight to ``DataProcessor``.

    Returns:
        A new list holding each column reported by
        ``DataProcessor(cfg).get_feature_columns("phase1")`` exactly once, in
        first-seen order.
    """
    # Per the project's own note, this list is the canonical feature list used
    # by env construction during training.
    phase1_columns = DataProcessor(cfg).get_feature_columns("phase1")

    unique_columns = []
    already_seen = set()
    for column in phase1_columns:
        if column not in already_seen:
            already_seen.add(column)
            unique_columns.append(column)
    return unique_columns


def apply_core_feature_lock(cfg, active_feature_columns):
    """Disable every phase-1 feature that is not in ``active_feature_columns``.

    Mutates ``cfg`` in place: ``cfg["feature_params"]["feature_selection"]`` is
    created if missing, ``disable_features`` is set to ``True`` and
    ``disabled_features`` is set to the sorted list of columns to drop.

    Args:
        cfg: Mutable configuration mapping; also passed to ``DataProcessor``.
        active_feature_columns: Iterable of column names that must stay enabled.

    Returns:
        Tuple ``(core_all_cols, disabled)``: the de-duplicated phase-1 columns
        reported by ``DataProcessor`` and the sorted subset that was disabled.
    """
    # The nested sections are created before DataProcessor sees the config.
    feature_selection = cfg.setdefault("feature_params", {}).setdefault("feature_selection", {})

    reported_columns = DataProcessor(cfg).get_feature_columns("phase1")
    core_all_cols = list(dict.fromkeys(reported_columns))

    keep = frozenset(active_feature_columns)
    disabled = sorted(column for column in core_all_cols if column not in keep)

    feature_selection.update(disable_features=True, disabled_features=disabled)

    return core_all_cols, disabled


In [None]:
TRAIN_RANDOM_SEED = 42

# Work on a private deep copy so edits made here never touch the object
# returned by get_active_config.
train_config = deepcopy(get_active_config("phase1"))

# Optional: override analysis horizon
# train_config["ANALYSIS_END_DATE"] = "2025-09-01"

# Derive the active feature list from the core pipeline, then write the
# matching disabled-feature list back into train_config.
train_active_feature_columns = build_core_active_feature_columns(train_config)
_, train_disabled_features = apply_core_feature_lock(
    train_config,
    train_active_feature_columns,
)

print("‚úÖ Core feature lock applied")
print("   active_feature_columns:", len(train_active_feature_columns))
print("   disabled_features:", len(train_disabled_features))

# Drop any dataset left over from an earlier run of this cell before rebuilding,
# then force a fresh dataset build and market data re-download.
globals().pop("train_phase1_data", None)

train_phase1_data = prepare_phase1_dataset(
    train_config,
    preparation_artifacts_dir="/content/adaptive_portfolio_rl/data_exports",
    force_download=True,
)


In [None]:
# Report the size of both splits produced by the dataset-prep cell.
print("Train shape:", train_phase1_data.train_df.shape)
print("Test shape:", train_phase1_data.test_df.shape)

cols = train_phase1_data.train_df.columns
print("Total columns:", len(cols))

# Quick sanity check: columns whose names end in "_raw" or "_unscaled" are
# flagged as potentially redundant; only the first 20 are printed.
dup_like = [name for name in cols if name.endswith(("_raw", "_unscaled"))]
print("Potential redundant raw/unscaled cols:", len(dup_like))
print(dup_like[:20])

## 4) Training Overrides (Sharpe-Only Checkpoint Policy)

This policy keeps only Sharpe-threshold high-watermark checkpointing (`>= 0.5`) and disables rare/step/periodic/TAPE checkpoint routes.

In [None]:
from copy import deepcopy

# Re-copy the config so the overrides below are applied to a fresh object
# rather than to the one referenced by earlier cells.
train_config = deepcopy(train_config)

# Short aliases for the config sections edited in this cell.
tp = train_config["training_params"]
ap = train_config["agent_params"]
ppo = ap["ppo_params"]
env = train_config["environment_params"]

# Core setup: total budget plus rollout-length and batch-size schedules.
tp.update(
    {
        "max_total_timesteps": 100_000,
        "timesteps_per_ppo_update": 384,  # fallback
        "timesteps_per_ppo_update_schedule": [
            {"threshold": 0, "timesteps_per_update": 384},
            {"threshold": 30_000, "timesteps_per_update": 512},
        ],
        "batch_size_ppo_schedule": [
            {"threshold": 0, "batch_size": 128},
            {"threshold": 30_000, "batch_size": 128},
        ],
    }
)

# PPO KL management
ppo.update(
    {
        "target_kl": 0.020,
        "kl_stop_multiplier": 1.25,
        "minibatches_before_kl_stop": 2,
        "policy_clip": 0.08,
        "num_ppo_epochs": 1,
        "max_grad_norm": 0.30,
    }
)

# LR + entropy
ppo.update(
    {
        "actor_lr": 8e-6,
        "critic_lr": 1.2e-4,
        "entropy_coef": 0.0015,
    }
)

# Dirichlet controls
ap.update(
    {
        "dirichlet_alpha_activation": "elu",
        "dirichlet_logit_temperature": 1.20,
        "dirichlet_alpha_cap": 40.0,
        "dirichlet_epsilon": {"max": 0.9, "min": 0.3},
    }
)

# Keep execution/turnover controls.
# NOTE(review): curriculum keys look like timestep thresholds — confirm against the trainer.
tp.update(
    {
        "action_execution_beta_curriculum": {
            0: 0.20,
            30_000: 0.35,
        },
        "turnover_penalty_curriculum": {
            0: 1.50,
            10_000: 2.00,
            25_000: 2.50,
            40_000: 3.00,
        },
    }
)
env.update(
    {
        "target_turnover": 0.35,
        "turnover_penalty_scalar": 1.50,
        "transaction_cost_pct": 0.001,
    }
)

# Sharpe-only checkpoint policy: only the high-watermark route stays enabled.
# NOTE(review): 999.0 is presumably an unreachable TAPE threshold — confirm.
tp.update(
    {
        "high_watermark_checkpoint_enabled": True,
        "high_watermark_sharpe_threshold": 0.5,
        "step_sharpe_checkpoint_enabled": False,
        "periodic_checkpoint_every_steps": 0,
        "tape_checkpoint_threshold": 999.0,
        "rare_checkpoint_params": {"enable": False},
    }
)

# Echo the key settings so the applied policy is visible in the cell output.
print("‚úÖ KL tuning + Sharpe-only checkpoint policy applied")
print("target_kl:", ppo["target_kl"])
print("rollout schedule:", tp["timesteps_per_ppo_update_schedule"])
print("batch schedule:", tp["batch_size_ppo_schedule"])
print("dirichlet:", ap["dirichlet_alpha_activation"], ap["dirichlet_logit_temperature"], ap["dirichlet_alpha_cap"], ap["dirichlet_epsilon"])
print("high_watermark_sharpe_threshold:", tp["high_watermark_sharpe_threshold"])


## 5) Run Training

In [None]:
# Flip to False to execute this cell without launching a training run.
RUN_TRAINING = True

if not RUN_TRAINING:
    print("‚ÑπÔ∏è RUN_TRAINING=False")
else:
    tp = train_config["training_params"]
    print("üöÄ Starting training")
    print("Architecture:", train_config["agent_params"].get("actor_critic_type"))
    print("max_total_timesteps:", tp["max_total_timesteps"])

    # Architecture, rollout length and step budget are all read from
    # train_config; 384 is only the fallback when no rollout length is set.
    train_experiment6 = run_experiment6_tape(
        phase1_data=train_phase1_data,
        config=train_config,
        architecture=train_config["agent_params"].get("actor_critic_type"),
        max_total_timesteps=tp["max_total_timesteps"],
        timesteps_per_update=tp.get("timesteps_per_ppo_update", 384),
        random_seed=TRAIN_RANDOM_SEED,
        use_covariance=True,
        csv_logger_cls=CSVLogger,
    )

    print("‚úÖ Training complete")
    print("checkpoint_prefix:", train_experiment6.checkpoint_path)

## 6) Inspect Latest Training Logs

In [None]:
TRAIN_RESULTS_ROOT = Path("/content/adaptive_portfolio_rl/tcn_fusion_results")
TRAIN_LOGS_DIR = TRAIN_RESULTS_ROOT / "logs"

# Every "*episodes*.csv" log, most recently modified first.
episodes_files = sorted(
    TRAIN_LOGS_DIR.glob("*episodes*.csv"),
    key=lambda log_file: log_file.stat().st_mtime,
    reverse=True,
)
if episodes_files:
    # Load the newest log and show its last 20 rows.
    train_episodes_path = episodes_files[0]
    train_episodes_df = pd.read_csv(train_episodes_path)
    print("Episodes file:", train_episodes_path)
    print("Rows:", len(train_episodes_df))
    display(train_episodes_df.tail(20))
else:
    print(f"No episodes CSV found in {TRAIN_LOGS_DIR}")

## 7) Export Results Folder (Optional)
Set `EXPORT_RESULTS_ZIP = True` in the cell below to create a zip archive that can be downloaded from the Colab VM.

In [None]:
from pathlib import Path
import subprocess

EXPORT_RESULTS_ZIP = False
EXPORT_PATH = Path("/content/tcn_fusion_results_export.zip")
ROOT = Path("/content/adaptive_portfolio_rl")

if EXPORT_RESULTS_ZIP:
    include_paths = [
        ROOT / "tcn_fusion_results",
        ROOT / "data" / "phase1_preparation_artifacts",
        ROOT / "data" / "master_features_NORMALIZED.csv",
    ]

    existing = [p for p in include_paths if p.exists()]
    if not existing:
        print("‚ö†Ô∏è Nothing to export.")
    else:
        if EXPORT_PATH.exists():
            EXPORT_PATH.unlink()

        rel_items = [str(p.relative_to(ROOT)) for p in existing]
        cmd = f"cd {ROOT} && zip -qr {EXPORT_PATH} " + " ".join(rel_items)
        subprocess.run(cmd, shell=True, check=True)

        print(f"‚úÖ Created: {EXPORT_PATH}")
        print("Included:")
        for p in rel_items:
            print(" -", p)
else:
    print("‚ÑπÔ∏è EXPORT_RESULTS_ZIP=False")


In [None]:
import shutil
from pathlib import Path

from google.colab import drive

# Mount Google Drive so the export archive can be copied into it.
drive.mount('/content/drive')

DRIVE_EXPORT_SRC = Path("/content/tcn_fusion_results_export.zip")
DRIVE_EXPORT_DST = Path("/content/drive/MyDrive") / DRIVE_EXPORT_SRC.name

# Report success only when a copy really happened: the archive exists only
# after the export cell has run with EXPORT_RESULTS_ZIP = True.
if DRIVE_EXPORT_SRC.is_file():
    # copyfile transfers the contents only and raises OSError on failure.
    shutil.copyfile(DRIVE_EXPORT_SRC, DRIVE_EXPORT_DST)
    print("‚úÖ Copied to Drive: /content/drive/MyDrive/tcn_fusion_results_export.zip")
else:
    print(
        f"Nothing copied: {DRIVE_EXPORT_SRC} does not exist. "
        "Set EXPORT_RESULTS_ZIP = True in the export cell and run it first."
    )