# WaveletDiff Evaluation (Refactored)

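This notebook evaluates a trained WaveletDiff model. It is a thin front end: the heavy logic is delegated to the modules under `src/evaluation`. Run the cells from top to bottom — later cells rely on the variables and the working directory set by earlier ones.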

In [None]:
# @title Cell 1: Global Configuration
import os

# --- Drive Paths ---
DRIVE_MOUNT_PATH = "/content/drive" # @param {type:"string"}
DRIVE_BASE_PATH = "/content/drive/MyDrive/personal_drive/trading/waveletDiff" # @param {type:"string"}
CHECKPOINT_FOLDER = "checkpoints" # @param {type:"string"}
SAMPLES_FOLDER = "samples" # @param {type:"string"}

MODEL_FILENAME = "stocks_experiment.tar.gz" # @param {type:"string"}

# Extensions that are stripped from MODEL_FILENAME to name the samples folder.
# Order matters: ".tar.gz" must be tried before ".gz", otherwise a ".tar"
# remainder would be left behind.
MODEL_EXTENSIONS = ('.tar.gz', '.tgz', '.zip', '.ckpt', '.gz')


def strip_model_extension(filename):
    """Return `filename` without its trailing checkpoint/archive extension(s).

    Only suffixes are removed, so an extension-like fragment in the middle of
    the name (e.g. "stocks.gzipped.zip") is left intact. Stacked extensions
    such as "model.ckpt.tar.gz" are stripped repeatedly. A name that consists
    solely of an extension is returned unchanged rather than becoming empty.
    """
    name = filename
    while True:
        for ext in MODEL_EXTENSIONS:
            if name.endswith(ext) and len(name) > len(ext):
                name = name[:-len(ext)]
                break
        else:
            # No known extension left at the end of the name.
            return name


MODEL_BASENAME = strip_model_extension(MODEL_FILENAME)

# Checkpoint archive on Drive, and the per-model folder used to cache samples.
DRIVE_CHECKPOINT_PATH = os.path.join(DRIVE_BASE_PATH, CHECKPOINT_FOLDER, MODEL_FILENAME)
DRIVE_SAMPLES_PATH = os.path.join(DRIVE_BASE_PATH, SAMPLES_FOLDER, MODEL_BASENAME)

# --- Repository Settings ---
REPO_BRANCH = "develop" # @param {type:"string"}

# --- Evaluation Settings ---
DATASET = "stocks" # @param {type:"string"}
EXPERIMENT_NAME = "evaluation_run" # @param {type:"string"}
NUM_SAMPLES = 2000 # @param {type:"integer"}
SAMPLING_METHOD = "ddpm" # @param ["ddpm", "ddim"]
COMPILE_MODE = "none" # @param ["none", "default", "reduce-overhead", "max-autotune"]
# NOTE(review): DEVICE is not referenced by any other cell visible in this
# notebook (it is not passed to sample.py) — confirm it is consumed elsewhere.
DEVICE = "cuda" # @param ["cuda", "cpu"]

# --- Evaluation Options ---
EXCLUDE_VOLUME = True # @param {type:"boolean"}
CACHE_SAMPLES_TO_DRIVE = True # @param {type:"boolean"}
USE_CACHED_SAMPLES = True # @param {type:"boolean"}

In [None]:
# @title Cell 2: Setup (Clone, Install, Mount)
import os
import sys
import shutil
from google.colab import drive

REPO_URL = "https://github.com/MilesHoffman/waveletDiff_synth_data.git"
REPO_NAME = "waveletDiff_synth_data"
REPO_PATH = os.path.abspath(REPO_NAME)

# 1. Clone or Pull Repo
if os.path.exists(REPO_PATH):
    nested_path = os.path.join(REPO_PATH, REPO_NAME)
    if os.path.exists(nested_path):
        shutil.rmtree(REPO_PATH)
        !git clone {REPO_URL} {REPO_NAME}
        !git -C {REPO_NAME} checkout {REPO_BRANCH}
    else:
        !git -C {REPO_NAME} fetch origin
        !git -C {REPO_NAME} checkout {REPO_BRANCH}
        !git -C {REPO_NAME} pull origin {REPO_BRANCH}
else:
    !git clone {REPO_URL} {REPO_NAME}
    !git -C {REPO_NAME} checkout {REPO_BRANCH}

# 2. Install Dependencies
!pip install -q pytorch-lightning pywavelets scipy pandas tqdm scikit-learn tslearn seaborn statsmodels

# 3. Mount Drive & Setup Paths
if not os.path.exists(DRIVE_MOUNT_PATH):
    drive.mount(DRIVE_MOUNT_PATH)

for p in [os.path.join(REPO_PATH, "src"), os.path.join(REPO_PATH, "src", "evaluation")]:
    if p not in sys.path: sys.path.append(p)

# 4. Prepare Experiments
local_exp_dir = os.path.join(REPO_PATH, "outputs", EXPERIMENT_NAME)
os.makedirs(local_exp_dir, exist_ok=True)

if os.path.exists(DRIVE_CHECKPOINT_PATH):
    print(f"Unpacking model from {DRIVE_CHECKPOINT_PATH}...")
    if DRIVE_CHECKPOINT_PATH.endswith(".ckpt"):
        shutil.copy2(DRIVE_CHECKPOINT_PATH, os.path.join(local_exp_dir, "checkpoint.ckpt"))
    else:
        shutil.unpack_archive(DRIVE_CHECKPOINT_PATH, local_exp_dir, format='gztar' if '.gz' in DRIVE_CHECKPOINT_PATH and not '.tar' in DRIVE_CHECKPOINT_PATH else None)
else:
    print(f"❌ Model file not found.")

# Sync configs
if os.path.exists(os.path.join(REPO_PATH, "WaveletDiff_source", "configs")):
    shutil.rmtree(os.path.join(REPO_PATH, "configs"), ignore_errors=True)
    shutil.copytree(os.path.join(REPO_PATH, "WaveletDiff_source", "configs"), os.path.join(REPO_PATH, "configs"))

print("✅ Setup Complete")

In [None]:
# @title Cell 3: Generate or Load Samples
import numpy as np
os.chdir(os.path.join(REPO_PATH, "src"))

# Dollar space paths
local_gen_path = f"../outputs/{EXPERIMENT_NAME}/{SAMPLING_METHOD}_samples.npy"
local_real_path = f"../outputs/{EXPERIMENT_NAME}/real_samples.npy"
drive_gen_path = os.path.join(DRIVE_SAMPLES_PATH, f"{SAMPLING_METHOD}_samples.npy")
drive_real_path = os.path.join(DRIVE_SAMPLES_PATH, "real_samples.npy")

# Reparameterized (norm) space paths
local_gen_norm_path = f"../outputs/{EXPERIMENT_NAME}/{SAMPLING_METHOD}_samples_norm.npy"
local_real_norm_path = f"../outputs/{EXPERIMENT_NAME}/real_samples_norm.npy"
drive_gen_norm_path = os.path.join(DRIVE_SAMPLES_PATH, f"{SAMPLING_METHOD}_samples_norm.npy")
drive_real_norm_path = os.path.join(DRIVE_SAMPLES_PATH, "real_samples_norm.npy")

if USE_CACHED_SAMPLES and os.path.exists(drive_gen_path):
    print("Loading cached samples...")
    os.makedirs(os.path.dirname(local_gen_path), exist_ok=True)
    shutil.copy2(drive_gen_path, local_gen_path)
    shutil.copy2(drive_real_path, local_real_path)
    # Copy norm files if they exist
    if os.path.exists(drive_gen_norm_path):
        shutil.copy2(drive_gen_norm_path, local_gen_norm_path)
        shutil.copy2(drive_real_norm_path, local_real_norm_path)
else:
    print(f"Generating {NUM_SAMPLES} samples...")
    output = !python sample.py --experiment_name {EXPERIMENT_NAME} --dataset {DATASET} --num_samples {NUM_SAMPLES} --sampling_method {SAMPLING_METHOD} --compile_mode {COMPILE_MODE}
    print("\n".join(output))
    
    if not os.path.exists(local_gen_path):
        print("\n❌ Generation failed: Output file not produced.")
        print("Possible causes: Checkpoint not found, CUDA error, or config mismatch.")
        raise FileNotFoundError(f"File does not exist: {local_gen_path}")

    if CACHE_SAMPLES_TO_DRIVE:
        os.makedirs(DRIVE_SAMPLES_PATH, exist_ok=True)
        shutil.copy2(local_gen_path, drive_gen_path)
        shutil.copy2(local_real_path, drive_real_path)
        # Cache norm files if they exist
        if os.path.exists(local_gen_norm_path):
            shutil.copy2(local_gen_norm_path, drive_gen_norm_path)
            shutil.copy2(local_real_norm_path, drive_real_norm_path)

print("✅ Samples Ready")

In [None]:
# @title Cell 4: Initialize Modules
import sys
import numpy as np
import torch
import matplotlib.pyplot as plt

# --- Evaluation Framework Imports ---
from evaluation import visualizations as viz
from evaluation import statistics as stats
from evaluation import reporting as report
from evaluation import wrappers

# New Runner for Comprehensive Metrics
from evaluation import EvaluationRunner, EvaluationConfig

from training import inline_evaluation

# --- Dollar-space arrays (paths are relative to the current working directory) ---
real_path = f"../outputs/{EXPERIMENT_NAME}/real_samples.npy"
gen_path = f"../outputs/{EXPERIMENT_NAME}/{SAMPLING_METHOD}_samples.npy"
real_data_full, generated_data_full = np.load(real_path), np.load(gen_path)

print(f"Loaded Full OHLCV (Dollar): Real {real_data_full.shape}, Gen {generated_data_full.shape}")

# --- Reparameterized (norm) arrays: optional, used only when both files exist ---
real_norm_path = f"../outputs/{EXPERIMENT_NAME}/real_samples_norm.npy"
gen_norm_path = f"../outputs/{EXPERIMENT_NAME}/{SAMPLING_METHOD}_samples_norm.npy"
HAS_NORM_DATA = all(os.path.exists(norm_file) for norm_file in (real_norm_path, gen_norm_path))

real_data_norm_full = np.load(real_norm_path) if HAS_NORM_DATA else None
generated_data_norm_full = np.load(gen_norm_path) if HAS_NORM_DATA else None
if not HAS_NORM_DATA:
    print("⚠️ Reparameterized data not found. Re-run sample.py with updated code to generate.")
else:
    print(f"Loaded Reparameterized: Real {real_data_norm_full.shape}, Gen {generated_data_norm_full.shape}")

# --- Random subset for metrics ---
# The seed is fixed and the real indices are drawn before the generated ones;
# the same index arrays are reused for the norm arrays below.
n_s = min(2000, real_data_full.shape[0], generated_data_full.shape[0])
np.random.seed(42)
real_idx = np.random.choice(real_data_full.shape[0], size=n_s, replace=False)
gen_idx = np.random.choice(generated_data_full.shape[0], size=n_s, replace=False)

# Dollar-space subsets with every feature kept (used for the sample plots)
real_data_ohlcv = real_data_full[real_idx]
generated_data_ohlcv = generated_data_full[gen_idx]

# Matching subsets in reparameterized space (None when the files are absent)
real_data_norm = real_data_norm_full[real_idx] if HAS_NORM_DATA else None
generated_data_norm = generated_data_norm_full[gen_idx] if HAS_NORM_DATA else None

# Metrics arrays: drop the last feature when EXCLUDE_VOLUME is set and more
# than one feature is present; otherwise reuse the full subsets as-is.
drop_volume = EXCLUDE_VOLUME and real_data_full.shape[2] > 1
real_data = real_data_ohlcv[..., :-1] if drop_volume else real_data_ohlcv
generated_data = generated_data_ohlcv[..., :-1] if drop_volume else generated_data_ohlcv

# Min-max scaling for visualization: per-feature bounds come from the real
# data only and are applied to both arrays; 1e-8 guards a zero range.
dmin = real_data.min(axis=(0, 1), keepdims=True)
dmax = real_data.max(axis=(0, 1), keepdims=True)
scale_span = dmax - dmin + 1e-8
real_data_scaled = (real_data - dmin) / scale_span
generated_data_scaled = (generated_data - dmin) / scale_span

print(f"Metrics Data (EXCLUDE_VOLUME={EXCLUDE_VOLUME}): {real_data.shape}")

### Visualizations

In [None]:
# @title Visual Analysis
# Distribution-level plots (t-SNE/PCA reduction and PDF, per the original note)
# receive the min-max scaled metrics arrays.
scaled_pair = (real_data_scaled, generated_data_scaled)
viz.plot_distribution_reduction(*scaled_pair)
viz.plot_pdf(*scaled_pair)

# Per-sample plots receive the full-feature subsets in Dollar Space.
ohlcv_pair = (real_data_ohlcv, generated_data_ohlcv)
viz.plot_candlesticks(*ohlcv_pair)
viz.plot_samples(*ohlcv_pair)

### Comprehensive Metrics Evaluation

Evaluates the model with the Tier 1 (Core) and Tier 2 (Advanced) performance metrics.

In [None]:
# @title Run Evaluation Metrics (Using EvaluationRunner)

# Evaluation settings, collected in one place before building the config.
eval_settings = {
    "n_iterations": 1,                 # Iterations for stochastic metrics
    "exclude_volume": EXCLUDE_VOLUME,  # Use the global setting
    "dtw_n_samples": 100,              # Samples for DTW (expensive)
    "correlation_sample_size": 1000,   # Samples for Cross-Correlation
}
config = EvaluationConfig(**eval_settings)

runner = EvaluationRunner(config)

# Inputs for the runner: the dollar-space subsets plus the reparameterized
# subsets (which are None when no norm files were loaded).
run_inputs = {
    "real_dollar": real_data_ohlcv,
    "synth_dollar": generated_data_ohlcv,
    "real_reparam": real_data_norm,
    "synth_reparam": generated_data_norm,
}

print("Running Full Evaluation...")
# Per the original note, the runner handles scaling and feature extraction internally.
results = runner.run(**run_inputs)

print("Evaluation Complete.")

In [None]:
# @title Display Scorecards
from IPython.display import display


def show_scorecard(heading, space_key):
    """Print `heading`, then display the scorecard built from results[space_key]."""
    print(heading)
    display(report.display_scorecard(results[space_key]))


# The dollar-space scorecard is always shown.
show_scorecard("\n--- DOLLAR SPACE SCORECARD ---", 'dollar')

# The reparameterized scorecard is shown only when the runner produced one.
if 'reparam' in results:
    show_scorecard("\n--- REPARAMETERIZED SPACE SCORECARD ---", 'reparam')