# Visualize Training Segments

This notebook visualizes one training segment for each class in the dataset.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
from pathlib import Path

# Make the parent directory importable so the `src` package resolves when the
# notebook is run from its own folder. The path is resolved relative to the
# current working directory and appended only if it is not already listed.
project_root = Path("..").resolve()
root_entry = str(project_root)
if root_entry not in sys.path:
    sys.path.append(root_entry)

# This import must come after the sys.path adjustment above.
from src.utils.config import AppConfig

# Project configuration object used by the cells below.
config = AppConfig()

In [None]:
# Read the training inputs and their labels from the configured data
# directory, together with the class-id -> name lookup used for titles.
train_dir = config.data_dir
X_train = np.load(train_dir / "X_train.npy")
Y_train = np.load(train_dir / "Y_train.npy")
label_names = config.label_names

# Quick sanity report of what was loaded.
print("Loaded training data:", X_train.shape, Y_train.shape)
print("Labels:", label_names)

In [None]:
def plot_segment(x, y_seq, label_name, close_idx=3):
    """Plot one segment's close price above its per-timestep class labels.

    Draws a two-row figure with a shared x-axis: the upper (taller) panel
    shows the close-price column of ``x`` and the lower panel shows ``y_seq``
    as a step line. The figure is displayed with ``plt.show()``; nothing is
    returned.

    Args:
        x: 2-D array indexed as ``x[timestep, feature]``. The feature columns
            are expected in the order 0: Open, 1: High, 2: Low, 3: Close.
        y_seq: Class ID for each timestep (presumably 1-D and the same length
            as ``x`` -- confirm against the code that builds the dataset).
        label_name: Class name shown in the plot title.
        close_idx: Column of ``x`` that holds the close price. Defaults to 3,
            matching the feature order above.
    """
    # Imported locally so the function is self-contained within its cell.
    from matplotlib.ticker import MaxNLocator

    fig, (ax1, ax2) = plt.subplots(
        2,
        1,
        figsize=(12, 8),
        sharex=True,
        gridspec_kw={'height_ratios': [3, 1]},
    )

    # Upper panel: close price over the segment.
    ax1.plot(x[:, close_idx], label='Close', color='black', linewidth=1)
    ax1.set_title(f"Class Example: {label_name}")
    ax1.set_ylabel("Price")
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Lower panel: labels drawn as steps so each value holds until the next
    # timestep instead of being linearly interpolated.
    ax2.plot(y_seq, label='Label', color='red', drawstyle='steps-post')
    ax2.set_ylabel("Class ID")
    ax2.set_xlabel("Timestep")
    # Class IDs are integers, so restrict the ticks to integer positions
    # rather than letting matplotlib pick fractional ones.
    ax2.yaxis.set_major_locator(MaxNLocator(integer=True))
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

In [None]:
# Find and plot one example segment for each class.
#
# Selection rule (first match in dataset order wins):
#   * class 0 (background): any segment that contains the class at all;
#   * every other class: the class must label strictly more than
#     NOISE_TIMESTEPS timesteps of the segment in total (they need not be
#     consecutive), so a segment with only a short blip is skipped.
NOISE_TIMESTEPS = 10

unique_classes = sorted(label_names.keys())

for cls_id in unique_classes:
    print(f"\nFinding example for Class {cls_id}: {label_names[cls_id]}")

    # A segment qualifies when its count of cls_id timesteps exceeds this.
    min_exclusive = 0 if cls_id == 0 else NOISE_TIMESTEPS

    # Index of the first qualifying segment, or None if there is none.
    example_idx = next(
        (
            idx
            for idx, y_seq in enumerate(Y_train)
            if np.count_nonzero(y_seq == cls_id) > min_exclusive
        ),
        None,
    )

    if example_idx is None:
        print(f"No suitable example found for class {cls_id} in the training set.")
        continue

    plot_segment(X_train[example_idx], Y_train[example_idx], label_names[cls_id])