In [70]:
import pandas as pd
import numpy as np
from signal_generator import signal_generation
from trade_simulator import Trade_Simulator
from sklearn.preprocessing import StandardScaler

def window_size_past(strategy):
    """Look up how many extra days of history a strategy's indicators need.

    Strategy names are matched exactly (case-sensitive). Any name that is
    not listed below falls back to 60 days.
    """
    # (strategy names, lookback in days), checked top to bottom.
    lookback_table = (
        (("momentum", "mean_reversion"), 100),  # long-term moving averages
        (("macd",), 60),                        # medium-term indicators
        (("breakout", "rsi", "vwap"), 30),      # short-term indicators
    )
    for names, days in lookback_table:
        if strategy in names:
            return days
    return 60  # default for unrecognised strategies

def extract_features_labels(stock, data, strategy, feature_window_len=30, delay=2):
    """
    Build a dataset of per-window features and binary profit labels.

    The series is walked in steps of the strategy's lookback
    (``window_size_past(strategy)``). At every step a slice of
    ``lookback + feature_window_len`` rows is taken:

    * ``signal_generation`` runs on the whole slice and only the last
      ``feature_window_len`` columns of its feature output are kept.
    * The Close prices of those trailing rows, and each indicator row, are
      standardised with a ``StandardScaler`` fitted on this window alone.
    * ``Trade_Simulator`` runs on the same slice with
      ``start_trade=lookback``; the label is 1 when the last
      "Final Capital" entry of its trade log is above 10000, otherwise 0.
      A missing or empty trade log keeps the sample and labels it 0.

    Parameters:
    - stock: str, stock ticker symbol
    - data: pd.DataFrame, stock data (must include a 'Close' column)
    - strategy: str, trading strategy to evaluate
    - feature_window_len: int, number of trailing days kept as features
    - delay: int, forwarded to Trade_Simulator as ``delay``

    Returns:
    - X: np.array, shape (num_samples, 1 + num_indicator_rows,
      feature_window_len); row 0 of each sample is the normalised Close price
    - y: np.array, binary labels (1 = profitable, 0 = not profitable)

    Raises:
    - ValueError: if ``feature_window_len`` is not positive
    """
    if feature_window_len <= 0:
        raise ValueError("feature_window_len must be a positive integer")

    history_window = window_size_past(strategy)
    window_size = feature_window_len
    span = history_window + window_size  # rows consumed by one sample
    step = history_window                # distance between window starts
    # Profit threshold. Presumably Trade_Simulator's starting capital --
    # TODO confirm against trade_simulator.
    initial_capital = 10000

    X, y = [], []

    for start in range(0, len(data) - 2 * history_window, step):
        stop = start + span
        # When feature_window_len > history_window the range bound above no
        # longer guarantees a full slice. A short slice would give a price row
        # shorter than the indicator rows and break np.vstack, so stop here.
        if stop > len(data):
            break

        # A fresh slice is taken for every consumer on purpose: whether the
        # helpers modify their input is not visible from this file.
        _long_signal, _short_signal, features = signal_generation(
            stock, data.iloc[start:stop], strategy
        )
        features = np.array(features)[:, -window_size:]

        # Standardise the Close price over the feature days only.
        price_scaler = StandardScaler()
        closes = data.iloc[start + history_window:stop]["Close"].values.reshape(-1, 1)
        normalized_price = price_scaler.fit_transform(closes).flatten()

        # StandardScaler works column-wise, so transpose to scale each
        # indicator across time, then transpose back.
        feature_scaler = StandardScaler()
        normalized_features = feature_scaler.fit_transform(features.T).T
        all_features = np.vstack([normalized_price, normalized_features])

        # NOTE(review): trading starts at `history_window` inside the same
        # slice, so the label appears to cover the same days as the features
        # rather than a later period -- confirm this is intended.
        trade_log = Trade_Simulator(stock, data.iloc[start:stop], strategy,
                                    stop_loss=False, delay=delay, log=True,
                                    plot=False, start_trade=history_window)

        if trade_log is None or trade_log.empty:
            # No trades: the sample is kept and counted as not profitable.
            label = 0
        else:
            final_capital = trade_log.iloc[-1]["Final Capital"]
            label = 1 if final_capital > initial_capital else 0

        X.append(all_features)
        y.append(label)

    print(f"Final dataset size: {len(X)} samples")
    return np.array(X), np.array(y)




In [77]:
import yfinance as yf
# Download daily TGT (Target) prices for 2000-2019, keeping only the Close and
# Volume columns and dropping rows with missing values.
data = (
    yf.download("TGT", start="2000-01-01", end="2020-01-01")[["Close", "Volume"]]
    .dropna()
)

# Build the feature/label dataset for the RSI strategy.
X, y = extract_features_labels(stock="TGT", data=data, strategy="rsi")

# Sanity-check the resulting array shapes.
print("Feature Shape:", X.shape)  # (num_samples, num_features, num_days)
print("Label Shape:", y.shape)  # (num_samples,)


[*********************100%***********************]  1 of 1 completed


Final dataset size: 166 samples
Feature Shape: (166, 2, 30)
Label Shape: (166,)
