In [81]:
import pandas as pd
import numpy as np
from signal_generator import signal_generation
from trade_simulator import Trade_Simulator
from sklearn.preprocessing import StandardScaler

def get_lookback(strategy):
    """Return the number of history days a strategy's indicators require.

    "momentum" needs 180 days, "mean_reversion" and "macd" need 90, and every
    other strategy (including "breakout", "rsi" and "vwap") uses 60.
    """
    lookback_tiers = (
        (("momentum",), 180),                # long-term moving average
        (("mean_reversion", "macd"), 90),    # medium-term indicators
        (("breakout", "rsi", "vwap"), 60),   # short-term indicators
    )
    for names, days in lookback_tiers:
        if strategy in names:
            return days
    return 60  # fallback for any strategy not listed above

def extract_features_labels(stock, data, strategy, delay=2, feature_days=80):
    """
    Builds (features, label) samples from consecutive windows of stock history.

    `data` is walked in steps of `window_size` rows, where `window_size` is the
    strategy's lookback from `get_lookback`. Every step covers `2 * window_size`
    rows: the first half is passed to `signal_generation` to compute indicators,
    and the whole span is passed to `Trade_Simulator` with `start_trade` set to
    the beginning of the second half. A sample's features are the last
    `feature_days` rows of the first half; its label records whether the
    simulation finished with more than 10000 in capital.

    Parameters:
    - stock: str, stock ticker symbol
    - data: pd.DataFrame, stock data (should include 'Close' and Volume)
    - strategy: str, trading strategy to evaluate
    - delay: int, cooldown period for trading signals in Trade_Simulator
    - feature_days: int, trailing days kept per sample (default 80). Capped at the
      strategy's lookback so price and indicator series always have equal length.

    Returns:
    - X: np.array, feature set of shape (num_samples, num_days, num_features), where
      num_days = min(feature_days, lookback) and num_features is 1 (normalized close)
      plus the number of indicator rows returned by signal_generation
    - y: np.array, binary labels (1 = profitable, 0 = not profitable)

    Raises:
    - ValueError: if feature_days is smaller than 1
    """
    if feature_days < 1:
        raise ValueError("feature_days must be a positive integer")

    window_size = get_lookback(strategy)
    label_days = window_size  # Number of days used for labeling

    # A strategy with a lookback shorter than `feature_days` cannot supply that
    # many indicator values; without this cap the price slice would start at a
    # negative offset (empty or misaligned) and np.vstack would fail.
    feature_days = min(feature_days, window_size)

    # NOTE(review): assumes Trade_Simulator starts from 10000 in capital — confirm.
    starting_capital = 10000

    X, y = [], []

    # `+ 1` keeps the last window when it ends exactly on the final row of `data`.
    for start in range(0, len(data) - window_size - label_days + 1, label_days):
        print(start)
        split = start + window_size   # first row of the label period
        end = split + label_days

        # Closing prices for the trailing `feature_days` rows before the split;
        # split - feature_days >= start >= 0, so the slice never wraps around.
        feature_window = data.iloc[split - feature_days:split]
        full_window = data.iloc[start:end].copy()

        # Indicators are computed from the pre-split history only (signals are unused here).
        _, _, features = signal_generation(stock, data.iloc[start:split].copy(), strategy)
        # assumes `features` is (num_indicators, num_days) — TODO confirm against signal_generation
        features = np.array(features)[:, -feature_days:]

        # Normalize the price series, and each indicator series independently, over the window.
        normalized_price = StandardScaler().fit_transform(
            feature_window["Close"].values.reshape(-1, 1)
        ).flatten()
        normalized_features = StandardScaler().fit_transform(features.T).T

        # Stack to (1 + num_indicators, num_days), then transpose to (num_days, num_features).
        all_features = np.vstack([normalized_price, normalized_features]).T

        # Simulate over the full span; `start_trade` is the offset of the label period
        # inside `full_window` (presumably where trading may begin — verify in Trade_Simulator).
        trade_log = Trade_Simulator(stock, full_window, strategy, stop_loss=False, delay=delay, log=True,
                                    plot=False, start_trade=split - start)

        if trade_log is None or trade_log.empty:
            print("No Trades")
            continue  # Skip if no trades

        # Label by the capital recorded on the last trade of the window.
        final_capital = trade_log.iloc[-1]["Final Capital"]
        profit = 1 if final_capital > starting_capital else 0

        X.append(all_features)
        y.append(profit)

    print(f"Final dataset size: {len(X)} samples")
    return np.array(X), np.array(y)




In [83]:
import yfinance as yf
# Download AAPL history for 2010-01-01 .. 2020-01-01, keeping only rows where
# both Close and Volume are present.
data = (
    yf.download("AAPL", start="2010-01-01", end="2020-01-01")[["Close", "Volume"]]
    .dropna()
)

# Build the feature/label dataset for the momentum strategy.
X, y = extract_features_labels(stock="AAPL", data=data, strategy="momentum")

# Report the array dimensions.
print("Feature Shape:", X.shape)  # (num_samples, num_days, num_features)
print("Label Shape:", y.shape)  # (num_samples,)


[*********************100%***********************]  1 of 1 completed

0
180
360
540
720
900
1080
1260
1440
1620
1800
1980
Final dataset size: 12 samples
Feature Shape: (12, 80, 3)
Label Shape: (12,)





In [84]:
# Inspect the feature array: one (days x features) matrix per sample, where the
# first column is the normalized close and the remaining columns are the
# normalized strategy indicators.
X

array([[[-0.43323935, -3.16063657, -1.95767503],
        [-0.05025342, -2.9215825 , -1.9131271 ],
        [ 0.37829034, -2.63543005, -1.86086461],
        ...,
        [ 2.0859758 ,  0.05891435,  1.088987  ],
        [ 1.9557919 ,  0.18708213,  1.10295936],
        [ 2.8085473 ,  0.36219963,  1.12563151]],

       [[ 1.47966744, -3.03454208, -2.24292601],
        [ 1.75786111, -2.79004548, -2.14623186],
        [ 1.84383057, -2.53702717, -2.05014658],
        ...,
        [-0.12151363, -0.43593716,  1.23434815],
        [-0.76627318, -0.47723288,  1.22947487],
        [-1.48268782, -0.5934334 ,  1.2127161 ]],

       [[-0.34799245, -0.65909992, -1.75189738],
        [-0.35213066, -0.57818595, -1.67333718],
        [-0.55349377, -0.51167896, -1.59764962],
        ...,
        [ 2.32794224,  2.83412361,  2.18714545],
        [ 2.28387957,  3.03724669,  2.31151938],
        [ 2.36544381,  3.24101699,  2.44638694]],

       ...,

       [[-0.88307282, -0.81929323, -1.69956951],
        [-0

In [85]:
# Inspect the labels: 1 where the simulated window ended with final capital
# above 10000, 0 otherwise (windows without trades were skipped).
y

array([1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0])