In [1]:
# set working directory :
import os
pwd = os.getcwd() + "/../"
os.chdir(pwd)


from quanta.clients.yfinance import YahooFinanceClient
from datetime import datetime, timedelta
import polars as pl
import numpy as np


In [2]:
tickers_map = {
    # Energy
    'CL': 'CL=F',   # WTI Crude Oil
    'NG': 'NG=F',   # Natural Gas
    'RB': 'RB=F',   # Gasoline
    'HO': 'HO=F',   # Heating Oil
    
    # Metals
    'GC': 'GC=F',   # Gold
    'SI': 'SI=F',   # Silver
    'HG': 'HG=F',   # Copper
    'PL': 'PL=F',   # Platinum
    
    # Agriculture
    'ZC': 'ZC=F',   # Corn
    'ZW': 'ZW=F',   # Wheat
    'ZS': 'ZS=F',   # Soybeans
    'KC': 'KC=F',   # Coffee
    'SB': 'SB=F',   # Sugar
    'CT': 'CT=F',   # Cotton
    
    # Livestock
    'LE': 'LE=F',   # Live Cattle
    'HE': 'HE=F',   # Lean Hogs
}

initial = 100_000_000
window_days = 30*120  # 12 months
from_date = datetime.now() - timedelta(days=window_days)
to_date = datetime.now() - timedelta(days=1)

timeframe = "1d"

portfolio_target_sigma = 0.43

In [14]:
def clean_price_data(df: pl.DataFrame) -> pl.DataFrame:
    """
    Nettoie automatiquement les données de prix invalides
    
    Enlève:
    - Prix négatifs ou nuls
    - Prix avec variation >50% en 1 période (erreurs de données)
    - Lignes avec valeurs manquantes
    """
    # 1. Enlève prix négatifs/nuls
    df = df.filter(
        (pl.col('open') > 0) & 
        (pl.col('high') > 0) & 
        (pl.col('low') > 0) & 
        (pl.col('close') > 0)
    )
    
    # 2. Enlève lignes avec NaN
    df = df.drop_nulls(subset=['open', 'high', 'low', 'close'])
    
    # 3. Enlève variations extrêmes (>50% en 1 période = probablement erreur)
    df = df.with_columns(
        (pl.col('close') / pl.col('close').shift(1))
        .over('symbol')
        .alias('price_ratio')
    )
    
    df = df.filter(
        (pl.col('price_ratio').is_null()) |  # Garde première ligne
        ((pl.col('price_ratio') > 0.5) & (pl.col('price_ratio') < 2.0))  # ±50%
    )
    
    df = df.drop('price_ratio')
    
    # 4. Vérifie cohérence OHLC (high >= low, close entre low et high)
    df = df.filter(
        (pl.col('high') >= pl.col('low')) &
        (pl.col('close') <= pl.col('high')) &
        (pl.col('close') >= pl.col('low'))
    )
    
    return df

In [15]:
def add_daily_returns(history: pl.DataFrame, method: str = 'pct') -> pl.DataFrame:
    """
    Ajoute une colonne 'daily_return' au DataFrame
    
    Parameters:
    -----------
    method : str
        'delta' : prix[t] - prix[t-1] (pour futures avec prix négatifs possibles)
        'pct' : (prix[t] - prix[t-1]) / |prix[t-1]| (pourcentage)
        'log' : ln(prix[t] / prix[t-1]) (classique mais problème avec négatifs)
    """
    dtcol = "datetime" if "datetime" in history.columns else "timestamp"
    
    # Ajoute la colonne date au df principal
    history = history.with_columns(
        pl.col(dtcol).dt.date().alias("date")
    )
    
    # Aggregate à daily
    daily = (
        history
        .group_by(["symbol", "date"], maintain_order=True)
        .agg(pl.col("close").last())
        .sort(["symbol", "date"])
    )
    
    # Calcule returns selon méthode
    if method == 'delta':
        daily = daily.with_columns(
            (pl.col("close") - pl.col("close").shift(1))
            .over("symbol")
            .alias("daily_return")
        )
    elif method == 'pct':
        daily = daily.with_columns(
            ((pl.col("close") - pl.col("close").shift(1)) / pl.col("close").shift(1).abs())
            .over("symbol")
            .alias("daily_return")
        )
    elif method == 'log':
        daily = daily.with_columns(
            (pl.col("close") / pl.col("close").shift(1)).log()
            .over("symbol")
            .alias("daily_return")
        )
    
    # Join back to original (maintenant "date" existe dans les deux)
    return history.join(
        daily.select(["symbol", "date", "daily_return"]),
        on=["symbol", "date"],
        how="left"
    )

In [16]:
# declare dataframe 
df_list = []  # Liste pour accumuler les DataFrames

yh = YahooFinanceClient()

for sym_key, sym_value in tickers_map.items():  # Itère sur (clé, valeur)
    df_cl = yh.get_price(
        sym_value,  # Utilise la valeur du dictionnaire ('CL=F', 'NG=F', etc.)
        from_date=from_date.strftime("%Y-%m-%d"),
        to_date=to_date.strftime("%Y-%m-%d"),
        interval=timeframe, 
        postclean=True
    )
    # Ajoute la colonne symbol
    df_cl = df_cl.with_columns(pl.lit(sym_value).alias("symbol"))
    # Ajoute à la liste
    df_list.append(df_cl)

# Concatène tous les DataFrames
df = pl.concat(df_list) if df_list else pl.DataFrame()
df = clean_price_data(df)
df = add_daily_returns(df)
df

timestamp,datetime,open,high,low,close,volume,symbol,date,daily_return
i64,datetime[μs],f64,f64,f64,f64,i64,str,date,f64
1452488400,2016-01-11 06:00:00,32.939999,33.200001,30.879999,31.41,648640,"""CL=F""",2016-01-11,
1452574800,2016-01-12 06:00:00,31.110001,32.209999,29.93,30.440001,627218,"""CL=F""",2016-01-12,-0.030882
1452661200,2016-01-13 06:00:00,30.540001,31.709999,30.1,30.48,637903,"""CL=F""",2016-01-13,0.001314
1452747600,2016-01-14 06:00:00,30.6,31.77,30.280001,31.200001,537906,"""CL=F""",2016-01-14,0.023622
1452834000,2016-01-15 06:00:00,31.18,31.18,29.129999,29.42,329094,"""CL=F""",2016-01-15,-0.057051
…,…,…,…,…,…,…,…,…,…
1762923600,2025-11-12 06:00:00,82.0,82.275002,80.5,80.625,39375,"""HE=F""",2025-11-12,-0.020947
1763010000,2025-11-13 06:00:00,80.275002,80.400002,77.849998,78.074997,44114,"""HE=F""",2025-11-13,-0.031628
1763096400,2025-11-14 06:00:00,78.0,79.5,77.925003,78.5,22846,"""HE=F""",2025-11-14,0.005444
1763355600,2025-11-17 06:00:00,78.5,79.125,78.349998,78.574997,14343,"""HE=F""",2025-11-17,0.000955


In [17]:
def get_trading_signal(history: pl.DataFrame) -> dict:
    """
    TREND signal basé sur t-stat des daily returns
    Utilise la colonne 'daily_return' pré-calculée
    """
    symbols = sorted(history['symbol'].unique().to_list())
    dict_results = {}
    
    for symbol in symbols:
        
        # Récupère les returns pour ce symbol (déjà calculés)
        returns = (
            history
            .filter(pl.col('symbol') == symbol)
            .select('daily_return')
            .drop_nulls()
            .to_series()
            .to_numpy()
        )
        
        if len(returns) < 2:
            dict_results[symbol] = 0.0
            continue
        
        # t-statistic
        mean = np.mean(returns)
        std = np.std(returns, ddof=1)
        n = len(returns)
        
        if std == 0:
            t_stat = 0
        else:
            t_stat = mean / (std / np.sqrt(n))
        
        dict_results[symbol] = float(np.clip(t_stat, -1, 1))
    
    return dict_results

In [18]:
def get_y_z_volatility(
        history: pl.DataFrame, 
        available_symbols: list[str], 
        one_month: int = 30,
        data_frequency: str = "1h"
    ):
        """
        Yang & Zhang Drift-Independent Volatility Estimation
        VERSION AVEC FILTRES POUR PRIX NÉGATIFS
        """
        results = []
        available_symbols = sorted(available_symbols)
        
        annualization_factors = {
            "1h": np.sqrt(252 * 24),
            "4h": np.sqrt(252 * 6),
            "1d": np.sqrt(252),
            "1D": np.sqrt(252),
            "daily": np.sqrt(252),
        }
        
        annualization = annualization_factors.get(data_frequency, np.sqrt(252))
        
        if "datetime" not in history.columns:
            raise ValueError("history must have a 'datetime' column")
        
        history = history.with_columns(
            pl.col("datetime").cast(pl.Datetime("ns"))
        )
        
        latest_date = history.filter(pl.col("symbol") == available_symbols[0])["datetime"].max()
        cutoff_date = latest_date - timedelta(days=one_month)
        
        for ticker in available_symbols:
            past_month = (
                history.filter(
                    (pl.col("symbol") == ticker) & 
                    (pl.col("datetime") >= cutoff_date)
                )
                .sort("datetime")
            )
            
            estimation_period = past_month.shape[0]
            
            if estimation_period <= 1:
                results.append(np.nan)
                continue
            
            # Convert to NumPy
            o = past_month["open"].to_numpy()
            h = past_month["high"].to_numpy()
            l = past_month["low"].to_numpy()
            c = past_month["close"].to_numpy()
            
            # ✅ FILTRE CRITIQUE: enlève les prix négatifs/nuls
            mask = (o > 0) & (h > 0) & (l > 0) & (c > 0)
            o, h, l, c = o[mask], h[mask], l[mask], c[mask]
            
            if len(c) < 2:
                results.append(np.nan)
                continue
            
            # Calculate k
            k = 0.34 / (1.34 + (len(c) + 1) / max(len(c) - 1, 1))
            
            # sigma_o_j : overnight jump vol
            oc_log_returns = np.log(o[1:] / c[:-1])
            oc_log_returns = oc_log_returns[np.isfinite(oc_log_returns)]
            
            if len(oc_log_returns) < 2:
                results.append(np.nan)
                continue
            
            sigma_oj = np.std(oc_log_returns, ddof=1)
            
            # sigma_s_d : standard vol
            cc_log_returns = np.log(c[1:] / c[:-1])
            cc_log_returns = cc_log_returns[np.isfinite(cc_log_returns)]
            
            if len(cc_log_returns) < 2:
                results.append(np.nan)
                continue
            
            sigma_sd = np.std(cc_log_returns, ddof=1)
            
            # sigma_r_s : Rogers & Satchell
            H = np.log(h / o)
            L = np.log(l / o)
            C = np.log(c / o)
            
            # Filtre aussi les valeurs infinies ici
            rs_values = H * (H - C) + L * (L - C)
            rs_values = rs_values[rs_values >= 0]  # Uniquement valeurs positives
            
            if len(rs_values) == 0:
                results.append(np.nan)
                continue
            
            sigma_rs_daily = np.sqrt(rs_values)
            sigma_rs_daily = sigma_rs_daily[np.isfinite(sigma_rs_daily)]
            
            if len(sigma_rs_daily) == 0:
                results.append(np.nan)
                continue
            
            sigma_rs = np.mean(sigma_rs_daily)
            
            # Yang & Zhang volatility
            sigma_yz = np.sqrt(sigma_oj**2 + k * sigma_sd**2 + (1 - k) * sigma_rs**2)
            
            # Check final value
            if np.isnan(sigma_yz) or sigma_yz == 0:
                results.append(np.nan)
                continue
            
            results.append(sigma_yz * annualization)
        
        return results

In [19]:
def get_correlation_factor(
        history: pl.DataFrame, 
        trade_signals: dict, 
        available_symbols: list,
        window: int = 90
    ):
        """
        Calculate Correlation Factor
        Utilise la colonne 'daily_return' pré-calculée
        """
        dtcol = "datetime" if "datetime" in history.columns else "timestamp"
        lookback_date = history[dtcol].max() - timedelta(days=window)
        
        available_symbols = sorted(available_symbols)
        all_returns = []
        
        for symbol in available_symbols:
            # Utilise directement daily_return
            daily = (
                history
                .filter(
                    (pl.col("symbol") == symbol) & 
                    (pl.col(dtcol) >= lookback_date) 
                )
                .select(['date', 'daily_return'])
                .drop_nulls()
                .unique(subset=['date'])
                .sort('date')
                .rename({'daily_return': symbol})
            )
            
            all_returns.append(daily)

        # Merge sur les dates communes
        returns_df = all_returns[0]
        for df in all_returns[1:]:
            returns_df = returns_df.join(df, on='date', how='inner')
        

        import pandas as pd
        returns_pd = returns_df.drop('date').to_pandas()
        
        if len(returns_pd) < 10:
            return np.sqrt(len(available_symbols))
        
        corr_matrix = returns_pd.corr()
        n_assets = len(available_symbols)
        
        # Calculate rho_bar
        summation = 0
        count = 0
        for i in range(n_assets - 1):
            for j in range(i + 1, n_assets):
                symbol_i = available_symbols[i]
                symbol_j = available_symbols[j]
                
                x_i = trade_signals[symbol_i]
                x_j = trade_signals[symbol_j]
                rho_ij = corr_matrix.loc[symbol_i, symbol_j]
                
                if not np.isnan(rho_ij):
                    summation += x_i * x_j * rho_ij
                    count += 1
        
        if count == 0:
            return np.sqrt(n_assets)
        
        rho_bar = (2 * summation) / (n_assets * (n_assets - 1))
        cf = np.sqrt(n_assets / (1 + (n_assets - 1) * rho_bar))
        
        return cf

In [20]:
def rebalance_portfolio_correct(
        history: pl.DataFrame,
        current_positions: dict,
        portfolio_target_sigma: float = 0.12,
        capital: float = 100000,
        window: int = 90,
        data_frequency: str = None
    ):
        """
        Rebalance portfolio - Baltas & Kosowski method
        
        Returns positions in NUMBER OF CONTRACTS
        
        Parameters:
        -----------
        history : pl.DataFrame
            DataFrame avec colonnes: datetime, symbol, open, high, low, close, daily_return
        portfolio_target_sigma : float
            Target annualized volatility (ex: 0.12 = 12%)
        capital : float
            Total capital disponible
        window : int
            Lookback window en jours pour correlation factor
        data_frequency : str
            Fréquence des données ('1h', '1d', '4h', etc.)
        """
        if data_frequency is None:
            # Auto-detect frequency
            dtcol = "datetime" if "datetime" in history.columns else "timestamp"
            sample = history.sort(dtcol).head(100)
            time_diffs = sample[dtcol].diff().drop_nulls()
            avg_diff_seconds = time_diffs.mean().total_seconds()
            
            if avg_diff_seconds < 3600 * 2:
                data_frequency = "1h"
            elif avg_diff_seconds < 3600 * 12:
                data_frequency = "4h"
            else:
                data_frequency = "1d"
        
        available_symbols = sorted(history['symbol'].unique().to_list())
        
        if len(available_symbols) == 0:
            return current_positions, {}
        
        # 1. Calculate components
        trade_signals = get_trading_signal(history)
        volatility = get_y_z_volatility(history, available_symbols, data_frequency=data_frequency)
        c_f_rho_bar = get_correlation_factor(history, trade_signals, available_symbols, window=window)
        
        # 2. Calculate weights and positions
        n_assets = len(available_symbols)
        new_positions = {}
        
        # Contract multipliers
        contract_multipliers = {
            'CL=F': 1000,   # 1000 barrels
            'RB=F': 42000,  # 42000 gallons
            'NG=F': 10000,  # 10000 MMBtu
            'HG=F': 25000,  # 25000 pounds
            'GC=F': 100,    # 100 troy ounces
            'SI=F': 5000,   # 5000 troy ounces
            'PL=F': 50,     # 50 troy ounces
        }
        
        for i, symbol in enumerate(available_symbols):
            signal = trade_signals[symbol]
            vol = volatility[i]
            
            if np.isnan(vol) or vol == 0:
                new_positions[symbol] = 0
                continue
            
            # Baltas & Kosowski weight formula
            weight = (signal * portfolio_target_sigma * c_f_rho_bar) / (n_assets * vol)
            weight = np.clip(weight, -1, 1)
            
            # Get last price
            last_price = history.filter(pl.col('symbol') == symbol)['close'].tail(1)[0]
            
            # Get contract multiplier
            multiplier = contract_multipliers.get(symbol, 1)
            
            # Calculate dollar allocation
            dollar_allocation = weight * capital

            # Calculate number of contracts
            contract_value = last_price * multiplier

            # Vérifier que contract_value est valide
            if np.isnan(contract_value) or contract_value == 0:
                new_positions[symbol] = 0
                continue

            num_contracts = dollar_allocation / contract_value

            # Vérifier que num_contracts est valide avant de l'arrondir
            if np.isnan(num_contracts) or not np.isfinite(num_contracts):
                new_positions[symbol] = 0
            else:
                new_positions[symbol] = round(num_contracts)
        
        return new_positions, {
            'weights': {symbol: (trade_signals[symbol] * portfolio_target_sigma * c_f_rho_bar) / (n_assets * volatility[i]) 
                    for i, symbol in enumerate(available_symbols)},
            'signals': trade_signals,
            'volatilities': dict(zip(available_symbols, volatility)),
            'correlation_factor': c_f_rho_bar
        }

In [21]:

# Usage
# Note: passez data_frequency="1h" si vos données sont horaires, "1d" si quotidiennes, etc.
new_positions, details = rebalance_portfolio_correct(
    history=df,
    current_positions={},
    portfolio_target_sigma=portfolio_target_sigma,
    capital=initial,
    window=window_days,
)


print("\n=== PORTFOLIO REBALANCE ===")
print(f"Capital: ${initial:,.0f}")
print(f"\nNew Positions (number of contracts):")
for symbol, contracts in new_positions.items():
    direction = "LONG" if contracts > 0 else "SHORT"
    print(f"  {symbol}: {abs(contracts)} contracts {direction}")

print(f"\nDetails:")
print(f"  Signals: {details['signals']}")
print(f"  Volatilities: {details['volatilities']}")
print(f"  Correlation Factor: {details['correlation_factor']:.4f}")
print(f"  Weights: {details['weights']}")



=== PORTFOLIO REBALANCE ===
Capital: $100,000,000

New Positions (number of contracts):
  CL=F: 166 contracts LONG
  CT=F: 145317 contracts LONG
  GC=F: 34 contracts LONG
  HE=F: 155390 contracts LONG
  HG=F: 116 contracts LONG
  HO=F: 3140525 contracts LONG
  KC=F: 18246 contracts LONG
  LE=F: 58176 contracts LONG
  NG=F: 88 contracts LONG
  PL=F: 68 contracts LONG
  RB=F: 110 contracts LONG
  SB=F: 340533 contracts LONG
  SI=F: 36 contracts LONG
  ZC=F: 29983 contracts LONG
  ZS=F: 11041 contracts LONG
  ZW=F: 15780 contracts LONG

Details:
  Signals: {'CL=F': 1.0, 'CT=F': 0.4498241935427978, 'GC=F': 1.0, 'HE=F': 0.8757159272922651, 'HG=F': 1.0, 'HO=F': 1.0, 'KC=F': 1.0, 'LE=F': 1.0, 'NG=F': 1.0, 'PL=F': 1.0, 'RB=F': 1.0, 'SB=F': 0.48983339456770403, 'SI=F': 1.0, 'ZC=F': 0.6756711566679355, 'ZS=F': 0.7431481011742328, 'ZW=F': 0.6451345958819059}
  Volatilities: {'CL=F': np.float64(0.6825328015363421), 'CT=F': np.float64(0.3407060946694674), 'GC=F': np.float64(0.5040990856817429), 'H

In [34]:
def backtest_baltas_kosowski_fractional(
        history: pl.DataFrame, 
        initial_capital=10000,
        portfolio_target_sigma=0.12,
        window: int = 90
    ):
        """
        Backtest avec positions fractionnaires
        Assume que 'daily_return' column existe déjà dans history
        """
        results = []
        capital = initial_capital

        
        history_with_month = history.with_columns(
            pl.col('datetime').dt.truncate('1mo').alias('month')
        )

        
        months = history_with_month['month'].unique().sort()

        for i, month in enumerate(months[1:]):
            hist_until_month = history.filter(pl.col('datetime') < month)
            
            # Calcule les weights
            _, details = rebalance_portfolio_correct(
                hist_until_month, 
                {}, 
                capital=capital,
                portfolio_target_sigma=portfolio_target_sigma,
                window=window
            )
            
            weights = details['weights']
            
            # Si un des composants est NaN, ça explique tout
            if any(np.isnan(v) for v in details['volatilities'].values()):
                print("⚠️ VOLATILITY NaN detected!")
                print(f"\n=== Month {i}: {month} ===")
                print(f"Data points: {len(hist_until_month)}")
                print(f"Signals: {details['signals']}")
                print(f"Volatilities: {details['volatilities']}")
                print(f"Weights: {details['weights']}")
                break
            
            if np.isnan(details['correlation_factor']):
                print("⚠️ CORRELATION FACTOR NaN detected!")
                print(f"\n=== Month {i}: {month} ===")
                print(f"Data points: {len(hist_until_month)}")
                print(f"Signals: {details['signals']}")
                print(f"Correlation Factor: {details['correlation_factor']}")
                print(f"Weights: {details['weights']}")
                break
            
            
            # Determine next month boundary
            if i+1 < len(months) - 1:
                next_month = months[i+2]
            else:
                next_month = month + timedelta(days=60)
            
            # Get month data
            month_data = history.filter(
                (pl.col('datetime') >= month) & 
                (pl.col('datetime') < next_month)
            )

            
            # Calculate returns using pre-computed daily_return
            month_returns = {}
            for symbol in weights.keys():
                symbol_data = month_data.filter(pl.col('symbol') == symbol)
                
                if len(symbol_data) > 1:
                    # ✅ Simple: prix final / prix initial - 1
                    start_price = symbol_data['close'].head(1)[0]
                    end_price = symbol_data['close'].tail(1)[0]
                    
                    if start_price > 0:
                        month_returns[symbol] = (end_price - start_price) / abs(start_price)
                    else:
                        month_returns[symbol] = 0
                else:
                    month_returns[symbol] = 0

            # Portfolio return = Σ(weight_i × return_i)
            portfolio_return = sum(
                weights[symbol] * month_returns[symbol] 
                for symbol in weights.keys()
            )
            
            capital *= (1 + portfolio_return)
  
            results.append({
                'month': month,
                'weights': weights,
                'returns': month_returns,
                'portfolio_return': portfolio_return,
                'capital': capital
            })
        
        return pl.DataFrame(results)

In [35]:
# TESTE CETTE VERSION
backtest_frac = backtest_baltas_kosowski_fractional(
    df, 
    initial_capital=initial,
    portfolio_target_sigma=portfolio_target_sigma
)

# Performance finale (CORRIGÉE)
final = backtest_frac['capital'].tail(1)[0]
total_return = (final - initial) / initial

print(f"\nInitial Capital: ${initial:,.0f}")
print(f"Final Capital: ${final:,.2f}")
print(f"Total Return: {total_return:.2%}")

# Calcule aussi le Sharpe
monthly_returns = backtest_frac['portfolio_return']
sharpe = (monthly_returns.mean() / monthly_returns.std()) * np.sqrt(12)
print(f"Sharpe Ratio: {sharpe:.2f}")


⚠️ VOLATILITY NaN detected!

=== Month 40: 2019-06-01 00:00:00 ===
Data points: 12835
Signals: {'CL=F': 1.0, 'CT=F': 0.45447695111796627, 'GC=F': 0.8855712877768195, 'HE=F': 0.7918299610472018, 'HG=F': 1.0, 'HO=F': 1.0, 'KC=F': 0.05502245729417945, 'LE=F': -0.29542912993799014, 'NG=F': 0.45169746384768433, 'PL=F': 0.01524390460150613, 'RB=F': 1.0, 'SB=F': -0.0003530173665399785, 'SI=F': 0.31558022125720275, 'ZC=F': 0.6950041605432907, 'ZS=F': 0.1602804274226815, 'ZW=F': 0.3909764424591795}
Volatilities: {'CL=F': np.float64(0.7876995118659016), 'CT=F': np.float64(0.5286637906037341), 'GC=F': np.float64(0.18324032702515025), 'HE=F': np.float64(0.8587020480705465), 'HG=F': np.float64(0.33656297347887704), 'HO=F': np.float64(0.5932459927585796), 'KC=F': np.float64(0.7296106534950582), 'LE=F': np.float64(0.3844199762785556), 'NG=F': np.float64(0.5795985621870117), 'PL=F': nan, 'RB=F': np.float64(0.6724712096675898), 'SB=F': np.float64(0.595606022254267), 'SI=F': np.float64(0.264929298597363

In [44]:
def create_trades_from_backtest(backtest_results: pl.DataFrame, tickers: list = ['CL=F'], initial_capital: float = 100000):
    """
    Crée des trades pour UN SEUL symbol (pour plotting)
    VERSION CORRIGÉE
    """
    trades_list = []
    
    for t in tickers:
        symbol_to_plot = t
        for i, row in enumerate(backtest_results.iter_rows(named=True)):
            month = row['month']
            weights = row['weights']
            portfolio_return = row['portfolio_return']
            capital = row['capital']
            
            # Weight pour le symbol spécifique
            if symbol_to_plot not in weights:
                continue
                
            weight = weights[symbol_to_plot]
            
            # Skip si weight trop petit
            if abs(weight) < 0.0001:
                continue
            
            # Action
            action = "BUY" if weight > 0 else "SELL"
            
            # IMPORTANT: Trouve le VRAI prix du symbol à cette date
            month_data = df.filter(
                (pl.col('symbol') == symbol_to_plot) &  # ← FILTRE LE BON SYMBOL
                (pl.col('datetime') >= month)
            )
            
            if len(month_data) == 0:
                continue
                
            price = month_data['close'].head(1)[0]  # ← PRIX RÉEL, pas weight !
            
            trades_list.append({
                'ticker': symbol_to_plot,
                'datetime': month,
                'position_number': i,
                'action': action,
                'price': price,  # ← Vrai prix CL (55-75)
                'quantity_usd': abs(weight) * capital,
                'position_size': abs(weight),
                'pnl': portfolio_return * capital if i > 0 else 0.0,
                'cumulative_capital': capital / initial_capital
            })
    
    return pl.DataFrame(trades_list)

In [45]:
tickers = [sym_value for _, sym_value in tickers_map.items()]

# UTILISE CETTE VERSION
trades_bk = create_trades_from_backtest(
    backtest_frac, 
    tickers=tickers,  # ← Spécifie CL uniquement
    initial_capital=initial
)
trades_bk

ticker,datetime,position_number,action,price,quantity_usd,position_size,pnl,cumulative_capital
str,datetime[μs],i64,str,f64,f64,f64,f64,f64
"""CL=F""",2016-02-01 00:00:00,0,"""BUY""",31.620001,2.1814e6,0.021424,0.0,1.018203
"""CL=F""",2016-03-01 00:00:00,1,"""BUY""",34.400002,2.4889e6,0.025523,-4.1245e6,0.975136
"""CL=F""",2016-04-01 00:00:00,2,"""BUY""",36.790001,7.3414e6,0.065689,1.6327e7,1.117593
"""CL=F""",2016-05-01 00:00:00,3,"""BUY""",44.779999,6.7278e6,0.061804,-2.8280e6,1.088558
"""CL=F""",2016-06-01 00:00:00,4,"""BUY""",49.009998,8.2225e6,0.071445,6.5891e6,1.150882
…,…,…,…,…,…,…,…,…
"""HE=F""",2019-01-01 00:00:00,35,"""BUY""",61.700001,2.2012e6,0.024522,5.4462e6,0.897613
"""HE=F""",2019-02-01 00:00:00,36,"""BUY""",56.349998,3.7562e6,0.041658,408050.989562,0.901675
"""HE=F""",2019-03-01 00:00:00,37,"""BUY""",56.400002,2.5524e6,0.027642,2.2234e6,0.923387
"""HE=F""",2019-04-01 00:00:00,38,"""BUY""",77.400002,6.6247e6,0.070835,1.1992e6,0.935227


In [38]:
from quanta.clients.chart import ChartClient
chart_client = ChartClient()
chart_client.plot(
    df_cl, 
    "cl=F",  
    trades_df=trades_bk, 
    theme='professional',
    x_axis_type='datetime'
)

Plotting 2479 bars for cl=F with x_axis_type='datetime'
With 40 trades


Exception: The (row, col) pair sent is out of range. Use Figure.print_grid to view the subplot grid. 

In [79]:
df_cl = df_cl.with_columns([
    pl.lit(details['signals']['CL=F']).alias('momentum_signal')
])

signals_over_time = []
for row in backtest_frac.iter_rows(named=True):
    signals_dict = row['weights']  # C'est un dict {'CL=F': 0.16, 'RB=F': -0.21}
    signals_over_time.append({
        'datetime': row['month'] + timedelta(hours=6),
        'signal': signals_dict.get('CL=F', 0)  # ← Extract CL=F weight
    })

signals_df = pl.DataFrame(signals_over_time)

# full join on datetime
df_cl = df_cl.join(signals_df, on='datetime', how='left').fill_null(strategy='forward').sort('datetime')


weights_over_time = []
for row in backtest_frac.iter_rows(named=True):
    weights_over_time.append({
        'datetime': row['month'] + timedelta(hours=6),
        'strategy_weight': row['weights']['CL=F']
    })

weights_df = pl.DataFrame(weights_over_time)

# 2. Join avec df_cl
df_cl = df_cl.join(weights_df, on='datetime', how='left').fill_null(strategy='forward')



In [80]:
traces = [
    Candlesticks(),
    Line('strategy_weight', name='Strategy Weight', color='purple'),
    Volume()
]
traces

[<quanta.utils.trace.Candlesticks at 0x13f083410>,
 <quanta.utils.trace.Line at 0x13ef23010>,
 <quanta.utils.trace.Volume at 0x13ef206d0>]

In [81]:
from quanta.clients.chart import ChartClient
chart_client = ChartClient()
chart_client.plot(
    df_cl, 
    "cl=F",  
    trades_df=trades_bk, 
    traces=traces,
    theme='professional',
    x_axis_type='datetime'
)

Plotting 2480 bars for cl=F with x_axis_type='datetime'
With 118 trades
