In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# 1. DATA LOADER

In [2]:
def load_stooq_stock(symbol, years=3):
    """
    Load daily stock data from Stooq for the given symbol.

    Parameters:
    - symbol : str (e.g. 'pg.us', 'ko.us')
    - years : int (number of years of data to keep)

    Returns:
    - df : pd.DataFrame indexed by Date with OHLCV columns
    """

    url = f"https://stooq.pl/q/d/l/?s={symbol}&i=d"
    df = pd.read_csv(url)

    # Rename to English
    df.rename(columns={
        'Data': 'Date',
        'Otwarcie': '1. open',
        'Najwyzszy': '2. high',
        'Najnizszy': '3. low',
        'Zamkniecie': 'close',
        'Wolumen': '5. volume'
    }, inplace=True)

    # Date handling
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)
    df.sort_index(inplace=True)

    # Cutoff to last N years
    cutoff_date = datetime.today() - timedelta(days=365 * years)
    df = df[df.index >= cutoff_date]

    return df

# 2. STATIONARITY CHECKER

In [3]:
from statsmodels.tsa.stattools import adfuller

def check_stationarity(price_series, signif=0.05):
    """
    ADF test on log prices.
    """
    series = np.log(price_series.dropna())

    result = adfuller(series)
    
    return {
        "ADF Statistic": result[0],
        "p-value": result[1],
        "Stationary": result[1] < signif
    }

def compute_hurst(price_series, max_lag=100):
    """
    Compute Hurst exponent using log-log regression.
    """
    series = np.log(price_series.dropna())
    
    lags = range(2, max_lag)
    tau = [np.std(series.diff(lag).dropna()) for lag in lags]
    
    poly = np.polyfit(np.log(lags), np.log(tau), 1)
    hurst = poly[0]
    
    return hurst

def run_diagnostics(df):
    return {
        "ADF_stationary": check_stationarity(df['close'])['Stationary'],
        "ADF_pvalue": check_stationarity(df['close'])['p-value'],
        "Hurst": compute_hurst(df['close'])
    }


# 3. FEATURE ENGINEERING

In [4]:
def compute_zscore(df, window=20):
    df = df.copy()
    df['mean'] = df['close'].rolling(window).mean()
    df['std'] = df['close'].rolling(window).std()
    df['z_score'] = (df['close'] - df['mean']) / df['std']
    return df


# 3.5 FILTER

In [5]:
def add_trend_filter(df, ma_window=50, slope_window=10, slope_thresh=0.0):
    df = df.copy()
    ma = df['close'].rolling(ma_window).mean()
    slope = ma.diff(slope_window)  # positive = uptrend
    # Allow MR when slope magnitude is small (near flat)
    df['mr_allowed_trend'] = slope.abs() <= slope.abs().rolling(ma_window).median()
    return df

def add_vol_filter(df, vol_window=20, lookback=100, mult=1.5):
    df = df.copy()
    df['ret'] = df['close'].pct_change()
    vol = df['ret'].rolling(vol_window).std()
    baseline = vol.rolling(lookback).mean()
    df['mr_allowed_vol'] = vol <= mult * baseline
    return df


# 4. STRATEGY

In [11]:
def build_positions_with_filters(df):
    df = df.copy()

    # Require filters (if present). Default allow if not present.
    allowed = pd.Series(True, index=df.index)
    if 'mr_allowed_trend' in df.columns:
        allowed &= df['mr_allowed_trend']
    if 'mr_allowed_vol' in df.columns:
        allowed &= df['mr_allowed_vol']
    if 'vol_ok' in df.columns:  # your volume filter if you use it
        allowed &= df['vol_ok']

    # Entry signal (only if allowed)
    df['entry_signal'] = 0
    df.loc[(df['z_score'] < -0.5) & allowed, 'entry_signal'] = 1
    df.loc[(df['z_score'] > 1) & allowed, 'entry_signal'] = -1

    # Hold logic
    df['position'] = df['entry_signal'].replace(0, np.nan).ffill().fillna(0)

    # Success exit (mean reached)
    exit_success = (
        ((df['position'] == 1) & (df['z_score'] >= 0)) |
        ((df['position'] == -1) & (df['z_score'] <= 0))
    )
    df.loc[exit_success, 'position'] = 0

    # Failure exit (assumption breaks)
    exit_fail = (
        ((df['position'] == 1) & (df['z_score'] < -2.5)) |
        ((df['position'] == -1) & (df['z_score'] > 2.5))
    )
    df.loc[exit_fail, 'position'] = 0

    return df


# 5. BACK-TESTING 

In [12]:
def simulate_strategy(df, initial_capital=100):
    df = df.copy()

    df['return'] = df['close'].pct_change()
    df['strategy_return'] = df['position'].shift(1) * df['return']

    portfolio = initial_capital * (1 + df['strategy_return']).cumprod()

    sharpe = (
        df['strategy_return'].mean() /
        df['strategy_return'].std()
    ) * np.sqrt(252)

    max_dd = (portfolio.cummax() - portfolio).max()

    return {
        'final_value': portfolio.iloc[-1],
        'sharpe': sharpe,
        'max_drawdown': max_dd
    }


# 6. TRADE ANALYSIS

In [13]:
def extract_trades(data2, position_col='position', price_col='close'):
    """
    Extract trade-level statistics from a position-based strategy.

    Returns a DataFrame with:
    entry_date, exit_date, direction, duration, trade_return
    """

    df = data2.copy()

    # Identify position changes
    df['pos_shift'] = df[position_col].shift(1).fillna(0)

    # Trade entry: 0 → 1 or 0 → -1
    df['trade_entry'] = (df['pos_shift'] == 0) & (df[position_col] != 0)

    # Trade exit: 1 → 0 or -1 → 0
    df['trade_exit'] = (df['pos_shift'] != 0) & (df[position_col] == 0)

    trades = []
    trade_id = 0
    entry_idx = None
    entry_price = None
    direction = None

    for i in range(len(df)):
        if df.iloc[i]['trade_entry']:
            trade_id += 1
            entry_idx = df.index[i]
            entry_price = df.iloc[i][price_col]
            direction = df.iloc[i][position_col]

        elif df.iloc[i]['trade_exit'] and entry_idx is not None:
            exit_idx = df.index[i]
            exit_price = df.iloc[i][price_col]

            # Trade return
            if direction == 1:
                trade_ret = (exit_price / entry_price) - 1
            else:
                trade_ret = (entry_price / exit_price) - 1

            duration = df.index.get_loc(exit_idx) - df.index.get_loc(entry_idx)


            trades.append({
                'trade_id': trade_id,
                'direction': direction,
                'entry_date': entry_idx,
                'exit_date': exit_idx,
                'duration_days': duration,
                'entry_price': entry_price,
                'exit_price': exit_price,
                'trade_return': trade_ret
            })

            entry_idx = None

    trades_df = pd.DataFrame(trades)
    return trades_df


# 7. Multi-stock runner

In [14]:
def run_stock(filepath, name):
    df = load_stock_data(filepath)
    df = compute_zscore(df)
    df = build_positions(df)

    results = simulate_strategy(df)
    trades = extract_trades(df)

    summary = {
        'stock': name,
        'ev_per_trade': trades['trade_return'].mean(),
        'win_rate': (trades['trade_return'] > 0).mean(),
        'avg_loss_duration': trades.loc[
            trades['trade_return'] < 0, 'duration_days'
        ].mean(),
        'sharpe': results['sharpe'],
        'max_drawdown': results['max_drawdown']
    }

    return summary


# 8. FINAL RESULTS

In [15]:
stocks = ['pg.us', 'ko.us', 'pep.us', 'jnj.us', 'mcd.us']

rows = []

for symbol in stocks:
    df = load_stooq_stock(symbol, years=3)

    diagnostics = run_diagnostics(df)

    df = compute_zscore(df)
    df = add_trend_filter(df)
    df = add_vol_filter(df)
    df = build_positions_with_filters(df)

    results = simulate_strategy(df)
    trades = extract_trades(df)

    rows.append({
        'stock': symbol,
        'ADF_stationary': diagnostics['ADF_stationary'],
        'Hurst': diagnostics['Hurst'],
        'EV_per_trade': trades['trade_return'].mean(),
        'Win_rate': (trades['trade_return'] > 0).mean(),
        'Sharpe': results['sharpe'],
        'Max_Drawdown': results['max_drawdown']
    })

results_df = pd.DataFrame(rows)
results_df


Unnamed: 0,stock,ADF_stationary,Hurst,EV_per_trade,Win_rate,Sharpe,Max_Drawdown
0,pg.us,False,0.327372,0.005444,0.642857,1.02306,10.342725
1,ko.us,False,0.478535,0.000857,0.660714,-0.083063,27.17119
2,pep.us,False,0.418374,0.000508,0.702128,0.034,24.855826
3,jnj.us,False,0.47147,-0.010899,0.53125,-0.835198,41.207297
4,mcd.us,False,0.463369,0.000256,0.653846,0.110963,21.609491


NameError: name 'comparison_df' is not defined

<Figure size 640x480 with 0 Axes>