In [10]:
# etf_advisor.py  ← THE ONE AND ONLY FILE YOU'LL EVER NEED

import joblib
import pandas as pd
import yfinance as yf
import numpy as np
import os
from datetime import datetime

# -------------------------------------------------
# 1. Load your model (put the .pkl in same folder)
# -------------------------------------------------
model_path = "ETF_LongTerm_Model_20251201.pkl"
if not os.path.exists(model_path):
    # Raise instead of calling exit(): inside Jupyter, exit() shuts the kernel
    # down, and in a plain script it terminates with a success status even
    # though loading failed. An exception stops the cell and keeps the kernel.
    raise FileNotFoundError(f"ERROR: {model_path} not found! Put it in the same folder.")

# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load model files that come from a source you trust.
model = joblib.load(model_path)
print("Model loaded – December 1 2025 version\n")

# -------------------------------------------------
# 2. Download prices
# -------------------------------------------------
tickers = ['SPY','ACWI','EFA','VEU','VWO','VNQ','TLT','IEF','LQD','GLD','DBC','BWX']
prices = yf.download(tickers, period="max", auto_adjust=True, progress=False)['Close']

# Put the columns in `tickers` order and keep only the dates on which every
# ticker has a price, so all downstream features share one common history.
prices = prices[tickers].dropna()

# A failed download, or a single ticker that came back entirely empty, leaves
# nothing after dropna(). Fail here with a clear message rather than with an
# IndexError later inside recommend() / backtest().
if prices.empty:
    raise RuntimeError(
        "No common price history for the requested tickers - the download "
        "failed or at least one ticker returned no data."
    )

# -------------------------------------------------
# 3. Rebuild EXACT 18 features (matches training 100%)
# -------------------------------------------------
returns = prices.pct_change()

# Price momentum over each look-back window (counted in rows of `prices`).
mom_21, mom_63, mom_126, mom_252, mom_504 = (
    prices.pct_change(lookback) for lookback in (21, 63, 126, 252, 504)
)
acceleration = mom_21 - mom_252

# Rolling statistics that several features share.
high_252 = prices.rolling(252).max()
sma_50 = prices.rolling(50).mean()
sma_200 = prices.rolling(200).mean()
near_52wh = prices / high_252
annualise = np.sqrt(252)

# SPY-minus-DBC 63-row momentum spread, broadcast to every ticker and
# sign-flipped for the two commodity tickers.
spy_mom = mom_63['SPY']
dbc_mom = mom_63['DBC']
spread = spy_mom - dbc_mom
inverted = ('DBC', 'GLD')
anti = pd.DataFrame(index=prices.index, columns=tickers)
for t in tickers:
    flip_sign = t in inverted
    anti[t] = -spread if flip_sign else spread

# (column suffix, per-ticker frame) pairs. The ORDER matters: recommend() and
# backtest() pick each ticker's columns by prefix and hand them to the model
# positionally, so reordering this list changes what the model receives.
named_features = [
    ('_mom_21d', mom_21),
    ('_mom_63d', mom_63),
    ('_mom_126d', mom_126),
    ('_mom_252d', mom_252),
    ('_mom_504d', mom_504),
    ('_dual_mom', mom_252 - mom_21),
    ('_dist_52whigh', prices / high_252 - 1),
    ('_vs_sma50', prices / sma_50),
    ('_vs_sma200', prices / sma_200),
    ('_golden_cross', (sma_50 > sma_200).astype(int)),
    ('_vol_63d', returns.rolling(63).std() * annualise),
    ('_vol_252d', returns.rolling(252).std() * annualise),
    ('_rs_63d', mom_63.rank(axis=1, pct=True)),
    ('_rs_252d', mom_252.rank(axis=1, pct=True)),
    ('_rs_504d', mom_504.rank(axis=1, pct=True)),
    ('_acceleration_21vs252', acceleration),
    ('_near_52wh', near_52wh),
    ('_anti_commodity_bias', anti),
]
feature_list = [frame.add_suffix(suffix) for suffix, frame in named_features]

# dropna() removes the warm-up rows where the longest windows are still NaN.
features_all = pd.concat(feature_list, axis=1).dropna()

# -------------------------------------------------
# 4. Reasons (the ones you love)
# -------------------------------------------------
# Static, hand-written one-line rationale per ticker. recommend() attaches
# these to its output via alloc['ETF'].map(reasons); they are fixed text and
# are not derived from the model or from current market data.
# A ticker missing from this dict would map to NaN in the printed "Reason".
reasons = {
    'SPY':  'US market leadership & AI/tech growth engine',
    'ACWI': 'Global diversification – safest default',
    'EFA':  'Developed ex-US – attractive valuations',
    'VEU':  'Broad international exposure',
    'VWO':  'Emerging markets catch-up',
    'VNQ':  'Real estate recovery',
    'TLT':  'Long bonds rally on rate cuts',
    'IEF':  'Safe intermediate bonds',
    'LQD':  'Corporate bond spread compression',
    'GLD':  'Gold – inflation & war hedge',
    'DBC':  'Commodities tailwind',
    'BWX':  'International bonds & currency play',
}

# -------------------------------------------------
# 5. LIVE RECOMMENDATION (with reasons)
# -------------------------------------------------
def recommend(amount: float, date: str = None):
    """Print a euro allocation of ``amount`` based on the model's forecasts.

    Args:
        amount: Total sum to allocate, in euros.
        date: Optional as-of date (anything ``pd.to_datetime`` accepts). The
            latest row of ``features_all`` on or before this date is used.
            Defaults to the most recent row available.

    Returns:
        None. The recommendation is printed.

    Raises:
        ValueError: If there is no feature row on or before ``date`` (for
            example, a date inside the rolling-window warm-up period).

    Allocation rule:
        * best forecast > 0.22: 100% in the top-ranked ETF
        * best forecast > 0.15: equal weight across up to three ETFs whose
          forecast exceeds 0.15
        * otherwise: 100% in ACWI

    NOTE(review): backtest() applies different thresholds (0.18 / 0.10) and a
    different branch order, so it does not replay this rule - confirm which
    one is intended.
    """
    # Resolve the day against features_all, not prices: the first rows of
    # prices have no features (rolling-window warm-up) and would make the
    # .loc lookup below fail with an opaque KeyError.
    requested = features_all.index[-1] if date is None else pd.to_datetime(date)
    eligible = features_all.index[features_all.index <= requested]
    if len(eligible) == 0:
        raise ValueError(
            f"No feature data on or before {requested.date()}; "
            f"earliest usable date is {features_all.index[0].date()}."
        )
    day = eligible[-1]

    row = features_all.loc[day]
    preds = {}
    for t in tickers:
        # Columns are named "<TICKER>_<feature>"; their order follows
        # features_all and is passed to the model positionally.
        cols = [c for c in row.index if c.startswith(t + '_')]
        # NOTE(review): predict_disable_shape_check=True (presumably a LightGBM
        # option - confirm) suppresses the feature-count validation, so a
        # mismatch with the training features would go unnoticed.
        preds[t] = model.predict(row[cols].values.reshape(1, -1), predict_disable_shape_check=True)[0]

    df = pd.DataFrame(list(preds.items()), columns=['ETF', '2Y'])
    df = df.sort_values('2Y', ascending=False)

    best = df.iloc[0]['2Y']
    if best > 0.22:
        alloc = df.iloc[[0]].copy()
        alloc['Weight'] = 1.0
    elif best > 0.15:
        alloc = df[df['2Y'] > 0.15].head(3).copy()
        alloc['Weight'] = 1 / len(alloc)
    else:
        # Show ACWI's own forecast. Previously the top-ranked ETF's forecast
        # was printed next to ACWI, which mislabelled the number.
        alloc = pd.DataFrame({'ETF': ['ACWI'], '2Y': [preds.get('ACWI', np.nan)], 'Weight': [1.0]})

    # Each line is rounded independently, so the amounts may differ from
    # `amount` by a euro or two in total.
    alloc['Amount €'] = (alloc['Weight'] * amount).round().astype(int)
    alloc['Reason'] = alloc['ETF'].map(reasons)

    print(f"\nLIVE RECOMMENDATION €{amount:,} — {day.date()}\n")
    print(f"{'ETF':<6} {'2Y Forecast':>12} {'Weight':>8} {'Amount €':>12}  Reason")
    print("—" * 75)
    for _, r in alloc.iterrows():
        print(f"{r.ETF:<6} {r['2Y']:+6.1%}      {r.Weight:>6.0%}   €{r['Amount €']:>8,}   → {r.Reason}")
    print("\nValid entire month")

# -------------------------------------------------
# 6. BACKTEST FUNCTION (your favorite Cell 10)
# -------------------------------------------------
def backtest(date_str="2022-10-15", amount=1000):
    """Print what the model would have picked on a past date and how it did.

    Args:
        date_str: Start date (anything ``pd.to_datetime`` accepts). The latest
            row of ``features_all`` on or before it is used.
        amount: Total sum to allocate, in euros.

    Returns:
        None. The selection, realised returns and SPY comparison are printed.

    Raises:
        ValueError: If there is no feature row on or before ``date_str``.

    Selection rule:
        * best forecast > 0.18: equal weight across up to three ETFs whose
          forecast exceeds 0.18
        * best forecast > 0.10: 100% in the top-ranked ETF
        * otherwise: 100% in ACWI

    NOTE(review): recommend() applies different thresholds (0.22 / 0.15) and a
    different branch order, so this does not replay the live rule - confirm
    which one is intended.
    """
    target_date = pd.to_datetime(date_str)
    # Resolve against features_all, not prices: early price rows have no
    # features (rolling-window warm-up) and would raise a KeyError on .loc.
    eligible = features_all.index[features_all.index <= target_date]
    if len(eligible) == 0:
        raise ValueError(
            f"No feature data on or before {target_date.date()}; "
            f"earliest usable date is {features_all.index[0].date()}."
        )
    day = eligible[-1]

    # Predict what the model thought on that day.
    row = features_all.loc[day]
    preds = {}
    for t in tickers:
        # Columns are named "<TICKER>_<feature>"; their order follows
        # features_all and is passed to the model positionally.
        cols = [c for c in row.index if c.startswith(t + '_')]
        # NOTE(review): predict_disable_shape_check=True (presumably a LightGBM
        # option - confirm) suppresses the feature-count validation.
        preds[t] = model.predict(row[cols].values.reshape(1, -1), predict_disable_shape_check=True)[0]

    df = pd.DataFrame(list(preds.items()), columns=['ETF', 'Exp'])
    df = df.sort_values('Exp', ascending=False)

    best = df.iloc[0]['Exp']
    if best > 0.18:
        selected = df[df['Exp'] > 0.18].head(3).copy()
        selected['Weight'] = 1 / len(selected)
    elif best > 0.10:
        selected = df.iloc[[0]].copy()
        selected['Weight'] = 1.0
    else:
        # Show ACWI's own forecast. Previously the top-ranked ETF's forecast
        # was printed next to ACWI, which mislabelled the number.
        selected = pd.DataFrame({'ETF': ['ACWI'], 'Exp': [preds.get('ACWI', np.nan)], 'Weight': [1.0]})

    # Each line is rounded independently, so the amounts may differ from
    # `amount` by a euro or two in total (e.g. 3 x 1667 for 5000).
    selected['Amount €'] = (selected['Weight'] * amount).round().astype(int)

    # Realised performance: first trading day at least 730 calendar days
    # later, or the last available day if that much history does not exist.
    end_date = day + pd.Timedelta(days=730)
    later_days = prices.index[prices.index >= end_date]
    full_window = len(later_days) > 0
    end_day = later_days[0] if full_window else prices.index[-1]

    actual = {t: prices.loc[end_day, t] / prices.loc[day, t] - 1 for t in tickers}
    portfolio_ret = sum(
        weight * actual.get(etf, 0)
        for etf, weight in zip(selected['ETF'], selected['Weight'])
    )
    spy_ret = actual['SPY']

    print(f"\nBACKTEST: What the model said on {day.date()}\n")
    print(f"{'ETF':<6} {'Weight':<8} {'€':<10} {'Expected':<10} {'Actual 2Y':<10} {'vs SPY'}")
    print("—" * 70)
    for _, r in selected.iterrows():
        act = actual.get(r.ETF, 0)
        print(f"{r.ETF:<6} {r.Weight:>6.0%}   €{r['Amount €']:<6}   {r.Exp:+5.1%}    {act:+5.1%}     {act-spy_ret:+5.1%}")

    print(f"\nPortfolio actual return : {portfolio_ret:+.1%}")
    print(f"SPY actual return       : {spy_ret:+.1%}")
    print(f"Outperformance          : {portfolio_ret - spy_ret:+.1%}")
    print(f"Period: {day.date()} → {end_day.date()}")
    if not full_window:
        # Without this notice a partial period reads as a full 2-year result.
        print(
            f"WARNING: only {(end_day - day).days} of 730 days have elapsed - "
            "the 'Actual 2Y' figures above are partial-period returns."
        )



Model loaded – December 1 2025 version



In [20]:
# -------------------------------------------------
# 7. RUN WHATEVER YOU WANT
# -------------------------------------------------
# Driver cell: one live recommendation followed by two historical backtests,
# each framed by a 90-character rule.
separator = "=" * 90

print(separator)
recommend(100)
print("\n" + separator)
backtest("2025-1-30", amount=5000)
print("\n" + separator)
backtest("2023-10-19", amount=500)
print(separator)


LIVE RECOMMENDATION €100 — 2025-11-28

ETF     2Y Forecast   Weight     Amount €  Reason
———————————————————————————————————————————————————————————————————————————
DBC    +24.6%        100%   €     100   → Commodities tailwind

Valid entire month


BACKTEST: What the model said on 2025-01-30

ETF    Weight   €          Expected   Actual 2Y  vs SPY
——————————————————————————————————————————————————————————————————————
TLT       33%   €1667     +23.0%    +5.9%     -8.1%
VNQ       33%   €1667     +19.2%    +3.7%     -10.3%
VWO       33%   €1667     +19.1%    +22.3%     +8.3%

Portfolio actual return : +10.6%
SPY actual return       : +13.9%
Outperformance          : -3.3%
Period: 2025-01-30 → 2025-11-28


BACKTEST: What the model said on 2023-10-19

ETF    Weight   €          Expected   Actual 2Y  vs SPY
——————————————————————————————————————————————————————————————————————
EFA       33%   €167      +23.3%    +50.7%     -10.8%
ACWI      33%   €167      +22.8%    +58.4%     -3.1%
VEU    