In [1]:
# Optional: run this cell only if you need to install missing packages.
# Comment out if already installed.
import sys
!{sys.executable} -m pip install --upgrade pip setuptools wheel
!{sys.executable} -m pip install MetaTrader5 pandas numpy matplotlib stable-baselines3 gymnasium==0.29.1 shimmy==0.2.1 torch
!{sys.executable} -m pip install ffmpeg-python pillow




In [2]:
# Cell 2
import os
import time
import json
import glob
from datetime import datetime
import numpy as np
import pandas as pd
import MetaTrader5 as mt5
import matplotlib.pyplot as plt
import joblib

from stable_baselines3 import PPO

print("MT5 Initialize:", mt5.initialize())




from stable_baselines3 import PPO

# ------ CONFIG ------
DATA_DIR = os.path.join("data", "multiasset")      # normalized CSVs and scalers
MODEL_DIR = os.path.join("models", "multiasset")
MODEL_FILE = os.path.join(MODEL_DIR, "ppo_multiasset.zip")
SCALER_GLOB = os.path.join(DATA_DIR, "*_scaler.csv")
EMBED_FILE = os.path.join(MODEL_DIR, "asset_embeddings.npy")   # optional
ASSET_MAP_FILE = os.path.join(DATA_DIR, "asset_to_idx.csv")

WINDOW = 50
TIMEFRAME = "M1"   # human-readable timeframe (not MT5 constant)
TF_MAP = { "M1": mt5.TIMEFRAME_M1, "M5": mt5.TIMEFRAME_M5, "M15": mt5.TIMEFRAME_M15,
           "M30": mt5.TIMEFRAME_M30, "H1": mt5.TIMEFRAME_H1, "H4": mt5.TIMEFRAME_H4,
           "D1": mt5.TIMEFRAME_D1 }
TF_MT5 = TF_MAP[TIMEFRAME.upper()]

LOG_FILE = os.path.join(MODEL_DIR, "live_trade_logs.csv")
DRY_RUN = True   # Set False to actually place orders in MT5 (use with caution)

# Safety defaults for order params
DEFAULT_RISK_PCT = 0.005
MIN_LOT = 0.01
MAX_LOT = 1.0

os.makedirs(MODEL_DIR, exist_ok=True)


Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


In [9]:
# Cell 3 - utilities

def make_safe_name(sym: str) -> str:
    return sym.replace(" ", "_").replace("/", "_").replace("(", "").replace(")", "").replace(".", "_")

def load_scalers(data_dir=DATA_DIR):
    """
    Load per-asset scaler CSVs into dict: {safe: {"mean": Series, "std": Series}}
    """
    scalers = {}
    for path in glob.glob(os.path.join(data_dir, "*_scaler.csv")):
        safe = os.path.basename(path).replace("_scaler.csv","")
        df = pd.read_csv(path, index_col=0)
        #df = pd.read_csv(path, index_col=0, squeeze=False)
        # Expect columns 'mean' and 'std'
        scalers[safe] = {"mean": df['mean'], "std": df['std']}
    return scalers

def load_embeddings(embed_file=EMBED_FILE, data_dir=DATA_DIR):
    """
    Load embeddings (npy) and asset map. Returns embeddings dict {safe: array}
    """
    embeddings = {}
    if not os.path.exists(embed_file):
        print("No embeddings file found:", embed_file)
        return embeddings
    emb = np.load(embed_file, allow_pickle=False)
    # load asset map
    if os.path.exists(ASSET_MAP_FILE):
        #am = pd.read_csv(ASSET_MAP_FILE, index_col=0, squeeze=False)
        am = pd.read_csv(ASSET_MAP_FILE, index_col=0, squeeze=False)
        # am is a series-like mapping safe->idx perhaps; create mapping
        # convert to dict safe->idx
        try:
            # If CSV is like safe,index
            asset_to_idx = {str(k): int(v) for k,v in am.to_dict()[am.columns[0]].items()}
        except Exception:
            asset_to_idx = {str(k): int(v) for k,v in am.to_dict().items()}
        # build dict
        for safe, idx in asset_to_idx.items():
            if idx < emb.shape[0]:
                embeddings[safe] = emb[idx]
    else:
        # fallback: map based on files in data_dir in sorted order
        csvs = sorted(glob.glob(os.path.join(data_dir, "*_normalized.csv")))
        safe_list = [os.path.basename(p).replace("_normalized.csv","") for p in csvs]
        if len(safe_list) == emb.shape[0]:
            for i,safe in enumerate(safe_list):
                embeddings[safe] = emb[i]
        else:
            # cannot map reliably
            print("Warning: embeddings length does not match CSV count and no asset map - skipping embedding loading.")
    return embeddings

def load_normalized_datasets(data_dir=DATA_DIR, window=WINDOW):
    """
    Load *_normalized.csv into dict {safe: df} and only keep rows > window
    """
    datasets = {}
    for p in sorted(glob.glob(os.path.join(data_dir, "*_normalized.csv"))):
        safe = os.path.basename(p).replace("_normalized.csv","")
        df = pd.read_csv(p, index_col=0, parse_dates=True)
        # required columns or attempt mapping
        expected = ['o_pc','h_pc','l_pc','c_pc','v_pc','Close_raw']
        if not all(c in df.columns for c in expected):
            # try mapping columns lowercase
            lower = {c.lower():c for c in df.columns}
            if all(x in lower for x in ['open','high','low','close','volume']):
                df = df.rename(columns={lower['open']:'open', lower['high']:'high', lower['low']:'low', lower['close']:'close', lower['volume']:'volume'})
                tmp = pd.DataFrame(index=df.index)
                tmp['o_pc'] = df['open'].pct_change()
                tmp['h_pc'] = df['high'].pct_change()
                tmp['l_pc'] = df['low'].pct_change()
                tmp['c_pc'] = df['close'].pct_change()
                tmp['v_pc'] = df['volume'].pct_change()
                tmp['Close_raw'] = df['close']
                tmp = tmp.dropna()
                df = tmp
            else:
                raise ValueError(f"{p} missing required columns and cannot auto-convert.")
        else:
            df = df[expected].dropna()
        if len(df) > window:
            datasets[safe] = df
    if not datasets:
        raise FileNotFoundError(f"No usable normalized CSVs found in {data_dir}")
    return datasets


In [10]:
# Cell 4 - load preprocessors and model
scalers = load_scalers(DATA_DIR)
embeddings = load_embeddings(EMBED_FILE, DATA_DIR)
datasets = load_normalized_datasets(DATA_DIR, WINDOW)

# build safe_names mapping {raw_symbol: safe} - we expect raw symbol to match safe for MT5
# If your MT5 symbol names differ from safe names, create a mapping manually.
safe_names = {}
for safe in datasets.keys():
    # assume MT5 uses same display name as safe BUT with spaces (e.g. "Volatility 75 Index")
    # If you use the original symbols, you should supply a mapping. We'll attempt to reverse map by replacing underscores.
    raw_guess = safe.replace("_", " ")
    safe_names[raw_guess] = safe

print("Loaded datasets:", list(datasets.keys()))
print("Loaded scalers:", list(scalers.keys())[:10], " (total:", len(scalers),")")
print("Loaded embeddings:", list(embeddings.keys())[:10], " (total:", len(embeddings),")")

# Load model
if not os.path.exists(MODEL_FILE):
    raise FileNotFoundError("Trained model not found at: " + MODEL_FILE)
model = PPO.load(MODEL_FILE)
print("Loaded model:", MODEL_FILE)

# Initialize MT5 (for live trading)
if not mt5.initialize():
    raise RuntimeError("MT5 initialization failed. Start MT5 terminal and login to Deriv.")
print("MT5 initialized:", mt5.version())


TypeError: read_csv() got an unexpected keyword argument 'squeeze'

In [11]:
# Cell 5 - observation builder used in live loop
def fetch_and_build_obs(symbol, window, scalers, embeddings, safe_names):
    """
    Returns (obs, vol_est, last_price) or (None, None, None) on failure.
    - symbol: MT5 symbol string as used by mt5.copy_rates_from_pos(...)
    - safe_names: mapping raw_symbol -> safe (the safe key used in scalers/embeddings/datasets)
    """
    safe = safe_names.get(symbol) if symbol in safe_names else make_safe_name(symbol)

    # check required objects
    if safe not in scalers:
        print(f"❌ Missing scaler for: {safe}")
        return None, None, None
    if safe not in embeddings:
        print(f"❌ Missing embedding for: {safe}")
        return None, None, None
    if safe not in datasets:
        print(f"❌ No prepared dataset for: {safe}")
        return None, None, None

    scaler = scalers[safe]
    embed_vec = np.array(embeddings[safe], dtype=np.float32)

    count = window + 20
    bars = mt5.copy_rates_from_pos(symbol, TF_MT5, 0, count)
    if bars is None or len(bars) < window + 2:
        print(f"Insufficient bars for {symbol}")
        return None, None, None

    df = pd.DataFrame(bars)
    df['time'] = pd.to_datetime(df['time'], unit='s')
    df = df.set_index('time')
    df = df[['open','high','low','close','tick_volume']].rename(columns={'tick_volume':'volume'})

    # compute pct changes and take last window
    pct = df.pct_change().dropna()
    if len(pct) < window:
        print(f"Not enough pct rows for {symbol}")
        return None, None, None
    pct = pct.tail(window)

    # Normalize using scaler (scaler["mean"] and scaler["std"] are pandas Series)
    mean = scaler["mean"]
    std = scaler["std"].replace(0, 1.0)
    # Ensure indexes align: use pct columns order
    cols = ["open","high","low","close","volume"]
    # If scaler mean/std have slightly different index names, align by position
    try:
        m = mean[cols]
        s = std[cols]
    except Exception:
        # fallback: assume mean/std are in same order as these five columns
        m = mean.values[:len(cols)]
        s = std.values[:len(cols)]
        m = pd.Series(m, index=cols)
        s = pd.Series(s, index=cols)

    pct_norm = (pct[cols] - m) / s
    last_price = float(df['close'].iloc[-1])
    vol_est = float(pct['close'].std())

    balance_norm = np.full((window,1), 1.0, dtype=np.float32)
    # asset id: position of safe among keys -> normalized
    safe_list = list(datasets.keys())
    try:
        asset_id_val = safe_list.index(safe) / max(1,len(safe_list))
    except ValueError:
        asset_id_val = 0.0
    asset_id = np.full((window,1), asset_id_val, dtype=np.float32)

    emb_rep = np.tile(embed_vec.reshape(1,-1), (window,1)).astype(np.float32)

    obs = np.column_stack([
        pct_norm[cols].values.astype(np.float32),
        emb_rep,
        balance_norm,
        asset_id
    ]).astype(np.float32)

    return obs, vol_est, last_price


In [12]:
# Cell 6 - lot sizing and order placement (DRY_RUN safe)
def compute_lot_from_balance(balance, vol, price, risk_pct=DEFAULT_RISK_PCT, min_lot=MIN_LOT, max_lot=MAX_LOT):
    """
    Simple volatility-based lot sizing: risk_pct of balance / (volatility * price_scale)
    This is heuristic; customize for your broker's contract specifications.
    """
    risk_amount = balance * risk_pct
    vol = max(vol, 1e-8)
    # scaling factor adjusts units — change for your instrument contract size
    price_scale = 1000.0
    lot = risk_amount / (vol * price_scale)
    lot = max(min_lot, min(max_lot, round(lot, 2)))
    return float(lot)

def place_order(symbol, direction, lot, sl, tp, dry_run=True, comment="multiasset_live"):
    """
    Place a market order on MT5 or simulate if dry_run True.
    Returns order result object or a simulated dict.
    """
    tick = mt5.symbol_info_tick(symbol)
    if tick is None:
        print("❌ Can't get tick for", symbol)
        return None

    price = float(tick.ask if direction == "BUY" else tick.bid)
    if dry_run:
        # simulate a response-like dict
        res = {"retcode": 10009, "price": price, "comment": "DRY_RUN", "order": None}
        return res

    request = {
        "action": mt5.TRADE_ACTION_DEAL,
        "symbol": symbol,
        "volume": float(lot),
        "type": mt5.ORDER_TYPE_BUY if direction == "BUY" else mt5.ORDER_TYPE_SELL,
        "price": price,
        "sl": float(sl),
        "tp": float(tp),
        "deviation": 20,
        "magic": 234000,
        "comment": comment,
        "type_filling": mt5.ORDER_FILLING_FOK,
    }
    res = mt5.order_send(request)
    return res


In [13]:
# Cell 7 - single pass: build obs, predict, compute sl/tp, place (or simulate) order and log
def run_once_predict_and_place(symbols_list, model, scalers, embeddings, safe_names, window=WINDOW, dry_run=DRY_RUN):
    # get account balance or fallback
    acct = mt5.account_info()
    balance = float(acct.balance) if acct is not None else 10000.0

    # ensure log file header
    header = not os.path.exists(LOG_FILE)
    for sym in symbols_list:
        print(f"\n--- Processing: {sym} ---")
        obs, vol, last_price = fetch_and_build_obs(sym, window, scalers, embeddings, safe_names)
        if obs is None:
            print(f"Insufficient obs for {sym} - skipping.")
            continue

        # model.predict: expects batch dimension
        try:
            action, _ = model.predict(obs[np.newaxis, ...], deterministic=True)
            # action may be array-like
            if isinstance(action, (list, tuple, np.ndarray)):
                action = int(action[0])
            else:
                action = int(action)
        except Exception as e:
            print("Model prediction error:", e)
            continue

        if action == 0:
            print(f"{sym} -> HOLD")
            continue

        direction = "BUY" if action == 1 else "SELL"
        lot = compute_lot_from_balance(balance, vol, last_price)
        # heuristic sl/tp distances (multiples of vol * price)
        sl_dist = 1.5 * vol * last_price if vol > 0 else 0.01 * last_price
        tp_dist = 2.5 * vol * last_price if vol > 0 else 0.02 * last_price
        sl = last_price - sl_dist if direction == "BUY" else last_price + sl_dist
        tp = last_price + tp_dist if direction == "BUY" else last_price - tp_dist

        res = place_order(sym, direction, lot, sl, tp, dry_run=dry_run)

        # Normalize retcode and message
        retcode = res.get("retcode") if isinstance(res, dict) else getattr(res, "retcode", None)
        comment = res.get("comment") if isinstance(res, dict) else getattr(res, "comment", "")

        print(f"Placed {direction} for {sym} | lot {lot} | price {last_price:.5f} | retcode {retcode}")

        # Log
        entry = {
            "timestamp": datetime.utcnow().isoformat(),
            "symbol": sym,
            "direction": direction,
            "lot": lot,
            "price": last_price,
            "sl": sl,
            "tp": tp,
            "retcode": retcode,
            "comment": comment,
            "dry_run": dry_run
        }
        df_entry = pd.DataFrame([entry])
        df_entry.to_csv(LOG_FILE, mode="a", index=False, header=header)
        header = False

    print("\nSingle pass completed; trades logged to", LOG_FILE)


In [14]:
# Cell 8 - historical simulation evaluation
def simulate_trades_on_history(datasets, model, symbols=None, window=WINDOW, horizon=10):
    """
    Simulate trading with model on historical datasets.
    For each symbol, slide a window and ask the model to act; if action != 0, simulate an instantaneous market entry
    and close after 'horizon' bars; compute PnL based on price change and return aggregated metrics.
    This is a simple backtest for evaluation only.
    """
    trades = []
    symbols = symbols or list(datasets.keys())
    for safe in symbols:
        df = datasets[safe]
        # reconstruct an example mapping symbol name used in fetch_and_build_obs
        # We will build obs from the df directly rather than using MT5
        for i in range(window, len(df)-horizon):
            window_df = df.iloc[i-window:i]
            # build obs: normalized preprocessed already in df (o_pc ...), and need embedding & balance and asset id
            # Build obs to match training obs shape
            feat = window_df[['o_pc','h_pc','l_pc','c_pc','v_pc']].values.astype(np.float32)
            # embedding
            emb = embeddings.get(safe, np.zeros((0,)))
            emb_rep = np.tile(emb.reshape(1,-1),(window,1)) if emb.size>0 else np.zeros((window,0))
            balance_col = np.full((window,1), 1.0, dtype=np.float32)
            # asset id
            asset_id_val = list(datasets.keys()).index(safe) / max(1,len(datasets))
            asset_col = np.full((window,1), asset_id_val, dtype=np.float32)
            obs = np.concatenate([feat, emb_rep, balance_col, asset_col], axis=1)
            # predict
            try:
                action, _ = model.predict(obs[np.newaxis,...], deterministic=True)
                action = int(action[0]) if isinstance(action,(list,tuple,np.ndarray)) else int(action)
            except Exception:
                continue
            if action == 0:
                continue
            # Simulate entry at price at i (close of previous bar)
            entry_price = float(df['Close_raw'].iat[i-1])
            exit_price = float(df['Close_raw'].iat[i+horizon-1])
            pos = 1 if action==1 else -1
            pnl = (exit_price - entry_price) / entry_price * pos
            trades.append({"symbol": safe, "entry_i": i, "horizon": horizon, "action":action, "entry_price":entry_price, "exit_price":exit_price, "pnl":pnl})
    trades_df = pd.DataFrame(trades)
    return trades_df

def compute_trade_metrics(trades_df):
    if trades_df.empty:
        return {}
    pnl = trades_df['pnl']
    total = pnl.sum()
    mean = pnl.mean()
    std = pnl.std()
    wins = (pnl>0).sum()
    losses = (pnl<=0).sum()
    win_rate = wins / (wins+losses) if (wins+losses)>0 else 0.0
    # approximate daily Sharpe (assuming pnl is per trade; this is illustrative)
    sharpe = (mean / std) if std>0 else np.nan
    # max drawdown on cumulative
    cum = pnl.cumsum()
    roll_max = cum.cummax()
    dd = (cum - roll_max).min()
    metrics = {
        "n_trades": int(len(pnl)),
        "total_pnl": float(total),
        "mean_per_trade": float(mean),
        "std_per_trade": float(std),
        "win_rate": float(win_rate),
        "sharpe_approx": float(sharpe) if not np.isnan(sharpe) else None,
        "max_drawdown": float(dd)
    }
    return metrics


In [15]:
# Cell 9 - run simulation evaluation
print("Running historical simulation evaluation... (this may take a while)")
sim_trades = simulate_trades_on_history(datasets, model, symbols=list(datasets.keys()), window=WINDOW, horizon=10)
sim_trades.to_csv(os.path.join(MODEL_DIR, "simulated_trades.csv"), index=False)
metrics = compute_trade_metrics(sim_trades)
print("Simulation metrics:", json.dumps(metrics, indent=2))
# Quick summary plot of PnL distribution
if not sim_trades.empty:
    plt.figure(figsize=(8,4))
    plt.hist(sim_trades['pnl'], bins=60)
    plt.title("Distribution of trade returns (simulated)")
    plt.xlabel("Return")
    plt.ylabel("Count")
    plt.show()


Running historical simulation evaluation... (this may take a while)


NameError: name 'datasets' is not defined

In [16]:
# Cell 10 - run single live pass across symbols (DRY_RUN=True recommended)
symbols_live = list(safe_names.keys())  # raw symbol guesses mapped to safe names by safe_names mapping
print("Symbols to query (raw -> safe):")
for raw,safe in safe_names.items():
    print(" ", raw, "=>", safe)

# Example: user may want to explicitly set symbols to the MT5 names you use:
# symbols_live = ["Volatility 75 Index", "Volatility 10 Index", "EURUSD"]

run_once_predict_and_place(symbols_live, model, scalers, embeddings, safe_names, window=WINDOW, dry_run=DRY_RUN)


NameError: name 'safe_names' is not defined

In [None]:
# Cell 11 - analyze live trade log (simple)
if os.path.exists(LOG_FILE):
    df_log = pd.read_csv(LOG_FILE, parse_dates=['timestamp'])
    print("Last trades from log:")
    display(df_log.tail(10))
else:
    print("No trade log found at", LOG_FILE)
