# Actor-Based Backtesting: Neo Modern Portfolio Theory (NMPT)

This notebook implements a **Neo Modern Portfolio Theory (NMPT)** allocation scheme using **Hierarchical Risk Parity (HRP)**.

### Why NMPT? 
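Standard MPT (mean-variance optimization) is notoriously sensitive to errors in its estimated inputs (expected returns and covariances): small estimation errors can produce very different, unstable portfolio weights (the 'Markowitz Optimization' instability).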
NMPT approaches like HRP use machine learning (Hierarchical Clustering) to build a diversified portfolio based on the correlation structure of assets, without requiring expected return estimates.

### Strategy:
- Use RSI, MACD, and SMA Golden Cross to identify signals.
- Allocate capital using HRP among active signals.

In [None]:
import sys
import os
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import uuid
from sqlalchemy import create_engine, StaticPool
from scipy.cluster.hierarchy import linkage
from scipy.spatial.distance import squareform

# 1. Setup Engine Paths
# Make the local trade-engine checkout importable. The path is relative, so it
# is resolved against the current working directory at the time this cell runs.
engine_root = os.path.abspath("../another_testing_engine/trade-engine/trade-engine")
# Guard against adding the same entry again when the cell is re-executed.
if engine_root not in sys.path:
    sys.path.append(engine_root)

from tradeengine.actors.memory import MemPortfolioActor
from tradeengine.actors.sql import SQLOrderbookActor
from tradeengine.backtest import BacktestStrategy
from tradeengine.dto import Asset

print("Engine components loaded.")

## HRP Optimization Logic
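This implementation follows the three HRP steps: hierarchical tree clustering on a correlation-based distance, Quasi-Diagonalization (reordering assets so that similar ones sit next to each other), and Recursive Bisection (splitting capital between the two halves of each cluster in inverse proportion to their variance).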

In [None]:
def get_ivp(cov, **kargs):
    """Return inverse-variance portfolio weights for a covariance matrix.

    Each asset is weighted by the reciprocal of its own variance (the
    diagonal of ``cov``) and the weights are rescaled to sum to one.
    Off-diagonal covariances are ignored. Extra keyword arguments are
    accepted and unused.
    """
    inverse_variance = 1. / np.diag(cov)
    return inverse_variance / inverse_variance.sum()

def get_cluster_var(cov, c_items):
    """Return the variance of one cluster under inverse-variance weights.

    ``cov`` is indexed by label on both axes and ``c_items`` lists the
    labels belonging to the cluster. The cluster's covariance sub-matrix is
    weighted with ``get_ivp`` and the quadratic form ``w' * cov * w`` is
    returned.
    """
    cluster_cov = cov.loc[c_items, c_items]
    weights = get_ivp(cluster_cov)
    # Same evaluation order as (w @ cov) @ w.
    weighted_cov = np.dot(weights, cluster_cov)
    return np.dot(weighted_cov, weights)

def get_quasi_diag(link):
    """Return the leaf order implied by a linkage matrix.

    ``link`` has one row per merge: columns 0 and 1 hold the ids of the two
    merged nodes and column 3 of the last row holds the total number of
    original items. Ids below that number are original items; an id at or
    above it refers to the cluster created by row ``id - num_items``.

    The tree is walked from the final merge downwards, always expanding the
    first child before the second, so the returned list of item ids is the
    left-to-right order of the leaves.
    """
    link = link.astype(int)
    num_items = link[-1, 3]

    ordered_leaves = []
    # Stack of node ids still to visit; the second child is pushed first so
    # the first child is popped (and therefore emitted) first.
    pending = [link[-1, 1], link[-1, 0]]
    while pending:
        node = pending.pop()
        if node >= num_items:
            merge_row = node - num_items
            pending.append(link[merge_row, 1])
            pending.append(link[merge_row, 0])
        else:
            ordered_leaves.append(int(node))
    return ordered_leaves

def get_rec_bisection(cov, sort_ix):
    """Allocate weights by recursively bisecting the ordered asset list.

    Parameters
    ----------
    cov : pandas.DataFrame
        Covariance matrix indexed by asset label on both axes.
    sort_ix : list
        Asset labels in quasi-diagonal order.

    Returns
    -------
    pandas.Series
        Float weights indexed by ``sort_ix``. Every asset starts at 1.0 and
        is scaled at each split by its half's share, so the weights sum to
        one whenever all cluster variances are finite and positive.
    """
    # Start from float weights. An integer Series would have to be upcast on
    # the first fractional update, which recent pandas versions warn about.
    w = pd.Series(1.0, index=sort_ix)
    c_items = [sort_ix]
    while c_items:
        # Split every cluster that still has more than one member into a
        # first and second half; singletons are dropped, which ends the loop.
        c_items = [
            cluster[start:stop]
            for cluster in c_items
            if len(cluster) > 1
            for start, stop in ((0, len(cluster) // 2), (len(cluster) // 2, len(cluster)))
        ]
        # Halves were appended pairwise, so even/odd positions are siblings.
        for left, right in zip(c_items[::2], c_items[1::2]):
            v0 = get_cluster_var(cov, left)
            v1 = get_cluster_var(cov, right)
            # The lower-variance half receives the larger share.
            alpha = 1 - v0 / (v0 + v1)
            w.loc[left] *= alpha
            w.loc[right] *= 1 - alpha
    return w

def optimize_hrp(returns):
    """Compute Hierarchical Risk Parity weights from a frame of returns.

    Parameters
    ----------
    returns : pandas.DataFrame
        One column of periodic returns per asset.

    Returns
    -------
    dict
        Mapping of column label to portfolio weight. An empty frame yields
        an empty dict and a single-column frame yields ``{label: 1.0}``,
        because there is nothing to cluster in either case.

    Notes
    -----
    Columns whose correlation is undefined (for example constant returns)
    produce NaN distances, which ``linkage`` rejects with a ``ValueError``.
    """
    n_assets = returns.shape[1]
    if n_assets == 0:
        return {}
    if n_assets == 1:
        return {returns.columns[0]: 1.0}

    corr = returns.corr()
    cov = returns.cov()
    # Correlation distance in [0, 1]. Rounding can push a correlation a hair
    # above 1, which would make the radicand negative, so clip at zero first.
    dist = np.sqrt(((1 - corr) / 2.).clip(lower=0.0)).to_numpy(copy=True)
    # The self-distance is zero by definition; enforce it exactly so rounding
    # noise on the diagonal cannot leak into the condensed form.
    np.fill_diagonal(dist, 0.0)
    # The matrix was sanitised above, so skip squareform's validity checks.
    link = linkage(squareform(dist, checks=False), 'single')
    sort_ix = get_quasi_diag(link)
    # Translate positional leaf ids back to asset labels.
    sort_ix = corr.index[sort_ix].tolist()
    hrp = get_rec_bisection(cov, sort_ix)
    return hrp.to_dict()

## Data Loading & Indicator Signals

In [None]:
# Location of the cleaned per-asset CSV files (relative to the working directory).
data_dir = "../dataset/cleaned"

# Take the first 30 matching files in lexicographic order.
asset_files = sorted(
    name
    for name in os.listdir(data_dir)
    if name.startswith("Asset_") and name.endswith(".csv")
)[:30]
# The asset identifier is the file name up to its first dot.
assets = [name.split(".")[0] for name in asset_files]

# One date-indexed, chronologically sorted frame per asset.
dfs = {
    asset: pd.read_csv(
        os.path.join(data_dir, file_name), parse_dates=True, index_col="Date"
    ).sort_index()
    for asset, file_name in zip(assets, asset_files)
}

# Simple close-to-close returns per asset, then aligned into a single frame.
returns_list = {asset: frame['Close'].pct_change() for asset, frame in dfs.items()}
returns_df = pd.DataFrame(returns_list)

def calculate_indicators(df):
    """Return a copy of ``df`` with technical-indicator columns appended.

    The input must contain a ``'Close'`` column and is left unmodified.
    Added columns, in order:

    - ``RSI``: 100 - 100 / (1 + avg_gain / avg_loss) using 14-row simple
      rolling means of the positive and negative close-to-close changes.
    - ``MACD``: 12-span EMA of the close minus the 26-span EMA.
    - ``Signal``: 9-span EMA of ``MACD``.
    - ``SMA50`` / ``SMA200``: 50- and 200-row simple moving averages.
    """
    out = df.copy()
    close = out['Close']

    # RSI building blocks: average up-move and average (positive) down-move.
    change = close.diff()
    avg_gain = change.where(change > 0, 0).rolling(14).mean()
    avg_loss = (-change.where(change < 0, 0)).rolling(14).mean()
    relative_strength = avg_gain / avg_loss
    out['RSI'] = 100 - (100 / (1 + relative_strength))

    fast_ema = close.ewm(span=12).mean()
    slow_ema = close.ewm(span=26).mean()
    out['MACD'] = fast_ema - slow_ema
    out['Signal'] = out['MACD'].ewm(span=9).mean()

    for window in (50, 200):
        out[f'SMA{window}'] = close.rolling(window).mean()
    return out

# Indicator-enriched copy of every asset frame, keyed by asset name; the raw
# frames in `dfs` are left untouched because calculate_indicators works on a copy.
processed_dfs = {a: calculate_indicators(df) for a, df in dfs.items()}
print("Indicators calculated.")

## Strategy Rebalancing with NMPT (HRP)

In [None]:
# Strategy parameters.
MAX_ACTIVE_ASSETS = 15    # cap on simultaneously held assets
MIN_ASSETS_FOR_HRP = 3    # HRP is only run with at least this many active assets
LOOKBACK_ROWS = 126       # rows of return history fed to HRP
MIN_LOOKBACK_ROWS = 21    # skip the rebalance if fewer usable rows remain

active_assets = set()
all_dates = sorted(returns_df.index)
# Per asset: {timestamp: order spec}; at most one order per asset and timestamp.
final_signals = {a: {} for a in assets}
last_rebal = None

for t in all_dates:
    changed = False
    for a in assets:
        if t not in processed_dfs[a].index:
            continue
        row = processed_dfs[a].loc[t]
        # Comparisons against NaN indicators are False, so no signal fires
        # for a rule until the indicators it uses are available.
        exit_signal = row['RSI'] > 70 or (
            row['SMA50'] < row['SMA200'] and row['MACD'] < row['Signal']
        )
        entry_signal = row['SMA50'] > row['SMA200'] or (
            row['RSI'] < 30 and row['MACD'] > row['Signal']
        )
        if exit_signal and a in active_assets:
            active_assets.remove(a)
            final_signals[a][t] = {'CloseOrder': {}}
            changed = True
        elif entry_signal and a not in active_assets:
            if len(active_assets) < MAX_ACTIVE_ASSETS:
                active_assets.add(a)
                changed = True

    # Rebalance when membership changed or a new calendar month started.
    # Year and month are compared together so that the same month of a later
    # year still counts as a new month.
    new_month = last_rebal is None or (t.year, t.month) != (last_rebal.year, last_rebal.month)
    if (changed or new_month) and len(active_assets) >= MIN_ASSETS_FOR_HRP:
        # Sort the active set so column order (and therefore the clustering
        # input) is reproducible from run to run.
        columns = sorted(active_assets)
        lookback = (
            returns_df.loc[:t]
            .tail(LOOKBACK_ROWS)[columns]
            .dropna(how='all')
            .fillna(0)
        )
        if len(lookback) >= MIN_LOOKBACK_ROWS:
            weights = optimize_hrp(lookback)
            for a, w in weights.items():
                final_signals[a][t] = {'TargetWeightOrder': {'size': float(w)}}
            # Only record a rebalance that actually produced orders, so a
            # skipped one is retried on the next date.
            last_rebal = t

formatted_signals = {a: pd.Series(sig).sort_index() for a, sig in final_signals.items()}
print("NMPT signals generated.")

## Running NMPT Backtest

In [None]:
# Start the portfolio actor with 1,000,000 of initial funding.
portfolio_actor = MemPortfolioActor.start(funding=1_000_000.0)
# In-memory SQLite database. StaticPool with check_same_thread=False keeps a
# single shared connection usable from more than one thread.
db_engine = create_engine('sqlite://', connect_args={'check_same_thread': False}, poolclass=StaticPool)
# A fresh random strategy id is generated on every run.
orderbook_actor = SQLOrderbookActor.start(portfolio_actor, db_engine, strategy_id=str(uuid.uuid4()))
# OHLC quotes per asset, taken from the raw (indicator-free) frames.
quote_frames = {a: dfs[a][['Open', 'High', 'Low', 'Close']] for a in assets}

bt = BacktestStrategy(orderbook_actor, portfolio_actor, quote_frames)
result = bt.run_backtest(formatted_signals)

# NOTE(review): `porfolio_performance` is spelled without the "t"; presumably
# this matches the attribute name exposed by the engine's result object - confirm.
perf = result.porfolio_performance
perf['performance'].plot(figsize=(12, 6), title="NMPT (Hierarchical Risk Parity) Performance", color='purple')
# Assumes 'performance' is a cumulative series normalised to start at 1 - TODO confirm.
print("Total Return:", (perf['performance'].iloc[-1] - 1) * 100, "%")
# NOTE(review): the actors started above are never stopped in this cell; check
# whether the engine expects an explicit shutdown.