# Simple Backtest Example - SPO

## 1. Setup

#### 1.1 Load InvestOS module

In [1]:
# Add relative path to module lookup path...
import os
import sys

sys.path.insert(0, os.path.abspath(".."))

# ... then import module
import investos as inv

# Import other required modules
import pandas as pd

pd.options.mode.chained_assignment = None  # default='warn'
pd.options.plotting.backend = "plotly"
import numpy as np

#### 1.2 Load historical stock prices and volumes

Place in examples/data directory [data from Kaggle](https://www.kaggle.com/datasets/camnugent/sandp500/download?datasetVersionNumber=4)

Note: you will need to create (and sign into) a free Kaggle account

In [2]:
# A. Load S&P 500 tickers, names, and industries
dir_name = "./data/"

df = pd.read_csv(dir_name + "all_stocks_5yr.csv").rename(columns={"Name": "asset"})

In [3]:
# B. Keep date, price (mid-way between bid and ask), and volume only
df = df[["asset", "date", "open", "volume"]]
df = df.rename(columns={"open": "price"})

#### 1.3 Create (fake) historical stock bid/ask spreads

In [4]:
# Note: real data should be used for this, wherever possible
# For now, assume 0.01% (i.e. 1 cent on $100.00 stock)
df["spread"] = 1 / 10_000  # EXPECTED AS A PERCENT

#### 1.4 Clean DF

In [5]:
# Keep stocks (assets) with less than 10 price nulls only
df = df.groupby("asset").filter(lambda g: g["price"].isnull().sum().sum() < 10)
# Make date type datetime
df["date"] = pd.to_datetime(df["date"])

#### 1.5 Calculate returns

In [6]:
df["price_t+1"] = df.groupby(["asset"])["price"].shift(-1)

In [7]:
df["return"] = df["price"] / df["price_t+1"] - 1  # fwd looking return

In [8]:
# Keep required columns only
df = df[["asset", "date", "price", "return", "volume", "spread"]]

# Drop null returns
df = df[df["return"].notnull()]

In [9]:
# Drop where return data insufficient
UNIQUE_RETURNS_P_THRESHOLD = 0.99

s_nunique = df.groupby(["asset"]).nunique()["return"]
max_unique_returns = s_nunique.max()
df_percent_returns_unique = (s_nunique / max_unique_returns).reset_index()
df_percent_returns_unique = df_percent_returns_unique.rename(
    columns={"return": "p_unique"}
)
df = df.merge(df_percent_returns_unique, on=["asset"], how="left")
df = df[df["p_unique"] >= UNIQUE_RETURNS_P_THRESHOLD]
df = df.drop(columns=["p_unique"])

#### 1.6 Split dfs into historical and forecast

In [10]:
df_actual = df
df_forecast = df[["asset", "date", "return"]]

In [11]:
def pivot_and_fill(
    df, values="return", columns="asset", index="date", fill_method="bfill"
):
    return pd.pivot(df, values=values, columns=columns, index=index).fillna(
        method=fill_method
    )

#### 1.7 Create (fake) forecasts

In [12]:
np.random.seed(0)

# Median (daily) return is VERY close to 0:
print("Median return:", df_forecast["return"].median())

# Cap returns at +- 10%...
df_forecast["return"] = df_forecast["return"].clip(-0.1, 0.1)

# ... then reduce signal...
df_forecast["return"] /= 10

# ... then add guassian noise to daily returns
std = df_actual["return"].var() ** 0.5
noise = np.random.normal(0, std, size=df_forecast.shape[0])

df_forecast["return"] = df_forecast["return"] + noise

Median return: -0.0007805182641272834


In [13]:
# Make sure predictions aren't too accurate:

agree_on_sign = np.sign(df_forecast["return"]) == np.sign(df_actual["return"])

print(
    "Return predictions have the right sign %.1f%% of the time"
    % ((agree_on_sign.sum() / agree_on_sign.shape[0]) * 100)
)

Return predictions have the right sign 52.4% of the time


In [14]:
df_actual_returns = pivot_and_fill(df_actual)
df_forecast_returns = pivot_and_fill(df_forecast)

cash_return = 0.03 / 252
df_actual_returns["cash"] = cash_return
df_forecast_returns["cash"] = cash_return

In [15]:
short_rates = pd.Series(index=df_forecast_returns.columns, data=0.003 / 252)
# Borrowing rate assumed to be equal to cash return.
# ELSE can set short rate on cash to number other than 0
# for delta on long/short cash return
short_rates["cash"] = 0

In [16]:
start_date_sample = "2015-12-31"
end_date_sample = "2016-12-31"

In [17]:
# std = df_actual['return'].var() ** 0.5
std = (
    df_actual_returns[
        (df_actual_returns.index > start_date_sample)
        & (df_actual_returns.index <= end_date_sample)
    ].var()
    ** 0.5
)
std["cash"] = 0

In [18]:
vol = pivot_and_fill(df_actual, values="volume")
vol = vol[(vol.index > start_date_sample) & (vol.index <= end_date_sample)].median()
vol["cash"] = 1

In [19]:
price = pivot_and_fill(df_actual, values="price")
price["cash"] = 1

In [20]:
spread = pivot_and_fill(df_actual, values="spread")

## 2. Portfolio optimization

#### 2.1 Create portfolio optimization instance

In [21]:
from investos.portfolio.cost_model import *
from investos.portfolio.constraint_model import *
from investos.portfolio.risk_model import *

In [22]:
strategy = inv.portfolio.strategy.SPO(
    actual_returns=df_actual_returns,
    forecast_returns=df_forecast_returns,
    costs=[
        ShortHoldingCost(short_rates=short_rates, exclude_assets=["cash"]),
        TradingCost(
            exclude_assets=["cash"],
            forecast_volume=vol,
            actual_prices=price,
            half_spread=(spread / 2),
            forecast_std_dev=std,
        ),
    ],
    constraints=[
        MaxShortLeverageConstraint(limit=0.3),
        MaxLongLeverageConstraint(limit=1.3),
        MinWeightConstraint(),
        MaxWeightConstraint(),
        LongCashConstraint(),
    ],
    cash_column_name="cash",
)

portfolio = inv.portfolio.BacktestController(
    strategy=strategy,
    start_date="2017-01-01",
    end_date="2018-01-01",
    hooks={
        "after_trades": [
            lambda backtest, t, u, h_next: print(".", end=""),
        ]
    },
)

In [None]:
backtest_result = portfolio.generate_positions()

Generating historical portfolio trades and positions...
.......................................................

In [None]:
backtest_result.v.plot()

In [None]:
backtest_result.summary

In [None]:
backtest_result.hit_rate().plot()

In [None]:
backtest_result.leverage.plot()

In [None]:
backtest_result.long_leverage.plot()

In [None]:
backtest_result.short_leverage.plot()