# Simple Multi-Period Portfolio Optimization Example

## 1. Setup

#### 1.1 Load InvestOS module

In [1]:
# Add relative path to module lookup path...
import os
import sys
sys.path.insert(0, os.path.abspath('..'))

# ... then import module
import invest_os as inv

# Import other required modules
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import numpy as np

#### 1.2 Load historical stock prices and volumes

Place in examples/data directory [data from Kaggle](https://www.kaggle.com/datasets/camnugent/sandp500/download?datasetVersionNumber=4)

Note: you will need to create (and sign into) a free Kaggle account

In [2]:
# A. Load S&P 500 tickers, names, and industries
dir_name = './data/'

df = pd.read_csv(
    dir_name + 'all_stocks_5yr.csv'
).rename(columns={'Name': 'asset'})

In [3]:
# B. Keep date, price (mid-way between bid and ask), and volume only
df = df[['asset', 'date', 'open', 'volume']]
df = df.rename(columns={'open': 'price'})

#### 1.3 Create (fake) historical stock bid/ask spreads

In [4]:
# Note: real data should be used for this, wherever possible
# For now, assume 1 cent per share
df['spread'] = 0.01

#### 1.4 Clean DF

In [5]:
# Keep stocks (assets) with less than 10 price nulls only
df = df.groupby('asset').filter(lambda g: g['price'].isnull().sum().sum() < 10)
# Make date type datetime
df['date'] = pd.to_datetime(df['date'])

#### 1.5 Calculate returns

In [6]:
df['price_t+1'] = df.groupby(['asset'])['price'].shift(-1)

In [7]:
df['return'] = df['price'] / df['price_t+1'] - 1 # fwd looking return

In [8]:
# Keep required columns only
df = df[['asset', 'date', 'price', 'return', 'volume', 'spread']]

# Drop null returns
df = df[df['return'].notnull()]

#### 1.6 Split dfs into historical and forecast

In [9]:
date_split_forecasts = '2016-01-01'

df_historical = df[df.date < date_split_forecasts]
df_forecast = df[df.date >= date_split_forecasts][['asset', 'date', 'return']]
df_actual = df_forecast.copy()

#### 1.7 Create (fake) forecasts

In [10]:
np.random.seed(0)

# Median return should be VERY close to 0...
print("Median return:", df_forecast['return'].median())

# ... so divide by 5x to reduce signal...
df_forecast['return'] /= 5

# ... then add guassian noise to daily returns
std = df_historical['return'].var() ** 0.5
noise = np.random.normal(0, std, size=df_forecast.shape[0])

df_forecast['return'] = df_forecast['return'] + noise

Median return: -0.0008328128253174416


In [11]:
# Make sure predictions aren't too accurate:

agree_on_sign = np.sign(df_forecast['return']) == np.sign(df_actual['return'])

print(
    "Return predictions have the right sign %.1f%% of the time" %
    ((agree_on_sign.sum() / agree_on_sign.shape[0]) * 100)
)

Return predictions have the right sign 54.8% of the time


## 2. Portfolio optimization

#### 2.1 Create portfolio optimization instance

In [12]:
strategy = inv.portfolio_optimization.strategy.RankLongShort(
    n_periods_held = 1,
    leverage = 1
)

po = inv.portfolio_optimization.Optimizer(
    df_forecast,
    df_actual,
    df_historical,
    strategy=strategy
)

In [13]:
backtest = po.optimize()

Optimizing...
Done simulating.


In [14]:
po.backtest.h

asset,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACN,ADBE,ADI,...,XLNX,XOM,XRAY,XRX,XYL,YUM,ZBH,ZION,ZTS,cash
2016-01-04,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,1.000000e+08
2016-01-05,0.000000e+00,3.978854e+05,-3.887508e+05,-3.850426e+05,-3.973044e+05,0.000000e+00,4.058777e+05,-3.993549e+05,0.000000e+00,0.000000e+00,...,0.000000e+00,3.984191e+05,-3.965575e+05,4.010633e+05,0.000000e+00,-3.885979e+05,-3.934018e+05,0.000000e+00,3.920645e+05,1.005000e+08
2016-01-06,4.059924e+05,1.089907e+03,4.164817e+05,4.342006e+05,-4.927759e+02,-4.040336e+05,9.191418e+03,4.031650e+05,-4.051148e+05,0.000000e+00,...,0.000000e+00,4.055918e+05,-3.986870e+05,-4.035304e+05,3.970343e+05,8.414638e+03,-3.957060e+05,0.000000e+00,-4.785075e+03,1.010025e+08
2016-01-07,-4.018923e+05,3.889799e+05,-3.943074e+05,-7.841711e+05,-8.009647e+05,-4.110939e+05,4.121538e+05,-8.090535e+05,-4.175769e+05,0.000000e+00,...,0.000000e+00,4.043692e+05,-4.029959e+05,-1.158052e+04,-4.219790e+03,-3.962001e+05,-3.927643e+05,4.210535e+05,3.943764e+05,1.015075e+08
2016-01-08,4.144938e+05,4.048660e+05,-8.539440e+03,4.200937e+05,4.066431e+05,-8.329600e+05,1.526252e+04,3.957290e+05,-4.116179e+05,0.000000e+00,...,0.000000e+00,-3.748793e+03,5.084953e+03,-4.041423e+05,-1.566029e+04,6.304274e+02,1.536051e+04,1.514818e+04,-2.451627e+03,1.020151e+08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-01-31,-1.633875e+54,-1.082706e+53,-4.906097e+53,1.843579e+53,1.836171e+53,3.008155e+52,-9.206833e+53,-2.718463e+53,-9.659662e+53,3.647060e+52,...,1.373770e+54,-1.729878e+53,-2.226944e+53,1.245584e+54,-2.819122e+53,-8.435994e+53,3.974596e+53,-2.579529e+52,3.708821e+52,-2.625421e+40
2018-02-01,5.361356e+52,-8.597769e+53,8.055722e+53,2.322908e+53,1.940806e+52,-1.924727e+53,-8.324255e+53,2.489384e+53,-7.569051e+53,-3.222692e+53,...,-5.645718e+53,-2.169384e+53,-6.374567e+53,-3.335221e+53,1.996531e+53,3.208393e+53,7.423153e+53,-1.445844e+54,1.027085e+54,-9.115296e+39
2018-02-02,-1.836909e+54,-9.425892e+52,-2.851897e+53,1.852382e+53,2.336992e+51,2.001143e+52,-9.260241e+53,-2.722449e+53,-7.834933e+53,3.441841e+52,...,1.392492e+54,1.155629e+52,-2.328875e+53,1.308775e+54,-2.706935e+53,-8.455714e+53,5.777753e+53,-2.980734e+52,2.384138e+53,-2.266923e+40
2018-02-05,-1.590021e+53,-1.078736e+54,1.017805e+54,4.396761e+53,1.976806e+52,-1.997439e+53,-8.337195e+53,4.472508e+53,-7.768212e+53,-3.292986e+53,...,-3.676873e+53,-4.134284e+53,-4.529811e+53,-2.785812e+53,2.090669e+52,1.324979e+53,7.525000e+53,-1.478504e+54,1.036847e+54,-1.372001e+40


-5.80170738974912e+22

In [16]:
# TBUS:

# [ ] Check that all inputs passed in (required or otherwise) are in correct format
# [ ] --> Else throw error with message saying as much