# Simple Multi-Period Portfolio Optimization Example

## 1. Setup

#### 1.1 Load InvestOS module

In [1]:
# Add relative path to module lookup path...
import os
import sys
sys.path.insert(0, os.path.abspath('..'))

# ... then import module
import invest_os as inv

# Import other required modules
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import numpy as np

#### 1.2 Load historical stock prices and volumes

Download the [data from Kaggle](https://www.kaggle.com/datasets/camnugent/sandp500/download?datasetVersionNumber=4) and place `all_stocks_5yr.csv` in the `examples/data` directory.

Note: you will need to create (and sign into) a free Kaggle account

In [2]:
# A. Load the S&P 500 stock CSV (all_stocks_5yr.csv) from the data directory
#    and rename its 'Name' column to 'asset'
dir_name = './data/'
prices_csv = dir_name + 'all_stocks_5yr.csv'

df = pd.read_csv(prices_csv)
df = df.rename(columns={'Name': 'asset'})

In [3]:
# B. Keep asset, date, price, and volume only.
# The 'open' column is relabelled as 'price'; the original note describes the
# price as mid-way between bid and ask.
# NOTE(review): confirm that using the open as that mid-point is intended.
df = (
    df
    .loc[:, ['asset', 'date', 'open', 'volume']]
    .rename(columns={'open': 'price'})
)

#### 1.3 Create (fake) historical stock bid/ask spreads

In [4]:
# Placeholder bid/ask spread: a flat 0.01 (1 cent per share) on every row.
# Real spread data should replace this wherever possible.
df = df.assign(spread=0.01)

#### 1.4 Clean DF

In [5]:
def _has_few_missing_prices(asset_rows):
    """Return True when this asset's rows contain fewer than 10 null prices."""
    return asset_rows['price'].isna().sum() < 10


# Keep stocks (assets) with less than 10 price nulls only
df = df.groupby('asset').filter(_has_few_missing_prices)

#### 1.5 Calculate returns

In [6]:
# Previous observation's price within each asset.
# shift(1) is positional, so the shift is computed on a copy ordered by
# (asset, date) rather than trusting the CSV's row order. Assigning the result
# back aligns on the row index, so df's own row order is left untouched.
# NOTE(review): relies on 'date' sorting chronologically (e.g. ISO-formatted
# strings) and on unique index labels — confirm for other data sources.
df['price_t-1'] = (
    df.sort_values(['asset', 'date'], kind='mergesort')
    .groupby('asset')['price']
    .shift(1)
)

In [7]:
# Simple return: current price relative to the previous price, minus one
df['return'] = df['price'].div(df['price_t-1']).sub(1)

In [8]:
# Keep required columns only, and drop rows whose return is null
required_columns = ['asset', 'date', 'price', 'return', 'volume', 'spread']
has_return = df['return'].notna()

df = df.loc[has_return, required_columns]

#### 1.6 Split dfs into historical and forecast

In [9]:
# Rows dated before the split are "historical"; rows on/after it feed the
# (fake) forecasts. Dates are compared against the split string directly.
date_split_forecasts = '2016-01-01'

# Take explicit copies: the forecast frame has a column overwritten later, and
# writing into a slice of df only works quietly because the chained-assignment
# warning is disabled at the top of the notebook.
df_historical = df[df['date'] < date_split_forecasts].copy()
df_forecast = df.loc[
    df['date'] >= date_split_forecasts, ['asset', 'date', 'return']
].copy()

#### 1.7 Create (fake) forecasts

In [10]:
# Seed the global NumPy RNG so the fake forecasts are reproducible
np.random.seed(0)

# Add Gaussian noise to daily returns: zero mean, with the standard deviation
# of the historical returns (sample std, same ddof as the variance used before)
std = df_historical['return'].std()
noise = np.random.normal(0, std, size=len(df_forecast))

# Rebuild from the untouched returns in df (looked up by row label) instead of
# adding onto df_forecast['return'] itself, so re-running this cell does not
# stack noise on top of already-noised values.
df_forecast['return'] = df.loc[df_forecast.index, 'return'] + noise

## 2. Portfolio optimization

#### 2.1 Create portfolio optimization instance

In [11]:
# Build the optimizer from the historical frame and the forecast frame,
# selecting the RankLongShort optimization type
optimization_type = inv.portfolio_optimization.type.RankLongShort

po = inv.PortfolioOptimization(df_historical, df_forecast, type=optimization_type)

In [12]:
# Inspection hint — last historical values: po.historical['price'].index.max()
# NOTE(review): presumably po.historical['price'] is indexed by date — confirm.

# Run the optimization; the printed message points to a backtest_result object
# for the outcome.
po.optimize()

Optimizing...
Done optimizing. Check backtest_result object for more information


In [21]:
# Zero-valued Series with one entry per column of po.forecast['return']
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours — re-run the notebook top to bottom before sharing.
pd.Series(0, index=po.forecast['return'].columns)

asset
A       0
AAL     0
AAP     0
AAPL    0
ABBV    0
       ..
XYL     0
YUM     0
ZBH     0
ZION    0
ZTS     0
Length: 505, dtype: int64

In [13]:
# TBUS:

# [ ] Check that all inputs passed in (required or otherwise) are in correct format
# [ ] --> Else throw error with message saying as much