# Simple Multi-Period Portfolio Optimization Example

## 1. Setup

#### 1.1 Load InvestOS module

In [1]:
# Add relative path to module lookup path...
import os
import sys
sys.path.insert(0, os.path.abspath('..'))

# ... then import module
import invest_os as investOS

# Import other required modules
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import numpy as np

#### 1.2 Load historical stock prices and volumes

Download the [S&P 500 stock data from Kaggle](https://www.kaggle.com/datasets/camnugent/sandp500/download?datasetVersionNumber=4) and place `all_stocks_5yr.csv` in the `examples/data` directory.

Note: you will need to create (and sign in to) a free Kaggle account to download the data.

In [2]:
# A. Read the Kaggle S&P 500 CSV, then rename its `Name` column to `asset`
dir_name = './data/'

df = pd.read_csv(dir_name + 'all_stocks_5yr.csv')
df = df.rename(columns={'Name': 'asset'})

In [3]:
# B. Keep asset, date, price, and volume only. The daily `open` column is
# relabelled as `price` (treated here as a proxy for the bid/ask mid-point).
df = (
    df[['asset', 'date', 'open', 'volume']]
    .rename(columns={'open': 'price'})
)

#### 1.3 Create (fake) historical stock bid/ask spreads

In [4]:
# Placeholder bid/ask spread: a flat 1 cent per share on every row.
# Note: real spread data should be used instead, wherever possible.
df = df.assign(spread=0.01)

#### 1.4 Clean DF

In [5]:
# Keep only assets (stocks) with fewer than 10 missing prices.
# `isnull().sum()` on a single column already yields one scalar count per
# asset, so one `sum` is all that is needed.
df = df.groupby('asset').filter(lambda g: g['price'].isnull().sum() < 10)

#### 1.5 Calculate returns

In [6]:
# Previous row's price within each asset. `shift` works on row order, so
# this assumes rows are already in date order per asset — TODO confirm.
df['price_t-1'] = df.groupby('asset')['price'].shift()

In [7]:
# Simple one-period return: price / previous price - 1
df['return'] = df['price'].div(df['price_t-1']).sub(1)

In [8]:
# Keep required columns only, then drop rows with a null return
# (e.g. an asset's first row, which has no previous price)
df = df[['asset', 'date', 'return', 'volume', 'spread']]
df = df.dropna(subset=['return'])

#### 1.6 Split dfs into historical and forecast

In [9]:
# Rows dated before the split are "historical"; rows on or after it form the
# forecast period.
# NOTE(review): `date` is compared against a 'YYYY-MM-DD' string, which
# assumes the column holds values in that same sortable format — confirm.
date_split_forecasts = '2016-01-01'

df_historical = df[df['date'] < date_split_forecasts]

# Select rows and columns in one `.loc` call and take an explicit copy, so
# that later writes to `df_forecast` never target a slice of `df`.
df_forecast = df.loc[
    df['date'] >= date_split_forecasts,
    ['asset', 'date', 'return']
].copy()

#### 1.7 Create (fake) forecasts

In [10]:
# Fix the seed so the synthetic forecasts are reproducible
np.random.seed(0)

# Add Gaussian noise to daily returns; the noise scale is the square root of
# the variance of the historical returns
std = df_historical['return'].var() ** 0.5
noise = np.random.normal(loc=0, scale=std, size=len(df_forecast))

df_forecast['return'] = df_forecast['return'].add(noise)

## 2. Portfolio optimization

#### 2.1 Create portfolio optimization instance

In [11]:
# Build the optimizer from the two frames prepared above: the historical
# data (returns, volumes, spreads) first, then the (fake) forecast returns.
po = investOS.PortfolioOptimization(
    df_historical,
    df_forecast
)

In [12]:
# Display the optimizer's `forecast` attribute to check how the forecast
# input was stored
po.forecast

{'return': asset              A       AAL       AAP      AAPL      ABBV       ABC  \
 date                                                                     
 2016-01-04  0.008809 -0.001407 -0.018059 -0.027414  0.016085 -0.022747   
 2016-01-05  0.015609  0.003682  0.010294 -0.004918  0.013489  0.021139   
 2016-01-06  0.006325 -0.011918 -0.014197 -0.042710 -0.019085 -0.017576   
 2016-01-07 -0.059279  0.058511 -0.052279  0.000731 -0.015053  0.002191   
 2016-01-08 -0.037896  0.028886  0.032741 -0.000289  0.017664  0.003715   
 ...              ...       ...       ...       ...       ...       ...   
 2018-02-01 -0.005131  0.021172 -0.022067  0.018406 -0.011730 -0.042148   
 2018-02-02  0.004245 -0.023539 -0.043056  0.043321  0.028035  0.023255   
 2018-02-05 -0.017502 -0.013186 -0.040112 -0.038561 -0.046702 -0.027964   
 2018-02-06 -0.065165 -0.028957 -0.034628 -0.048224 -0.116507 -0.031852   
 2018-02-07  0.023366 -0.005342  0.029541  0.073379  0.045824 -0.039227   
 
 asset       

In [13]:
# TBUS:

# [ ] Check that all inputs passed in (required or otherwise) are in correct format
# [ ] --> Else throw error with message saying as much