In [74]:
import pandas as pd 
import numpy as np 
import pickle 
from gurobipy import * 

In [75]:
# Directory prefix (relative to the notebook) prepended to every pickle file name.
DATA_PATH = "../data/"

In [76]:
# Parameters consumed by PortOpt (passed as its ``p_dict`` argument).
model_param_dict = dict(
    # Objective weight on the alpha term; (1 - theta) weights the risk term.
    theta=0.5,
    # Bounds on the total portfolio weight held in any single market.
    market_ub=0.5,
    market_lb=0,
    # Cap on a single stock's weight; weights sum to 1, so 0.05 implies
    # holding at least 20 stocks.
    weight_ub=0.05,
    # Read by PortOpt but not used by any constraint.
    weight_lb=0,
    # Cost charged per unit of portfolio weight sold.
    trans_cost=0.01,
    # Big-M constant for the sell-indicator constraints.
    bigM=10,
    # Bounds on the total portfolio weight held in any single sector.
    sector_ub=0.3,
    sector_lb=0.01,
)

# Data Prep

In [77]:
def _load_pickle(file_name):
    """Unpickle ``DATA_PATH + file_name`` and return the stored object.

    Raises
    ------
    EOFError
        If the file is empty or truncated. The message names the offending
        path, since the bare "Ran out of input" error does not say which of
        the input files is broken.
    """
    path = DATA_PATH + file_name
    # NOTE: pickle.load can execute arbitrary code embedded in the file;
    # only load pickles produced by a trusted pipeline.
    with open(path, 'rb') as file:
        try:
            return pickle.load(file)
        except EOFError as err:
            raise EOFError(
                'Ran out of input while reading {} (file is empty or truncated)'.format(path)
            ) from err


# stock name arr
stock_name_arr = _load_pickle('stock_name_arr.pkl')

# Stock time arr
stock_time_arr = _load_pickle('stock_time_arr.pkl')

# Covariance matrix
cov_mat_arr = _load_pickle('cov_mat.pkl')

# market dict
market_dict = _load_pickle('market_dict.pkl')

# sector dict
sector_dict = _load_pickle('sector_dict.pkl')

# alpha
alpha_dict = _load_pickle('alpha.pkl')

# Everything the model needs, bundled under the keys PortOpt reads.
data_dict = {
    'stock_time_arr': stock_time_arr,
    'stock_name_arr': stock_name_arr,
    'cov_mat_arr': cov_mat_arr,
    'market_dict': market_dict,
    'sector_dict': sector_dict,
    'alpha_dict': alpha_dict
}

EOFError: Ran out of input

# Model

In [None]:
class PortOpt:
    """Gurobi portfolio model trading expected alpha off against risk.

    The objective that is maximised is::

        theta * (alpha . w - transaction_cost) - (1 - theta) * (w' Cov w)

    subject to: weights summing to one, per-market and per-sector bounds on
    total weight, and a per-asset upper bound. When an initial portfolio is
    supplied, the weight sold per asset is modelled with big-M constraints and
    charged ``trans_cost`` in the objective.

    Parameters
    ----------
    data_dict : dict
        Reads the keys 'stock_name_arr', 'alpha_dict', 'cov_mat_arr',
        'sector_dict' and 'market_dict'. 'alpha_dict' is keyed by stock name;
        'sector_dict' / 'market_dict' map a group key to a collection of
        stock names. 'cov_mat_arr' must support ``.dot`` (presumably an
        ndarray ordered like 'stock_name_arr' -- confirm with the data prep).
    p_dict : dict
        Reads the keys 'theta', 'market_ub', 'market_lb', 'weight_ub',
        'weight_lb', 'sector_ub', 'sector_lb', 'trans_cost' and 'bigM'.
    init_port_ : indexable, optional
        Current portfolio weights, indexed by stock position. ``None`` means
        no starting portfolio, so no transaction-cost terms are created.
    """

    def __init__(self, data_dict, p_dict, init_port_=None):
        # params retrieve
        self.theta = p_dict['theta']
        self.market_ub = p_dict['market_ub']
        self.market_lb = p_dict['market_lb']
        self.weight_ub = p_dict['weight_ub']
        # NOTE(review): weight_lb is stored but no constraint uses it; the
        # weight variables simply get a lower bound of 0.0.
        self.weight_lb = p_dict['weight_lb']
        self.sector_ub = p_dict['sector_ub']
        self.sector_lb = p_dict['sector_lb']
        self.trans_cost = p_dict['trans_cost']
        self.bigM = p_dict['bigM']

        # stock and market information retrieve
        self.stock_arr = data_dict['stock_name_arr']  # index of w
        self.stock_idx_arr = np.arange(0, len(self.stock_arr))

        # alpha re-keyed by stock position so it lines up with the variables
        self.alpha_dict = data_dict['alpha_dict']
        self.alpha_idx_dict = {
            idx: self.alpha_dict[self.stock_arr[idx]] for idx in self.stock_idx_arr
        }
        self.cov_mat_arr = data_dict['cov_mat_arr']

        # name -> first position in stock_arr, built once for both lookups
        first_pos = {}
        for pos, name in enumerate(self.stock_arr):
            first_pos.setdefault(name, pos)

        self.sector_dict = data_dict['sector_dict']
        self.sector_idx_dict = self._names_to_indices(self.sector_dict, first_pos)

        self.market_dict = data_dict['market_dict']
        self.market_idx_dict = self._names_to_indices(self.market_dict, first_pos)

        # port model
        self.model = Model('Port Opt Model')
        self.init_port = init_port_
        self.var_dict = {}
        self.__model_init()

    @staticmethod
    def _names_to_indices(group_dict, first_pos):
        """Translate ``{group: [stock names]}`` into ``{group: [positions]}``.

        Raises KeyError naming the group and the unknown stocks if a name is
        not part of the stock universe.
        """
        idx_dict = {}
        for key in group_dict:
            names = group_dict[key]
            missing = [name for name in names if name not in first_pos]
            if missing:
                raise KeyError(
                    'group {!r} references stocks missing from stock_name_arr: {}'.format(key, missing)
                )
            idx_dict[key] = [first_pos[name] for name in names]
        return idx_dict

    def optimize(self):
        """Solve the model with Gurobi."""
        self.model.optimize()

    def __model_init(self):
        """Build variables, constraints and objective, in that order."""
        self.__create_vars()
        self.__create_constrs()
        self.__create_obj()
        self.model.update()

    def __get_market_stock(self):  # TODO
        # return dict of arrays
        # keys of dict is the same as msci_dict
        pass

    def __create_vars(self):
        """Create decision variables and store them in ``self.var_dict``.

        'w' (always): non-negative continuous weight per stock, wrapped in a
        pandas Series so it can be used in the ``.dot`` products of the
        objective. 'y' and 'z' (only with an initial portfolio): weight sold
        per stock and its binary indicator.
        """
        # portfolio weights
        self.var_dict['w'] = pd.Series(
            self.model.addVars(self.stock_idx_arr, name='w', lb=0.0, vtype=GRB.CONTINUOUS),
            index=self.stock_idx_arr,
        )
        # portfolio change
        if self.init_port is not None:
            # portfolio change = last port - current port (only consider the sells part)
            self.var_dict['y'] = self.model.addVars(
                self.stock_idx_arr, vtype=GRB.CONTINUOUS, lb=0.0, name='y')

            # artificial var
            self.var_dict['z'] = self.model.addVars(
                self.stock_idx_arr, vtype=GRB.BINARY, name='z')

        self.model.update()

    def __create_constrs(self):
        """Add all constraints to the model."""
        w = self.var_dict['w']

        # 1 weights normalization
        self.model.addConstr(
            sum([w[i] for i in self.stock_idx_arr]) == 1,
            name='1_weights_normalization')

        # 2 limit weights per market
        for key in self.market_idx_dict:
            market_arr = self.market_idx_dict[key]
            self.model.addConstr(
                (sum([w[i] for i in market_arr]) <= self.market_ub),
                name='2_1_weights_{}_market_ub'.format(key))
            self.model.addConstr(
                (sum([w[i] for i in market_arr]) >= self.market_lb),
                name='2_1_weights_{}_market_lb'.format(key))

        # 3 limit weights per share
        self.model.addConstrs(
            (w[i] <= self.weight_ub for i in self.stock_idx_arr),
            name='3_weight_per_asset_ub')

        # 4 y = max{0, init_port - current port}
        # z[i] = 1 forces init_port[i] - w[i] <= 0 (nothing sold) and relaxes
        # the two y-constraints; z[i] = 0 pins y[i] == init_port[i] - w[i].
        # addConstrs appends the index to the name itself, so only a prefix
        # is passed.
        if self.init_port is not None:
            y = self.var_dict['y']
            z = self.var_dict['z']
            self.model.addConstrs(
                (self.init_port[i] - w[i] <= self.bigM * (1 - z[i])
                 for i in self.stock_idx_arr),
                name='4c')
            self.model.addConstrs(
                (-y[i] + self.init_port[i] - w[i] <= self.bigM * z[i]
                 for i in self.stock_idx_arr),
                name='4b')
            self.model.addConstrs(
                (y[i] - self.init_port[i] + w[i] <= self.bigM * z[i]
                 for i in self.stock_idx_arr),
                name='4a')

        # 5 limit weights per sector
        for key in self.sector_idx_dict:
            sector_arr = self.sector_idx_dict[key]
            self.model.addConstr(
                (sum([w[i] for i in sector_arr]) <= self.sector_ub),
                name='5_weights_{}_sector_ub'.format(key))
            self.model.addConstr(
                (sum([w[i] for i in sector_arr]) >= self.sector_lb),
                name='5_weights_{}_sector_lb'.format(key))

        self.model.update()

    def __create_obj(self):
        """Set the objective: maximise theta*(alpha - cost) - (1-theta)*risk."""
        w = self.var_dict['w']

        # risk term w' Cov w
        min_obj = self.cov_mat_arr.dot(w).dot(w)

        # alpha term; alphas are taken in stock order from the instance's own
        # mapping so they line up with w regardless of the input dict's order
        alpha_vec = np.array([self.alpha_idx_dict[i] for i in self.stock_idx_arr])
        max_obj = alpha_vec.dot(w)

        # transaction cost
        if self.init_port is None:
            cost = 0
        else:
            cost = sum(self.var_dict['y'][i] * self.trans_cost for i in self.stock_idx_arr)

        obj = self.theta * (max_obj - cost) - (1 - self.theta) * min_obj

        self.model.setObjective(obj, GRB.MAXIMIZE)
        self.model.update()

    def get_results(self):
        """Return ``(stock_arr, weights)`` after the model has been solved.

        ``weights`` is an array of the solved 'w' values in stock order, with
        every value not greater than 1e-10 replaced by 0.
        """
        solved = np.array(
            [self.var_dict['w'][i].x for i in self.stock_idx_arr], dtype=float)
        return self.stock_arr, np.where(solved > 1e-10, solved, 0.0)

In [None]:
# Build the model with no initial portfolio, so no transaction-cost terms are created.
port = PortOpt(data_dict=data_dict, p_dict=model_param_dict)

# Backtest

In [None]:
port.optimize()

# Status 4 (GRB.INF_OR_UNBD) means presolve could not tell infeasible from
# unbounded. Re-solve with dual reductions off to get a definite answer,
# because computeIIS() is only valid for an infeasible model.
if port.model.Status == GRB.INF_OR_UNBD:
    port.model.setParam('DualReductions', 0)
    port.optimize()

# Write the irreducible infeasible subsystem so the conflicting constraints
# can be inspected.
if port.model.Status == GRB.INFEASIBLE:
    port.model.computeIIS()
    port.model.write('IIS.ilp')

In [None]:
# Stock names and their solved weights (values not above 1e-10 come back as 0).
(stock_arr, results_arr) = port.get_results()

In [None]:
# Raw solved weight of every stock, in stock order (no thresholding applied).
l = [port.var_dict['w'][stock_idx].x for stock_idx in port.stock_idx_arr]

In [None]:
# Number of stocks whose solved weight exceeds the 1e-10 threshold.
sum(1 for i in port.stock_idx_arr if l[i] > 1e-10)