# Portfolio
- Goals
    - Utilise different machine learning models to train on the financial data and build a portfolio of stocks
- Data
    - Preprocessing is done under the path "data/~"
    - The data include five tables from the TEJ database, which include ~ 

## 1. Data and Import 

In [None]:
# Colab-only setup, kept inside a string literal so that it is NOT executed
# when this notebook runs locally.
# NOTE(review): as written the snippet calls `os.chdir` without importing
# `os`, and uses `pickle` / `industry` without importing or defining them
# inside the snippet -- fix those before re-enabling it.
""" 
# in colab
!pip install optuna
# mount
from google.colab import drive
drive.mount('/content/drive')
# path
os.chdir('/content/drive/MyDrive/portfolio/data')
import sys
sys.path.append(sys.path[0] + f"/drive/MyDrive/portfolio") # current path + ~
# data
with open(f"{sys.path[-1]}/data/{industry}/data4model.pickle", "rb") as f:
    data = pickle.load(f)
"""

In [1]:
%load_ext autoreload
%autoreload 2'
# Preprocessing
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import gc, pickle, warnings

from train import summary
from utils import *
warnings.filterwarnings('ignore')

# Load the pickled modelling table for the selected industry.
industry = "automobile"
data_path = f"data/{industry}/data4model.pickle"
with open(data_path, "rb") as pickle_file:
    data = pickle.load(pickle_file)

# Most preprocessing (e.g. standardisation) already happened when the data
# file was built; here we only parse the date column and rebuild the
# (code, ymd) index.
data = (
    data.reset_index()
    .assign(ymd=lambda frame: pd.to_datetime(frame["ymd"], format="%Y-%m-%d"))
    .set_index(["code", "ymd"])
)

  from .autonotebook import tqdm as notebook_tqdm


##  2. Train

### 2.1 Build portfolio and rolling prediction

In [2]:

# Registry of model ids -> model names. Ids start at 1 and follow the order
# listed here; id 8 is the "ensemble" entry.
models = dict(
    enumerate(
        (
            "linear",
            "elastic net",
            "decision tree",
            "random forest",
            "xgboost",
            "svm",
            "neural network",
            "ensemble",
        ),
        start=1,
    )
)

# Keyword arguments forwarded to `summary(...)` and the result helpers.
# NOTE(review): units of train_size / test_size and the meaning of the
# long/short bounds are defined in the train module -- confirm there.
model_config = dict(
    industry="automobile",
    train_size=5,
    test_size=1,
    test_start=2021,
    long_bound=80,
    short_bound=20,
    ls_decision=["test", "no running"],
    n_trials=1,
    tune=True,
    do_short=True,
    input_size=89,
)



In [6]:
# Run the rolling prediction for every model whose id is below 8
# (id 8 is the "ensemble" entry and is skipped here).
for i, candidate in models.items():
    if i >= 8:
        continue
    model_name = candidate
    portfolio = summary(model_name, data, **model_config)
    portfolio.rolling_prediction()

param: {}
2021-02-01 00:00:00
   performance        ymd
0     0.000501 2021-01-11
param: {}
2021-03-01 00:00:00
   performance        ymd
0     0.046982 2021-02-09
param: {}
2021-04-01 00:00:00
   performance        ymd
0     0.070731 2021-03-10
param: {}
2021-05-01 00:00:00
   performance        ymd
0     0.000489 2021-04-12
param: {}
2021-06-01 00:00:00
   performance        ymd
0    -0.003549 2021-05-10
param: {}
2021-07-01 00:00:00
   performance        ymd
0     0.057722 2021-06-10
param: {}
2021-08-01 00:00:00
   performance        ymd
0     0.015723 2021-07-12
param: {}
2021-09-01 00:00:00
   performance        ymd
0     0.107917 2021-08-10
param: {}
2021-10-01 00:00:00
   performance        ymd
0     0.013065 2021-09-10
param: {}
2021-11-01 00:00:00
   performance        ymd
0     0.063845 2021-10-12
param: {}
2021-12-01 00:00:00
   performance        ymd
0     0.006682 2021-11-10
param: {}
2022-01-01 00:00:00
   performance        ymd
0     -0.01332 2021-12-10
param: {}
2022-0

In [5]:
# Sanity check on the shorting step: count the rows of the first model's
# result whose `pred_ls` equals -1 (presumably the short signal -- confirm).
model_name = models[1]
first_model_result = models_get_result(industry, model_name, model_config)
performance_rolling, rplsw_rolling, market_rolling = first_model_result
short_mask = rplsw_rolling["pred_ls"] == -1
short_mask.sum()

202

Ensemble method

In [None]:
# Ensemble method
#
# Sum the `pred_ls` column of every individual model's rolling result into
# one accumulator frame, `rplsw_rolling_ensem`.
#
# The accumulator starts as a COPY of the first model's result so that the
# in-place `+=` below never mutates the frame returned by
# `models_get_result`. It is initialised on the first processed model
# rather than on the literal id 1, so the loop does not depend on which key
# happens to come first in `models`.
rplsw_rolling_ensem = None

for i in models:
    if i >= 8:
        # id 8 is the "ensemble" entry itself, not an individual model.
        break
    model_name = models[i]
    performance_rolling, rplsw_rolling, market_rolling = models_get_result(
        industry, model_name, model_config
    )
    if rplsw_rolling_ensem is None:
        rplsw_rolling_ensem = rplsw_rolling.copy()
    else:
        rplsw_rolling_ensem["pred_ls"] += rplsw_rolling["pred_ls"]

In [None]:
def ensem_backtest(rplsw_rolling_ensem, min_score=3):
    """Select the ensemble positions and weight them by their summed score.

    Args:
        rplsw_rolling_ensem: DataFrame with a ``pred_ls`` column holding the
            prediction score summed over the individual models.
        min_score: Minimum ``pred_ls`` a row needs to be kept. Defaults to 3,
            the threshold the original code hard-coded.

    Returns:
        A new DataFrame containing only the rows with
        ``pred_ls >= min_score`` plus an ``ensem_weight`` column equal to
        each row's ``pred_ls`` divided by the total ``pred_ls`` of the kept
        rows. The input frame is left untouched.
    """
    # Filter first, then copy: assigning a column to a filtered slice would
    # otherwise trigger pandas' SettingWithCopy behaviour.
    keep = rplsw_rolling_ensem["pred_ls"] >= min_score
    selected = rplsw_rolling_ensem[keep].copy()
    selected["ensem_weight"] = selected["pred_ls"] / selected["pred_ls"].sum()
    # TODO: the ensemble return calculation was never written (the original
    # body ended in a bare, undefined `ensem_return` name that raised
    # NameError). Until it exists, return the weighted selection.
    return selected


# Display the boolean mask that is True where `pred_ls` is at least 3.
rplsw_rolling_ensem["pred_ls"] >= 3

### 2.2 Backtest
`# subplots vs subplot?`
- result in
    - result/industry/
    - result/industry/train_size_year_?

In [None]:
# Backtest settings. These repeat the training configuration so that this
# cell can be run on its own.
industry = "automobile"

# Model ids (starting at 1) mapped to model names; id 8 is the "ensemble" entry.
models = {
    model_id: name
    for model_id, name in enumerate(
        [
            "linear",
            "elastic net",
            "decision tree",
            "random forest",
            "xgboost",
            "svm",
            "neural network",
            "ensemble",
        ],
        start=1,
    )
}

model_config = dict(
    industry="automobile",
    train_size=5,
    test_size=1,
    test_start=2021,
    long_bound=80,
    short_bound=20,
    ls_decision=["test", "no running"],
    n_trials=1,
    tune=True,
    do_short=True,
    input_size=89,  # model input size (original dimension: 89)
)

# Plot the stored result of every model whose id is below 8
# (id 8 is the "ensemble" entry and is skipped here).
for i, candidate in models.items():
    if i >= 8:
        continue
    model_name = candidate
    plot_model_result(industry, model_name, model_config)

    # Disabled trading-statistics step, kept for reference:
    # performance_rolling rplsw_rolling, market_rolling, = \
    #    models_get_result(industry, model_name, model_config)
    # n_long, n_short, trading_rate = trading(rplsw_rolling)