# Multi-Task Lasso
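Multi-Task Lasso is a linear model that fits several regression targets jointly. It is trained to minimize a single objective made of two terms: a least-squares loss and a mixed L1/L2-norm penalty on the coefficient matrix, which forces the same features to be selected for all tasks. It is closely related to Multi-Task Elastic Net, which adds a further squared-L2 penalty controlled by `l1_ratio`; Multi-Task Lasso has no such parameter.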

https://scikit-learn.org/stable/modules/linear_model.html#multi-task-lasso

MultiTaskLasso()

{'alpha': 1.0,   
 'copy_X': True,   
 'fit_intercept': True,   
 'max_iter': 1000,   
 'normalize': 'deprecated',   
 'random_state': None,   
 'selection': 'cyclic',   
 'tol': 0.0001,   
 'warm_start': False}

In [None]:
# Feature-set selector; valid values are 'all', 'vif_5' or 'vif_10'.
# The value is spliced into the input CSV name and into the results file name.
vif = "all"

In [None]:
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pathlib
import platform
import seaborn as sns
from sklearn import linear_model
from sklearn.linear_model import MultiTaskElasticNet
from sklearn.linear_model import MultiTaskLasso
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
from skopt import gp_minimize, space
import sys

from validation import cross_validation
from validation import performance_test_fixed
from validation import performance_test_shifted

# strftime/strptime pattern for year-month-day dates.
date_format = "%Y-%m-%d"

# Lift pandas' display limits so frames are shown without truncation
# (None disables the respective limit).
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [None]:
# Detect the host operating system; it decides which path separator is used.
my_os = platform.system()
print("OS in my system: ",my_os)

# Separator used wherever paths are assembled by string concatenation.
slash = '\\' if my_os == "Windows" else '/'

# Absolute current working directory, terminated by the separator.
path = str(pathlib.Path().absolute()) + slash

# Same location with the modelling folder name swapped for the
# pre-processing folder name; the input data is read from there.
path_3 = path.replace('4_modelling', '3_data_pre-processing')

## Load Data

In [None]:
# Read the pre-processed data set that matches the selected `vif` variant;
# the first CSV column becomes the frame's index.
data_set = pd.read_csv(
    f"{path_3}data_artifacts{slash}data_set_e_spx_3-{vif}.csv",
    index_col=0,
)

## Time Series Cross Validation on Default Model

In [None]:
# Baseline model used for comparison with the tuned one.
# NOTE(review): alpha is set to 0.8 here, whereas the library default listed at
# the top of this notebook is 1.0 — confirm that 0.8 is the intended baseline.
default_param_model = MultiTaskLasso(alpha=0.8)
# Echo the full parameter set of the baseline model as the cell output.
default_param_model.get_params()

In [None]:
# Evaluate the baseline model with the project's cross_validation helper and
# show summary statistics of the returned scores.
# NOTE(review): the meaning of the third positional argument (True) is defined
# in validation.cross_validation — confirm there.
default_param_model_scores = cross_validation (data_set, default_param_model, True)
pd.DataFrame(default_param_model_scores).describe()

In [None]:
# Line plot of every score column returned for the baseline model.
ax = pd.DataFrame(default_param_model_scores).plot(figsize=(8,4))

## Hyperparameter Optimization with Time Series Cross Validation
- Bayesian optimization
- https://scikit-optimize.github.io/stable/auto_examples/bayesian-optimization.html

In [None]:
# Search space: one continuous dimension for the regularisation strength alpha.
# An all-integer tuple such as (0, 1) is read by scikit-optimize as an Integer
# dimension, so only alpha=0 and alpha=1 would ever be tried; a Real dimension
# is needed. The lower bound is kept strictly positive because alpha=0 switches
# the penalty off entirely.
# A separate name is used so the imported `skopt.space` module is not shadowed
# and this cell can be re-run.
search_space = [space.Real(1e-4, 1.0, name='alpha')]


def objective(params):
    """Return the mean validation MSE of a MultiTaskLasso for one candidate.

    params: list with one value per search-space dimension, as passed in by
        gp_minimize; here it holds only alpha.
    """
    # gp_minimize hands over a list, so unpack the single scalar before
    # giving it to the estimator.
    (alpha,) = params
    model = MultiTaskLasso(alpha=alpha)
    scores = cross_validation(data_set, model, True)
    return np.mean(scores['mse_valid'])


# perform the optimization
result = gp_minimize(objective, search_space)

# optimal point and function value
print("Optimal point:", result.x)
print("Function value at optimal point:", result.fun)

# objective value of every evaluated candidate, in evaluation order
ax = pd.DataFrame(result.func_vals).plot(figsize=(12,4))

In [None]:
# Save the preliminary optimisation outcome to Excel.
prelim_result = {
    'Model': [default_param_model],
    'opt_params': [result.x],
    'fun_min': [result.fun],
}
prelim_frame = pd.DataFrame(prelim_result)

display(prelim_frame)

# File name: model repr followed by day-of-month and hour of the current time.
now = datetime.today()
storage_name = f"{default_param_model}{now.day}{now.hour}.xlsx"
prelim_frame.to_excel(path + "artifacts" + slash + storage_name, index=None)

## Time Series Cross Validation on Optimal Model

In [None]:
# ---------------- config opt model -----------------
# Set alpha to the optimum reported by the hyperparameter search.
# MultiTaskLasso accepts no l1_ratio argument (that parameter belongs to
# MultiTaskElasticNet); passing it raises a TypeError at construction.
opt_model = MultiTaskLasso(alpha=0.9)

In [None]:
# Evaluate the tuned model with the project's cross_validation helper and
# show summary statistics of the returned scores.
# NOTE(review): the meaning of the third positional argument (True) is defined
# in validation.cross_validation — confirm there.
opt_model_tscv_scores = cross_validation (data_set, opt_model, True)
pd.DataFrame(opt_model_tscv_scores).describe()

In [None]:
# Line plot of every score column returned for the tuned model.
ax = pd.DataFrame(opt_model_tscv_scores).plot(figsize=(8,4))

## Model Performance on Test Set

In [None]:
# default parameter model performance on test set (unseen data)
# "Shifted" variant: the result is converted to a DataFrame and summarised.
# NOTE(review): how the shifted and fixed test procedures differ is defined in
# the project's validation module — confirm there.
default_param_model_test_shifted_scores = performance_test_shifted(data_set, default_param_model, True)
display(pd.DataFrame(default_param_model_test_shifted_scores).describe())

# "Fixed" variant: the result is printed as returned.
default_param_model_test_fixed_scores = performance_test_fixed(data_set, default_param_model, True)
print(default_param_model_test_fixed_scores)

In [None]:
# optimal parameter model performance on test set (unseen data)
# "Shifted" variant: the result is converted to a DataFrame and summarised.
# NOTE(review): how the shifted and fixed test procedures differ is defined in
# the project's validation module — confirm there.
opt_model_test_shifted_scores = performance_test_shifted(data_set, opt_model, True)
display(pd.DataFrame(opt_model_test_shifted_scores).describe())

# "Fixed" variant: the result is printed as returned; it is later indexed by
# the keys 'mse_test', 'mae_test' and 'r2_test' when the results are exported.
opt_model_test_fixed_scores = performance_test_fixed(data_set, opt_model, True)
print(opt_model_test_fixed_scores)

In [None]:
# Line plot of every score column of the tuned model's shifted test run.
ax = pd.DataFrame(opt_model_test_shifted_scores).plot(figsize=(8,4))

In [None]:
# Save the final comparison row to Excel (file name depends on model and vif).
# Each score collection is turned into a frame once and reused for its means.
tscv_frame = pd.DataFrame(opt_model_tscv_scores)
shifted_frame = pd.DataFrame(opt_model_test_shifted_scores)

# NOTE(review): the key 'mse_test_shifte' looks like a typo for
# 'mse_test_shifted'; it is kept unchanged so the exported column name stays
# the same — confirm with any downstream consumer before renaming.
results = {
    'Model': [default_param_model],
    'vif': [vif],
    'opt_params': [result.x],
    'fun_min': [result.fun],
    'mse_tscv': [tscv_frame['mse_valid'].mean()],
    'mae_tscv': [tscv_frame['mae_valid'].mean()],
    'r2_tscv': [tscv_frame['r2_valid'].mean()],
    'mse_test_shifte': [shifted_frame['mse_test'].mean()],
    'mae_test_shifted': [shifted_frame['mae_test'].mean()],
    'r2_test_shifted': [shifted_frame['r2_test'].mean()],
    'mse_test_fixed': [opt_model_test_fixed_scores['mse_test']],
    'mae_test_fixed': [opt_model_test_fixed_scores['mae_test']],
    'r2_test_fixed': [opt_model_test_fixed_scores['r2_test']],
    'opt_model': [opt_model.get_params()],
}
results_frame = pd.DataFrame(results)

display(results_frame)

# File name: model repr, vif variant, then day-of-month and hour of the current time.
now = datetime.today()
storage_name = f"{opt_model}_{vif}{now.day}{now.hour}.xlsx"
results_frame.to_excel(path + "artifacts" + slash + storage_name, index=None)