In [1]:
import warnings
# Suppress every warning for the rest of the session. This keeps the cell
# outputs below clean, but it also hides any warnings raised by the libraries
# imported in the next cell.
warnings.filterwarnings("ignore")

In [2]:
from mrrce import MrRCE
from simulations.simulation_utils import generate_data, model_error
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, RidgeCV, MultiTaskLassoCV
import matplotlib.pyplot as plt
%matplotlib inline

#### Navigate

1. [Generate random dataset](#Generate-random-dataset)
1. [Fit with CV](#Fit-with-CV)
1. [Estimations](#Estimations)
1. [Compare to other estimators](#Compare-to-other-estimators)
1. [Fit with no CV](#Fit-with-no-CV)

##### Generate random dataset

In [3]:
# simulation settings, forwarded to generate_data as keyword arguments
params = dict(
n = 50,                          # num obs
p = 20,                          # num predictors
q = 5,                           # num tasks
sigma = 1,                       # coeff variance
corr_x = .7,                     # correlation coefficient for predictors
sigma_err = 1,                   # error scale -- presumably the sigma of the noise; TODO confirm against generate_data
err_corr = .9,                   # correlation coefficient for errors
g_sparse_level = .0,             # group sparsity level
sparse_level = .0,               # sparsity level
err_cov_type = 'ar'             # error covariance form. One of ['ar', 'equi', 'fgn', 'identity']
)

In [4]:
# True correlation coefficient between the coefficients; kept in its own
# variable because it is printed next to the estimate further down.
rho = 0.4

# Seed NumPy's global RNG right before generating the data, so the same
# dataset is produced on every run.
np.random.seed(1)
X, Y, B, Sigma, Sigma_X = generate_data(rho=rho, **params)

##### Fit with CV

In [5]:
# Default MrRCE estimator (presumably with cross-validation enabled, given the
# explicit use_cv=False variant later in this notebook -- confirm in MrRCE docs).
# verbose=True makes the fit log the loss at every iteration.
mrrce = MrRCE(verbose=True) # init MrRCE

In [6]:
# Fit on the simulated predictors X and responses Y; the per-iteration loss
# lines below come from verbose=True.
mrrce.fit(X, Y) # fit MrRCE

2019-09-11 08:37:44,104: iter 1, loss 2.398151
2019-09-11 08:37:44,147: iter 2, loss 0.615972
2019-09-11 08:37:44,187: iter 3, loss -0.729327
2019-09-11 08:37:44,227: iter 4, loss -1.576275
2019-09-11 08:37:44,272: iter 5, loss -2.032753
2019-09-11 08:37:44,314: iter 6, loss -2.249618
2019-09-11 08:37:44,356: iter 7, loss -2.344970
2019-09-11 08:37:44,401: iter 8, loss -2.385307
2019-09-11 08:37:44,446: iter 9, loss -2.402189
2019-09-11 08:37:44,490: iter 10, loss -2.409225
2019-09-11 08:37:44,532: iter 11, loss -2.412099
2019-09-11 08:37:44,576: iter 12, loss -2.413288
2019-09-11 08:37:44,619: iter 13, loss -2.413782


In [7]:
# alpha_best holds the regularization parameter selected during the fit above
print(f"selected regularization parameter is {mrrce.alpha_best:.4f}")

selected regularization parameter is 0.0015


##### Estimations

In [8]:
# true coefficient matrix returned by generate_data (p x q = 20 x 5)
B.round(2)

array([[-0.11, -1.43,  1.22,  1.23, -0.35],
       [ 1.02, -0.1 , -0.17,  0.84,  1.56],
       [ 0.32, -0.79, -2.04,  0.69, -0.52],
       [-1.18,  1.34,  0.73,  0.72, -0.75],
       [-0.08,  0.18, -0.13,  0.75,  0.57],
       [-1.47, -1.71,  0.03, -1.74, -0.26],
       [-0.72, -0.75, -0.58, -0.36,  1.02],
       [-0.28,  1.49, -0.34, -1.32, -0.24],
       [ 1.65,  1.62,  2.62,  0.96,  1.01],
       [ 2.22,  0.67,  1.74,  0.02,  0.93],
       [ 0.34,  1.5 , -0.28,  1.79,  0.37],
       [-0.89, -1.72, -0.84, -2.02, -0.41],
       [ 0.03,  0.32,  0.46,  0.11, -0.3 ],
       [-0.35,  0.39, -0.11,  0.37, -1.24],
       [ 3.12,  0.81,  0.63,  0.28,  2.55],
       [-0.75,  1.23,  0.82, -0.06,  1.91],
       [-0.41, -0.41, -1.34, -0.58,  0.48],
       [ 0.48,  0.04,  1.29,  0.26, -0.63],
       [-0.86, -1.14, -0.74, -1.39, -0.87],
       [ 1.06,  0.07, -0.1 , -0.04,  0.3 ]])

In [9]:
mrrce.Gamma.round(2) # estimated coefficient matrix (compare with the true B)

array([[-0.18, -1.52,  1.24,  1.27, -0.15],
       [ 0.79, -0.17,  0.59,  0.89,  1.59],
       [ 0.4 , -0.73, -1.82,  0.59, -0.43],
       [-1.23,  1.24,  0.32,  0.84, -0.57],
       [ 0.13,  0.29, -0.05,  0.67,  0.47],
       [-1.42, -1.61, -0.62, -1.53, -0.31],
       [-0.81, -0.7 , -0.4 , -0.4 ,  0.8 ],
       [-0.46,  1.35,  0.17, -1.3 , -0.  ],
       [ 1.74,  1.59,  2.33,  0.86,  0.89],
       [ 2.37,  0.93,  1.56,  0.15,  1.01],
       [ 0.17,  1.29,  0.48,  1.64,  0.44],
       [-0.85, -1.69, -1.22, -2.01, -0.59],
       [-0.01,  0.36,  0.84,  0.21,  0.15],
       [-0.34,  0.48, -0.47,  0.37, -1.39],
       [ 3.16,  0.68,  0.82,  0.14,  2.41],
       [-0.71,  1.27,  0.59, -0.06,  1.89],
       [-0.6 , -0.46, -0.88, -0.45,  0.64],
       [ 0.48,  0.03,  1.26,  0.34, -0.54],
       [-0.82, -1.  , -0.89, -1.35, -0.71],
       [ 1.01, -0.01, -0.13, -0.14,  0.19]])

In [10]:
# Compare the (sigma, rho) used to generate the data with the values
# stored on the fitted model as mrrce.sigma and mrrce.rho.
print(f"True (sigma, rho) = ({params['sigma']}, {rho})\nEstimated (sigma, rho) = ({mrrce.sigma:.3f}, {mrrce.rho:.3f})")

True (sigma, rho) = (1, 0.4)
Estimated (sigma, rho) = (1.056, 0.426)


In [11]:
# true (transformed) Sigma as returned by generate_data
# NOTE(review): "transformed" follows the original comment; the transform itself
# is not visible in this notebook -- confirm against generate_data.
Sigma.round(2)

array([[ 0.28,  0.02, -0.84,  0.  , -0.26],
       [ 0.02,  0.13, -0.01,  0.09,  0.05],
       [-0.84, -0.01,  3.82, -0.11,  0.93],
       [ 0.  ,  0.09, -0.11,  0.22,  0.13],
       [-0.26,  0.05,  0.93,  0.13,  0.55]])

In [12]:
# MrRCE's estimate of the transformed Sigma, to compare with the true Sigma
mrrce.Sigma.round(2)

array([[ 0.18, -0.01, -0.47, -0.  , -0.17],
       [-0.01,  0.13,  0.09,  0.1 ,  0.1 ],
       [-0.47,  0.09,  3.56, -0.38,  0.71],
       [-0.  ,  0.1 , -0.38,  0.29,  0.14],
       [-0.17,  0.1 ,  0.71,  0.14,  0.49]])

##### Compare to other estimators

In [13]:
# Baseline estimators, all fitted without an intercept.
# scikit-learn stores multi-output coefficients as (n_targets, n_features), so
# each coefficient matrix is transposed to (n_features, n_targets) to line up
# with B. The results are kept as plain ndarrays rather than np.matrix:
# NumPy no longer recommends np.matrix, and ndarrays match the type of
# mrrce.Gamma, which is passed to model_error in the same way.

# OLS
lm = LinearRegression(fit_intercept=False).fit(X, Y)
B_ols = lm.coef_.T
# Ridge
ridge = RidgeCV(fit_intercept=False).fit(X, Y)
B_ridge = ridge.coef_.T
# Group Lasso
gl = MultiTaskLassoCV(fit_intercept=False).fit(X, Y)
B_gl = gl.coef_.T

In [14]:
# Model error of every estimator, measured against the true coefficients B.
# The (label, estimate) pairs are listed in the order they are printed.
fitted = [
    ("MrRCE", mrrce.Gamma),
    ("OLS", B_ols),
    ("GL", B_gl),
    ("Ridge", B_ridge),
]
# Leading and trailing empty strings reproduce the blank lines around the report.
report = ["", "Model Error:", "============"]
report.extend(
    f"{label}: {model_error(B, B_hat, Sigma_X):.3f}" for label, B_hat in fitted
)
report.append("")
print("\n".join(report))


Model Error:
MrRCE: 1.882
OLS: 3.299
GL: 2.645
Ridge: 2.792



##### Fit with no CV

In [15]:
# Second estimator, this time with cross-validation switched off.
# NOTE: this rebinds `mrrce`, so the CV-fitted model from the section above is
# no longer reachable once this cell has run.
mrrce = MrRCE(
    alpha=3e-3,  # fixed value -- presumably the regularization parameter CV would otherwise select; confirm in MrRCE docs
    use_cv=False, 
    verbose=True
) # init MrRCE

In [16]:
# Refit on the same X and Y with the fixed-alpha estimator; the loss is again
# logged per iteration because verbose=True.
mrrce.fit(X, Y) # fit MrRCE

2019-09-11 08:39:05,169: iter 1, loss 2.417206
2019-09-11 08:39:05,217: iter 2, loss 0.663573
2019-09-11 08:39:05,261: iter 3, loss -0.640970
2019-09-11 08:39:05,304: iter 4, loss -1.443197
2019-09-11 08:39:05,345: iter 5, loss -1.863802
2019-09-11 08:39:05,384: iter 6, loss -2.059526
2019-09-11 08:39:05,423: iter 7, loss -2.144564
2019-09-11 08:39:05,461: iter 8, loss -2.180230
2019-09-11 08:39:05,503: iter 9, loss -2.194865
2019-09-11 08:39:05,542: iter 10, loss -2.201070
2019-09-11 08:39:05,584: iter 11, loss -2.203620
2019-09-11 08:39:05,625: iter 12, loss -2.204570
2019-09-11 08:39:05,666: iter 13, loss -2.204956


In [17]:
mrrce.Gamma.round(2) # estimated coefficient matrix from the fit without CV

array([[-0.18, -1.52,  1.24,  1.27, -0.15],
       [ 0.79, -0.17,  0.59,  0.89,  1.6 ],
       [ 0.4 , -0.73, -1.82,  0.59, -0.43],
       [-1.22,  1.23,  0.32,  0.84, -0.57],
       [ 0.13,  0.29, -0.05,  0.67,  0.47],
       [-1.42, -1.61, -0.62, -1.53, -0.31],
       [-0.81, -0.7 , -0.39, -0.4 ,  0.8 ],
       [-0.45,  1.35,  0.17, -1.3 , -0.  ],
       [ 1.74,  1.59,  2.33,  0.86,  0.89],
       [ 2.37,  0.93,  1.56,  0.15,  1.  ],
       [ 0.17,  1.29,  0.48,  1.64,  0.45],
       [-0.85, -1.69, -1.22, -2.01, -0.59],
       [-0.01,  0.36,  0.84,  0.21,  0.15],
       [-0.34,  0.48, -0.47,  0.37, -1.39],
       [ 3.16,  0.68,  0.82,  0.14,  2.41],
       [-0.71,  1.27,  0.59, -0.06,  1.89],
       [-0.6 , -0.46, -0.88, -0.45,  0.64],
       [ 0.48,  0.03,  1.26,  0.34, -0.54],
       [-0.81, -1.  , -0.89, -1.35, -0.71],
       [ 1.01, -0.01, -0.12, -0.14,  0.19]])

In [18]:
# Model error of every estimator against the true coefficients B; `mrrce` is
# the estimator fitted without cross-validation at this point.
fitted = [
    ("MrRCE (no GLASSO cv)", mrrce.Gamma),
    ("OLS", B_ols),
    ("GL", B_gl),
    ("Ridge", B_ridge),
]
# Leading and trailing empty strings reproduce the blank lines around the report.
report = ["", "Model Error:", "============"]
report.extend(
    f"{label}: {model_error(B, B_hat, Sigma_X):.3f}" for label, B_hat in fitted
)
report.append("")
print("\n".join(report))


Model Error:
MrRCE (no GLASSO cv): 1.883
OLS: 3.299
GL: 2.645
Ridge: 2.792

