In [1]:
from mrrce import MrRCE
from simulation_utils import generate_data, me
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, RidgeCV, MultiTaskLassoCV
import matplotlib.pyplot as plt
%matplotlib inline

#### Navigate

1. [Generate random dataset](#Generate-random-dataset)
1. [Fit with CV](#Fit-with-CV)
1. [Estimations](#Estimations)
1. [Compare to other estimators](#Compare-to-other-estimators)
1. [Fit with no CV](#Fit-with-no-CV)

##### Generate random dataset

In [2]:
# Simulation settings forwarded to generate_data as keyword arguments.
params = {
    'n': 50,                 # number of observations
    'p': 20,                 # number of predictors
    'q': 5,                  # number of tasks (responses)
    'sigma': 1,              # coefficient variance
    'corr_x': 0.7,           # correlation coefficient for predictors — TODO confirm against generate_data
    'sigma_err': 1,          # error scale (presumably the error variance) — TODO confirm against generate_data
    'err_corr': 0.9,         # correlation coefficient for errors
    'g_sparse_level': 0.0,   # group sparsity level
    'sparse_level': 0.0,     # sparsity level
    'err_cov_type': 'ar',    # error covariance form; one of ['ar', 'equi', 'fgn', 'identity']
}

In [3]:
# Seed NumPy's global RNG first so the simulated dataset is the same on every run
# (presumably generate_data draws from it — confirm in simulation_utils).
np.random.seed(1)

# Correlation coefficient for the coefficients; kept outside `params` because it
# is reported separately next to the estimate later on.
rho = 0.4

# Simulate the data: X and Y are what the estimators are fit on, B and Sigma are
# compared with the estimates, and Sigma_X is passed to the model-error helper.
X, Y, B, Sigma, Sigma_X = generate_data(rho=rho, **params)

##### Fit with CV

In [4]:
# Initialise MrRCE with cross-validation enabled.
# Presumably n_folds / n_lams are the number of CV folds and the size of the
# penalty grid — confirm against the MrRCE documentation.
m = MrRCE(
    max_iter=15,
    tol_glasso=0.1,
    tol=1e-3,
    cv=True,
    n_folds=3,
    n_lams=10,
    verbose=True,
)

In [5]:
m.fit(X, Y) # fit MrRCE (CV configuration) to the simulated X and Y

Iter number 7 

##### Estimations

In [6]:
# true coefficient matrix B returned by generate_data, rounded to 5 decimals for display
B.round(5)

array([[-0.9364 , -0.82716, -0.31097,  1.10047,  1.52656],
       [ 1.67784,  0.21784, -0.15372,  0.28456,  1.11545],
       [ 0.61821, -0.43367, -2.09498,  0.32065, -0.74634],
       [-0.64194, -0.16023,  1.33785,  1.28063, -0.95908],
       [ 0.72821, -0.27804,  0.15327,  0.54571,  0.13871],
       [-1.39322, -1.60243, -0.23998, -1.90058, -0.01088],
       [ 0.63679, -1.18148, -0.10698, -0.87449,  0.12816],
       [-0.03911,  0.80449,  0.8507 , -0.87633, -1.43986],
       [ 0.50277,  2.04823,  2.10033,  1.33874,  1.87373],
       [ 0.49486,  2.12479,  0.87403,  0.15572,  1.92681],
       [ 1.33346,  0.5233 ,  0.45132,  1.8323 , -0.42377],
       [-1.04615, -1.17159, -1.03068, -2.24563, -0.39959],
       [-0.33494,  0.28132,  0.33921,  0.32351,  0.00905],
       [-0.77962,  0.09586, -0.16656,  0.76871, -0.85234],
       [ 2.55945,  2.35798,  0.43453, -0.20286,  2.23579],
       [ 1.22466, -0.29032,  2.16176, -0.21328,  0.2569 ],
       [ 0.67837, -0.69098, -0.65183, -0.95816, -0.63872

In [7]:
m.B_hat.round(5) # coefficient matrix estimated by the fitted MrRCE model, rounded to 5 decimals

array([[-1.02498, -0.90094, -0.28147,  1.20656,  1.62085],
       [ 1.35481,  0.07742,  0.52815,  0.39366,  1.47878],
       [ 0.73136, -0.52513, -1.94865,  0.38004, -0.83299],
       [-0.68438, -0.10308,  0.97151,  1.34098, -0.87277],
       [ 0.89422, -0.19545,  0.19073,  0.55569, -0.07094],
       [-1.31694, -1.3362 , -0.71654, -1.92243, -0.0908 ],
       [ 0.57933, -1.09517, -0.06132, -1.00913,  0.17214],
       [-0.25145,  0.61121,  1.2484 , -0.7619 , -1.15521],
       [ 0.60714,  1.99345,  1.92364,  1.2347 ,  1.67217],
       [ 0.69856,  2.25745,  0.97307,  0.28633,  1.91231],
       [ 1.07939,  0.37042,  0.74046,  1.86818, -0.23343],
       [-0.97872, -1.05327, -1.36681, -2.37766, -0.56488],
       [-0.39077,  0.16203,  0.80881,  0.6807 ,  0.34179],
       [-0.61307,  0.3012 , -0.62988,  0.51438, -0.94209],
       [ 2.47876,  2.12303,  0.70562, -0.17664,  2.07426],
       [ 1.30607, -0.11859,  1.84084, -0.22512,  0.30023],
       [ 0.39209, -0.75225, -0.32123, -0.7455 , -0.43789

In [8]:
# Report the simulation's (sigma, rho) next to the values estimated by the
# fitted model; the estimates are shown with three decimals.
print(
    'True (sigma, rho) = ({sig}, {rho})\nEstimated (sigma, rho) = ({est_sig:.3f}, {est_rho:.3f})'.format(
        sig=params['sigma'],
        rho=rho,
        est_sig=m.sigma,
        est_rho=m.rho,
    )
)

True (sigma, rho) = (1, 0.4)
Estimated (sigma, rho) = (1.060, 0.418)


In [9]:
# Sigma as returned by generate_data (described as "transformed" by the author;
# presumably the q x q error covariance — TODO confirm), rounded to 5 decimals for display
Sigma.round(5)

array([[ 0.28   ,  0.22217, -0.73113, -0.16708, -0.40396],
       [ 0.22217,  0.45187, -0.85959, -0.05453, -0.32333],
       [-0.73113, -0.85959,  3.0233 ,  0.49361,  1.41024],
       [-0.16708, -0.05453,  0.49361,  0.34337,  0.41362],
       [-0.40396, -0.32333,  1.41024,  0.41362,  0.90146]])

In [10]:
# the fitted model's estimate of the transformed Sigma, rounded to 5 decimals for display
m.Sigma.round(5)

array([[ 0.12843,  0.0817 , -0.25172, -0.0625 , -0.12958],
       [ 0.0817 ,  0.32496, -0.52163, -0.02291, -0.12003],
       [-0.25172, -0.52163,  1.78125,  0.28318,  0.6322 ],
       [-0.0625 , -0.02291,  0.28318,  0.24294,  0.22089],
       [-0.12958, -0.12003,  0.6322 ,  0.22089,  0.43894]])

##### Compare to other estimators

In [11]:
# Baseline estimators, each fit without an intercept on the same (X, Y).
# scikit-learn stores coef_ with one row per target, so every coefficient
# matrix is transposed before being compared with B.
# NOTE(review): np.matrix is no longer recommended by NumPy. It is kept here
# because the `me` helper may rely on matrix semantics — confirm before
# switching to plain arrays.

# OLS
lm = LinearRegression(fit_intercept=False)
lm.fit(X, Y)
B_ols = np.matrix(lm.coef_.T)

# Ridge
ridge = RidgeCV(fit_intercept=False)
ridge.fit(X, Y)
B_ridge = np.matrix(ridge.coef_.T)

# Group Lasso
gl = MultiTaskLassoCV(fit_intercept=False)
gl.fit(X, Y)
B_gl = np.matrix(gl.coef_.T)

In [12]:
# Model Error (computed by `me`) of each estimator against the true B, using
# Sigma_X from the simulation; `m` is the MrRCE model fit above.
print(
    """
Model Error:
============
MrRCE: {mrrce:.3f}
OLS: {ols:.3f}
GL: {gl:.3f}
Ridge: {ridge:.3f}
""".format(
        mrrce=me(B, m.B_hat, Sigma_X),
        ols=me(B, B_ols, Sigma_X),
        gl=me(B, B_gl, Sigma_X),
        ridge=me(B, B_ridge, Sigma_X),
    )
)


Model Error:
MrRCE: 1.809
OLS: 3.299
GL: 2.640
Ridge: 2.759



##### Fit with no CV

In [13]:
# Initialise MrRCE without cross-validation, using a fixed lam instead.
# NOTE: this rebinds `m`, so any earlier cell that reads `m` will see this
# model if it is re-run after this point.
m = MrRCE(
    max_iter=15,
    tol_glasso=0.1,
    tol=1e-3,
    cv=False,
    lam=0.15,
    verbose=True,
)

In [14]:
m.fit(X, Y) # fit MrRCE (no-CV configuration, fixed lam) to the same simulated X and Y

Iter number 6 

In [15]:
m.B_hat.round(5) # coefficient matrix estimated by the no-CV fit, rounded to 5 decimals

array([[-1.00955, -0.90369, -0.24141,  1.19168,  1.64201],
       [ 1.31876,  0.07282,  0.54153,  0.43574,  1.48373],
       [ 0.74853, -0.50967, -2.03653,  0.35154, -0.90209],
       [-0.66698, -0.13144,  1.08021,  1.33318, -0.75903],
       [ 0.87902, -0.17317,  0.12417,  0.54371, -0.13882],
       [-1.31094, -1.34602, -0.6879 , -1.90349, -0.08126],
       [ 0.54822, -1.06665, -0.09282, -0.99677,  0.13075],
       [-0.23658,  0.59123,  1.28382, -0.76492, -1.08116],
       [ 0.60981,  1.98653,  1.93961,  1.2271 ,  1.67329],
       [ 0.70605,  2.24836,  0.9468 ,  0.30131,  1.85305],
       [ 1.06147,  0.39836,  0.71773,  1.83909, -0.20532],
       [-0.97551, -1.04075, -1.39038, -2.36607, -0.59011],
       [-0.38465,  0.13564,  0.88191,  0.68193,  0.39596],
       [-0.58744,  0.30484, -0.63163,  0.48659, -0.894  ],
       [ 2.4531 ,  2.12624,  0.63625, -0.15047,  1.9838 ],
       [ 1.29177, -0.11869,  1.90312, -0.22189,  0.38115],
       [ 0.36916, -0.73989, -0.3904 , -0.73137, -0.52953

In [16]:
# Model Error (computed by `me`) again, now with `m` bound to the no-CV MrRCE
# fit; the baseline coefficient matrices are reused unchanged.
print(
    """
Model Error:
============
MrRCE (no CV): {mrrce:.3f}
OLS: {ols:.3f}
GL: {gl:.3f}
Ridge: {ridge:.3f}
""".format(
        mrrce=me(B, m.B_hat, Sigma_X),
        ols=me(B, B_ols, Sigma_X),
        gl=me(B, B_gl, Sigma_X),
        ridge=me(B, B_ridge, Sigma_X),
    )
)


Model Error:
MrRCE (no CV): 1.928
OLS: 3.299
GL: 2.640
Ridge: 2.759

