# GraphicalLassoCV Example

Last Update: July 22nd, 2020

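References:

- GraphicalLassoCV: https://scikit-learn.org/stable/modules/generated/sklearn.covariance.GraphicalLassoCV.html
- GraphicalLasso: https://scikit-learn.org/stable/modules/generated/sklearn.covariance.GraphicalLasso.html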

In [1]:
# Import all the necessary packages
import numpy as np
import pandas as pd
from sklearn.covariance import GraphicalLassoCV
from sklearn.covariance import GraphicalLasso

In [2]:
# Ground-truth covariance matrix (4 variables) used to simulate the data
true_cov = np.array([
    [0.8, 0.0, 0.2, 0.0],
    [0.0, 0.4, 0.0, 0.0],
    [0.2, 0.0, 0.3, 0.1],
    [0.0, 0.0, 0.1, 0.7],
])

# Seed the global NumPy RNG so the draw is reproducible, then sample 200
# observations from a zero-mean multivariate normal with the covariance above
np.random.seed(0)
X = np.random.multivariate_normal(
    mean=np.zeros(true_cov.shape[0]),
    cov=true_cov,
    size=200,
)

In [3]:
# True precision matrix: the matrix inverse of the true covariance matrix.
# The bare expression on the last line displays it as the cell output so it
# can be compared with the estimates produced further down.
true_prec = np.linalg.inv(true_cov)
true_prec

array([[ 1.51515152,  0.        , -1.06060606,  0.15151515],
       [ 0.        ,  2.5       ,  0.        ,  0.        ],
       [-1.06060606,  0.        ,  4.24242424, -0.60606061],
       [ 0.15151515,  0.        , -0.60606061,  1.51515152]])

In [4]:
# Fit GraphicalLassoCV on the simulated data; the penalty is selected by
# cross-validation and the iteration cap is raised to 1000
est = GraphicalLassoCV(max_iter=1000).fit(X)

In [5]:
# Covariance matrix estimated by GraphicalLassoCV, rounded to 3 decimals for display
np.round(est.covariance_, 3)

array([[0.816, 0.051, 0.22 , 0.017],
       [0.051, 0.364, 0.018, 0.036],
       [0.22 , 0.018, 0.322, 0.094],
       [0.017, 0.036, 0.094, 0.69 ]])

In [6]:
# Precision matrix estimated by GraphicalLassoCV, rounded to 3 decimals for display
np.round(est.precision_, 3)

array([[ 1.521, -0.17 , -1.063,  0.116],
       [-0.17 ,  2.784, -0.   , -0.14 ],
       [-1.063, -0.   ,  3.982, -0.518],
       [ 0.116, -0.14 , -0.518,  1.524]])

In [7]:
# The list of lambdas used in cross validation.
# scikit-learn names the penalty parameter "alpha" (hence `cv_alphas_`);
# this notebook refers to the same quantity as "lambda".
est.cv_alphas_

[0.22813020648178522,
 0.049149163068849436,
 0.026598029308124188,
 0.014394042927746526,
 0.010588866190156311,
 0.008807435274025985,
 0.008488861740795783,
 0.008181811323310077,
 0.007885867219221498,
 0.007789617396298063,
 0.007600627702330532,
 0.007325705577266539,
 0.006093256496935091,
 0.005068150002186581,
 0.00421550355833272,
 0.0022813020648178514,
 0]

In [8]:
# The lambda chosen by cross validation (exposed by scikit-learn as `alpha_`)
est.alpha_

0.008181811323310077

In [9]:
# The index of the chosen lambda in the list of lambdas.
# `est.cv_alphas_` is a plain Python list, so it is converted to a float array
# before comparing: `list == scalar` only broadcasts element-wise when the
# scalar happens to be a NumPy type, and otherwise evaluates to a single False.
# A very tight relative tolerance is used instead of exact float equality, and
# the result is cast to a built-in int so it can be used directly as a list index.
# An IndexError is still raised if the chosen lambda is not in the list.
cv_alphas = np.asarray(est.cv_alphas_, dtype=float)
ind_lambda = int(np.flatnonzero(np.isclose(cv_alphas, est.alpha_, rtol=1e-12, atol=0.0))[0])
ind_lambda

7

In [10]:
# grid_scores_: ndarray of shape (n_alphas, n_folds).
# Log-likelihood score on left-out data across folds; each row corresponds to
# one lambda in est.cv_alphas_ and each column to one fold.
# Rounded to 3 decimals for display.
# NOTE(review): newer scikit-learn releases appear to expose these scores via
# `cv_results_` instead of `grid_scores_` — confirm against the installed version.
np.round(est.grid_scores_, 3)

array([[-4.505, -4.166, -4.24 , -4.235, -4.238],
       [-4.314, -4.045, -4.167, -4.174, -4.165],
       [-4.281, -4.037, -4.161, -4.175, -4.171],
       [-4.263, -4.033, -4.162, -4.174, -4.178],
       [-4.257, -4.03 , -4.163, -4.175, -4.181],
       [-4.254, -4.028, -4.163, -4.176, -4.183],
       [-4.254, -4.028, -4.164, -4.176, -4.183],
       [-4.253, -4.028, -4.164, -4.176, -4.183],
       [-4.253, -4.027, -4.164, -4.176, -4.184],
       [-4.252, -4.027, -4.164, -4.176, -4.184],
       [-4.252, -4.027, -4.165, -4.176, -4.184],
       [-4.252, -4.027, -4.165, -4.176, -4.184],
       [-4.25 , -4.026, -4.167, -4.178, -4.186],
       [-4.248, -4.025, -4.169, -4.18 , -4.187],
       [-4.247, -4.025, -4.171, -4.181, -4.188],
       [-4.244, -4.023, -4.174, -4.184, -4.19 ],
       [-4.395, -4.038, -4.195, -4.209, -4.416]])

In [11]:
# Summary information: one row per lambda, holding the lambda value, the
# per-fold log-likelihood scores, and their total and average across folds.
# Fold column names are derived from the score matrix so that the cell keeps
# working when the number of cross-validation folds is not 5.
grid_scores = np.asarray(est.grid_scores_)
n_folds = grid_scores.shape[1]
fold_cols = ['score_fold{}'.format(k) for k in range(1, n_folds + 1)]

# Column vector of lambdas, shape (n_alphas, 1)
alphas = np.array(est.cv_alphas_).reshape(-1, 1)

df_temp = pd.DataFrame(grid_scores, columns=fold_cols)
df_temp.insert(0, 'lambdas', alphas.ravel().astype(float))

# Sum across folds once and reuse it for the average
total_score = grid_scores.sum(axis=1)
df_temp['Total_score'] = total_score
df_temp['Average_score'] = total_score / n_folds
df_temp

Unnamed: 0,lambdas,score_fold1,score_fold2,score_fold3,score_fold4,score_fold5,Total_score,Average_score
0,0.22813,-4.504907,-4.165553,-4.23987,-4.234588,-4.237572,-21.382489,-4.276498
1,0.049149,-4.314145,-4.044966,-4.167367,-4.174215,-4.164886,-20.865579,-4.173116
2,0.026598,-4.280548,-4.037276,-4.161012,-4.175371,-4.170713,-20.82492,-4.164984
3,0.014394,-4.263416,-4.03282,-4.161703,-4.174424,-4.177955,-20.810318,-4.162064
4,0.010589,-4.256945,-4.02962,-4.162758,-4.175143,-4.181156,-20.805621,-4.161124
5,0.008807,-4.254074,-4.028186,-4.163405,-4.175665,-4.182822,-20.804152,-4.16083
6,0.008489,-4.253571,-4.027934,-4.163531,-4.175771,-4.183131,-20.803939,-4.160788
7,0.008182,-4.253089,-4.027693,-4.163656,-4.175877,-4.183434,-20.803749,-4.16075
8,0.007886,-4.252628,-4.027461,-4.164057,-4.175983,-4.183728,-20.803858,-4.160772
9,0.00779,-4.252478,-4.027386,-4.164211,-4.176019,-4.183823,-20.803917,-4.160783


In [12]:
# Find the precision matrix and number of zero entries in the matrix for each lambda.
# A plain GraphicalLasso is refit on X at every lambda of the cross-validation
# grid; the fitted precision matrices are collected in prec_mx_list and the
# zero counts in num_zeros_list (same order as est.cv_alphas_).

# Entries whose absolute value does not exceed this threshold are counted as
# zero, so "zero" here means "numerically negligible", not exactly 0.0.
ZERO_TOL = 0.02

prec_mx_list = []
num_zeros_list = []

for alpha in est.cv_alphas_:
    est_lambda = GraphicalLasso(alpha=alpha, max_iter=1000).fit(X)
    prec_mx_list.append(est_lambda.precision_)

    # Boolean mask of the entries considered non-zero; the zero count is the
    # total number of entries minus the number of True values in the mask.
    # The count covers the full matrix, so a symmetric off-diagonal pair
    # contributes two zeros.
    non_zero = np.abs(est_lambda.precision_) > ZERO_TOL
    num_zeros = non_zero.size - np.count_nonzero(non_zero)
    num_zeros_list.append(num_zeros)

    print('For lambda = ', alpha)
    print('Number of zero entries in the precision matrix is : ', num_zeros)

For lambda =  0.22813020648178522
Number of zero entries in the precision matrix is :  12
For lambda =  0.049149163068849436
Number of zero entries in the precision matrix is :  6
For lambda =  0.026598029308124188
Number of zero entries in the precision matrix is :  2
For lambda =  0.014394042927746526
Number of zero entries in the precision matrix is :  2
For lambda =  0.010588866190156311
Number of zero entries in the precision matrix is :  2
For lambda =  0.008807435274025985
Number of zero entries in the precision matrix is :  2
For lambda =  0.008488861740795783
Number of zero entries in the precision matrix is :  2
For lambda =  0.008181811323310077
Number of zero entries in the precision matrix is :  2
For lambda =  0.007885867219221498
Number of zero entries in the precision matrix is :  2
For lambda =  0.007789617396298063
Number of zero entries in the precision matrix is :  2
For lambda =  0.007600627702330532
Number of zero entries in the precision matrix is :  2
For lambda

In [13]:
# Add the number of zeros information to the summary information dataframe.
# This modifies df_temp in place by adding (or overwriting) the 'Num_Zeros'
# column; num_zeros_list must have one entry per row of df_temp.
df_temp['Num_Zeros'] = np.array(num_zeros_list)
df_temp

Unnamed: 0,lambdas,score_fold1,score_fold2,score_fold3,score_fold4,score_fold5,Total_score,Average_score,Num_Zeros
0,0.22813,-4.504907,-4.165553,-4.23987,-4.234588,-4.237572,-21.382489,-4.276498,12
1,0.049149,-4.314145,-4.044966,-4.167367,-4.174215,-4.164886,-20.865579,-4.173116,6
2,0.026598,-4.280548,-4.037276,-4.161012,-4.175371,-4.170713,-20.82492,-4.164984,2
3,0.014394,-4.263416,-4.03282,-4.161703,-4.174424,-4.177955,-20.810318,-4.162064,2
4,0.010589,-4.256945,-4.02962,-4.162758,-4.175143,-4.181156,-20.805621,-4.161124,2
5,0.008807,-4.254074,-4.028186,-4.163405,-4.175665,-4.182822,-20.804152,-4.16083,2
6,0.008489,-4.253571,-4.027934,-4.163531,-4.175771,-4.183131,-20.803939,-4.160788,2
7,0.008182,-4.253089,-4.027693,-4.163656,-4.175877,-4.183434,-20.803749,-4.16075,2
8,0.007886,-4.252628,-4.027461,-4.164057,-4.175983,-4.183728,-20.803858,-4.160772,2
9,0.00779,-4.252478,-4.027386,-4.164211,-4.176019,-4.183823,-20.803917,-4.160783,2


In [14]:
# Estimated precision matrix for the first lambda in est.cv_alphas_
# (index 0 of prec_mx_list, which follows the same ordering)
prec_mx_list[0]

array([[ 1.22596272, -0.        , -0.        , -0.        ],
       [-0.        ,  2.74605582, -0.        , -0.        ],
       [-0.        , -0.        ,  3.10477899, -0.        ],
       [-0.        , -0.        , -0.        ,  1.44860969]])

In [15]:
# Estimated precision matrix for the second lambda in est.cv_alphas_
# (index 1 of prec_mx_list)
prec_mx_list[1]

array([[ 1.39660482, -0.03306993, -0.7758674 ,  0.        ],
       [-0.03306993,  2.74694758, -0.        , -0.        ],
       [-0.7758674 , -0.        ,  3.57628844, -0.24337161],
       [ 0.        , -0.        , -0.24337161,  1.46744183]])

In [16]:
# Estimated precision matrix for the chosen lambda: ind_lambda is the position
# of the cross-validated lambda within est.cv_alphas_, so this picks the
# GraphicalLasso refit at that lambda. It is expected to agree with
# est.precision_ from the GraphicalLassoCV fit.
prec_mx_list[ind_lambda]

array([[ 1.52069737, -0.17033761, -1.06283639,  0.11603294],
       [-0.17033761,  2.78366128, -0.        , -0.14048774],
       [-1.06283639, -0.        ,  3.98232618, -0.51782878],
       [ 0.11603294, -0.14048774, -0.51782878,  1.52380577]])