In [35]:
import time
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import lasso_path, enet_path
import transethnic_prs.model1.Model1Blk as model1blk

In [36]:
# Simulate two cohorts that share one sparse effect vector.
SEED = 2021  # fixed so that Restart & Run All reproduces the same draws
rng = np.random.default_rng(SEED)

n1 = 500   # number of samples in cohort 1
n2 = 300   # number of samples in cohort 2
p = 1020   # number of predictors
pi0 = 0.9  # probability that a given effect is set to exactly zero

# Design matrices with i.i.d. standard-normal entries.
X1 = rng.normal(size=(n1, p))
X2 = rng.normal(size=(n2, p))

# Draw N(0, 1) effects, then zero each one independently with probability pi0.
beta = rng.normal(size=p)
zero_ind = rng.random(p) < pi0
beta[zero_ind] = 0

# Both outcomes use the same beta; the noise has standard deviation 20.
y1 = X1 @ beta + rng.normal(scale=20, size=n1)
y2 = X2 @ beta + rng.normal(scale=20, size=n2)

In [37]:
# Center every column of each design matrix and center each outcome,
# so the cross-products below are computed on mean-zero data.
X1 = X1 - np.mean(X1, axis=0)
X2 = X2 - np.mean(X2, axis=0)
y1 = y1 - np.mean(y1)
y2 = y2 - np.mean(y2)

# Cohort-1 summary statistics built from the centered data:
#   A1 = X1' X1, e.g. A1 = (N1 - 1) X1.cov()
#   b1 = X1' y1, e.g. b1 = (N1 - 1) diag(X1.cov()) bhat1,
#        where bhat1 is the GWAS effect size estimate
A1 = np.matmul(X1.T, X1)
b1 = np.matmul(X1.T, y1)
print(b1)

[  24.14606436  206.20246983  456.51283593 ... -434.18840643  348.56578682
 -224.3214701 ]


In [31]:
# Inspect the arrays used in the following cells: shape, dtype and memory-layout flags.
for arr in (A1, b1, X1, X2):
    print(arr.shape, arr.dtype, arr.flags)
print(y2.shape, y2.dtype)

# A bare expression in the middle of a cell is evaluated and thrown away
# (only the last expression is displayed), so the type has to be printed.
print(type(X2))

# Last expression of the cell: shown through the notebook's rich display.
X2

(1020, 1020) float64   C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

(1020,) float64   C_CONTIGUOUS : True
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

(500, 1020) float64   C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

(300, 1020) float64   C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

(300,) float64


array([[-0.98579962, -1.35490581, -0.26855968, ..., -0.15833578,
        -0.57792712, -1.00438152],
       [ 0.49033578, -0.19521814,  0.72075367, ...,  0.87947594,
        -1.62342993, -1.10883758],
       [-1.85537008,  1.42236794,  1.12896893, ..., -0.61319435,
         0.0922145 , -0.21630841],
       ...,
       [-0.92531314, -0.12382599,  0.7136271 , ...,  1.66802548,
         1.83693887, -0.22154025],
       [ 0.07340739, -1.18824607, -0.137767  , ...,  0.59840917,
         1.01750762, -2.52964387],
       [-0.27000554, -1.05706803,  0.79960573, ..., -1.21045393,
        -0.71788137,  0.82850086]])

In [38]:
# Build the solver from the cohort-1 cross-products (A1, b1) and the
# cohort-2 data (X2, y2). The first three arguments are single-element
# lists -- presumably one entry per block; confirm against Model1Blk.
mod1 = model1blk.Model1Blk(
    [A1],
    [b1],
    [X2],
    y2,
)

In [39]:
# Solution path from mod1 with the mixing weight l1_ratio passed as `alpha`.
# l1_ratio is also reused by the scikit-learn enet_path cell.
l1_ratio = 0.1

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() can jump if the system clock is adjusted.
t = time.perf_counter()
# Besides the coefficient matrix and lambda sequence, the names of the other
# return values suggest per-lambda iteration counts, tolerances and
# convergence flags -- confirm against Model1Blk.solve_path.
beta_mat_en, lambda_seq_en, niters_en, tols_en, convs_en = mod1.solve_path(alpha=l1_ratio)
print(f'Run time = {time.perf_counter()-t} s')

Run time = 0.9082856178283691 s


In [40]:
# Solution path from mod1 with alpha=1 (presumably the pure-lasso case, as the
# *_lasso names suggest -- confirm against Model1Blk.solve_path).
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() can jump if the system clock is adjusted.
t = time.perf_counter()
beta_mat_lasso, lambda_seq_lasso, niters_lasso, tols_lasso, convs_lasso = mod1.solve_path(alpha=1)
print(f'Run time = {time.perf_counter()-t} s')

Run time = 17.464359521865845 s


In [34]:
# Pool the two cohorts: rows of X2 go below X1, and y2 is appended to y1.
X_test = np.concatenate((X1, X2), axis=0)
y_test = np.concatenate((y1, y2))

# Report the layout of the pooled arrays first ...
print(X_test.shape, X_test.flags, X_test.dtype)
print(y_test.shape)

# ... then their contents.
print(X_test)
print(y_test)

(800, 1020)   C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False
 float64
(800,)
[[ 2.4405876  -0.00343609  0.90240405 ...  0.05285615  0.5600085
  -0.52435997]
 [ 0.4257453  -0.2765823  -0.11439722 ...  0.10222572  0.55190319
   1.66316364]
 [-0.4395994  -0.26036356 -1.08413119 ... -0.17258792  2.49663791
  -1.12195787]
 ...
 [-0.92531314 -0.12382599  0.7136271  ...  1.66802548  1.83693887
  -0.22154025]
 [ 0.07340739 -1.18824607 -0.137767   ...  0.59840917  1.01750762
  -2.52964387]
 [-0.27000554 -1.05706803  0.79960573 ... -1.21045393 -0.71788137
   0.82850086]]
[-4.66815723e+00 -2.42144694e+01  2.28362606e+00 -1.59940406e+01
 -1.11018861e+01  2.62304277e+00 -5.06555296e+00  1.41298671e+01
  4.10829057e+01 -2.16536691e+01 -1.69384789e+01 -2.20219222e+01
 -4.35128536e+00 -4.36649118e+00  3.60973979e+00 -5.54917698e-01
  2.08299221e+01  1.07066478e+01 -1.59333374e+01  3.41527952e+01
 -1.98191

In [33]:
# scikit-learn elastic-net path on the pooled (X1, X2) / (y1, y2) data,
# using the same l1_ratio (0.1) as the mod1.solve_path elastic-net run.
# eps=0.01 is needed because my solver sets lambda_min = lambda_max * 0.01
# by default.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() can jump if the system clock is adjusted.
t = time.perf_counter()
alphas_enet, coefs_enet, kk = enet_path(
    np.concatenate([X1, X2], axis=0),
    np.concatenate([y1, y2]),
    l1_ratio=l1_ratio,
    fit_intercept=False,
    eps=0.01)
print(f'Run time = {time.perf_counter()-t} s')

Run time = 0.7866723537445068 s


In [30]:
# scikit-learn lasso path on the pooled (X1, X2) / (y1, y2) data, with the
# same eps=0.01 as the enet_path call.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() can jump if the system clock is adjusted.
t = time.perf_counter()
alphas_lasso, coefs_lasso, kk_lasso = lasso_path(
    np.concatenate([X1, X2], axis=0),
    np.concatenate([y1, y2]),
    fit_intercept=False,
    eps=0.01)
print(f'Run time = {time.perf_counter()-t} s')

Run time = 4.807043790817261 s
