In [84]:
import numpy as np
import pandas as pd
import sklearn.model_selection as skm
import sklearn.linear_model as skl
from sklearn.preprocessing import StandardScaler


## Set up
I create a random variables "X1" and noise to build a dataset. 

In [69]:
np.random.seed(1)
X1 = np.random.normal(0,1,100)
noise = np.random.normal(0,1,100)
X1

array([ 1.62434536, -0.61175641, -0.52817175, -1.07296862,  0.86540763,
       -2.3015387 ,  1.74481176, -0.7612069 ,  0.3190391 , -0.24937038,
        1.46210794, -2.06014071, -0.3224172 , -0.38405435,  1.13376944,
       -1.09989127, -0.17242821, -0.87785842,  0.04221375,  0.58281521,
       -1.10061918,  1.14472371,  0.90159072,  0.50249434,  0.90085595,
       -0.68372786, -0.12289023, -0.93576943, -0.26788808,  0.53035547,
       -0.69166075, -0.39675353, -0.6871727 , -0.84520564, -0.67124613,
       -0.0126646 , -1.11731035,  0.2344157 ,  1.65980218,  0.74204416,
       -0.19183555, -0.88762896, -0.74715829,  1.6924546 ,  0.05080775,
       -0.63699565,  0.19091548,  2.10025514,  0.12015895,  0.61720311,
        0.30017032, -0.35224985, -1.1425182 , -0.34934272, -0.20889423,
        0.58662319,  0.83898341,  0.93110208,  0.28558733,  0.88514116,
       -0.75439794,  1.25286816,  0.51292982, -0.29809284,  0.48851815,
       -0.07557171,  1.13162939,  1.51981682,  2.18557541, -1.39

I create the independent varaible 'Y' as a function of powers of x, x^2 and x^3 with arbitrary coeffients of 5, -8, 3.  This is what my model is hopefully going to predict at the end of this.

In [70]:
Y = 10+ 5* X1 - 8*(X1**2) + 3*( X1 **3) + noise
Y

array([  9.42411067,   4.48491635,   5.08888475,  -7.68716083,
         9.18507354, -80.28926042,  11.04521832,  -0.7184346 ,
        10.6121105 ,   8.24175627,   8.21225133, -60.1698045 ,
         8.30190323,   5.87028885,  10.10808247, -10.48164217,
         8.84593192,  -4.19967398,  11.31845611,  11.1994888 ,
        -9.21836006,   8.96542961,  11.47739977,  12.84020927,
         8.34721216,   3.11875939,  10.8868159 ,  -3.80441098,
         6.8295052 ,  11.71243754,   1.54095888,   5.96564193,
         0.58296593,  -1.20184652,   2.9246724 ,   9.31185705,
        -9.2375265 ,   9.62677528,  10.77930828,  10.57752611,
         8.53866628,  -2.94102123,   1.41583726,  10.84107799,
        10.76324614,   2.93120669,  10.76168465,  13.62421044,
        10.72298856,  11.42640099,  10.5510551 ,   4.68015193,
        -9.59070001,   9.33604162,   9.02045244,  10.6855648 ,
        10.19899106,  10.02250741,  10.86274268,   9.11634565,
        -0.13003458,   8.60968608,  11.11352315,   7.42

I set up my dataframe called "data" and then add columns for x^2, x^3 ... x^10, named "X2", ..."X10"

In [71]:
data = pd.DataFrame({'X1': X1, 'Y': Y})
data.shape

(100, 2)

In [79]:
for i in range(2, 11):
    data[f'X{i}'] = data['X1'] ** i

In [80]:
data.head()

Unnamed: 0,X1,Y,X2,X3,X4,X5,X6,X7,X8,X9,X10
0,1.624345,9.424111,2.638498,4.285832,6.961671,11.308158,18.368354,29.836551,48.464863,78.723675,127.874436
1,-0.611756,4.484916,0.374246,-0.228947,0.14006,-0.085683,0.052417,-0.032066,0.019617,-0.012001,0.007342
2,-0.528172,5.088885,0.278965,-0.147342,0.077822,-0.041103,0.02171,-0.011466,0.006056,-0.003199,0.001689
3,-1.072969,-7.687161,1.151262,-1.235268,1.325403,-1.422116,1.525886,-1.637228,1.756694,-1.884878,2.022415
4,0.865408,9.185074,0.74893,0.64813,0.560897,0.485404,0.420073,0.363534,0.314605,0.272262,0.235617


data set above looks correct!

I assigne X to all of the X1 to X10 variables.  

In [74]:
X = data.drop(columns=['Y'])
Y = data['Y']
X

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10
0,1.624345,2.638498,4.285832,6.961671,1.130816e+01,1.836835e+01,2.983655e+01,4.846486e+01,7.872367e+01,1.278744e+02
1,-0.611756,0.374246,-0.228947,0.140060,-8.568260e-02,5.241688e-02,-3.206636e-02,1.961680e-02,-1.200071e-02,7.341509e-03
2,-0.528172,0.278965,-0.147342,0.077822,-4.110322e-02,2.170956e-02,-1.146638e-02,6.056216e-03,-3.198722e-03,1.689475e-03
3,-1.072969,1.151262,-1.235268,1.325403,-1.422116e+00,1.525886e+00,-1.637228e+00,1.756694e+00,-1.884878e+00,2.022415e+00
4,0.865408,0.748930,0.648130,0.560897,4.854043e-01,4.200726e-01,3.635340e-01,3.146051e-01,2.722617e-01,2.356173e-01
...,...,...,...,...,...,...,...,...,...,...
95,0.077340,0.005981,0.000463,0.000036,2.767087e-06,2.140067e-07,1.655129e-08,1.280078e-09,9.900132e-11,7.656768e-12
96,-0.343854,0.118235,-0.040656,0.013980,-4.806936e-03,1.652883e-03,-5.683498e-04,1.954292e-04,-6.719904e-05,2.310664e-05
97,0.043597,0.001901,0.000083,0.000004,1.574983e-07,6.866431e-09,2.993548e-10,1.305093e-11,5.689795e-13,2.480572e-14
98,-0.620001,0.384401,-0.238329,0.147764,-9.161391e-02,5.680070e-02,-3.521648e-02,2.183425e-02,-1.353725e-02,8.393108e-03


This is where I start to be less sure of what's happening.  I'm trying to split it into a training and testing data sets.

In [86]:
Xtrain, Xtest, Ytrain, Ytest = skm.train_test_split(X, Y, test_size=0.2, random_state=1)
Xtest

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10
80,-0.222328,0.04943,-0.01099,0.002443,-0.0005432156,0.0001207721,-2.685104e-05,5.969741e-06,-1.327242e-06,2.950831e-07
84,0.1983,0.039323,0.007798,0.001546,0.0003066271,6.080406e-05,1.205743e-05,2.390985e-06,4.741316e-07,9.402017e-08
33,-0.845206,0.714373,-0.603792,0.510328,-0.4313323,0.3645645,-0.3081319,0.2604348,-0.220121,0.1860475
81,-0.200758,0.040304,-0.008091,0.001624,-0.0003261107,6.546935e-05,-1.31435e-05,2.638664e-06,-5.297331e-07,1.063482e-07
93,-0.63873,0.407977,-0.260587,0.166445,-0.1063134,0.06790559,-0.04337337,0.02770389,-0.01769532,0.01130254
17,-0.877858,0.770635,-0.676509,0.593879,-0.5213416,0.4576641,-0.4017643,0.3526922,-0.3096138,0.2717971
36,-1.11731,1.248382,-1.394831,1.558459,-1.741282,1.945552,-2.173786,2.428793,-2.713716,3.032063
82,0.186561,0.034805,0.006493,0.001211,0.0002260002,4.216291e-05,7.865972e-06,1.467487e-06,2.737764e-07,5.10761e-08
69,-1.396496,1.950202,-2.72345,3.803288,-5.311278,7.41718,-10.35806,14.465,-20.20032,28.20967
65,-0.075572,0.005711,-0.000432,3.3e-05,-2.464883e-06,1.862754e-07,-1.407715e-08,1.063835e-09,-8.039581e-11,6.075649e-12


I then stardize the scale for the values.  They are all in the same units so I'm not entirely sure this is necessary. 

In [89]:
scaler = StandardScaler()
XtrainScaled = scaler.fit_transform(Xtrain)
XtestScaled = scaler.fit_transform(Xtest)

set up a 5 fold cross validation lasso ?  

In [90]:
lasso_cv = skl.LassoCV(cv=5)

Finds the optimal alpha value for the lasso regression. 

In [93]:
lasso_cv.fit(X, Y) ;
best_alpha = lasso_cv.alpha_ ;

print("Best alpha:", best_alpha)

Best alpha: 353.5123729006745


  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(


runs the lasso regression on the training data with the best alpha and prints the coefficients.  But they are no where near what I was expecting.
I wanted sometihng like X1 = 5 , X2 = -8, X3 = 3 and the others 0 or close to zero (like the arbitrary model I created when making 'Y')

In [97]:

lasso_reg = skl.Lasso(alpha=best_alpha)
lasso_reg.fit(Xtrain, Ytrain)
lasso_reg.coef_
for i, coef in enumerate(lasso_reg.coef_):
    print(f"X{i+1}: {coef}")
    


X1: 0.0
X2: -0.0
X3: 0.0
X4: -0.0
X5: 0.0
X6: -0.0
X7: 0.0
X8: -0.0
X9: 0.029428304050691186
X10: -0.010709702708107676
