In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns

In [2]:
from sklearn.datasets import fetch_california_housing

In [3]:
data = fetch_california_housing()

In [4]:
data

{'data': array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
           37.88      , -122.23      ],
        [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
           37.86      , -122.22      ],
        [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
           37.85      , -122.24      ],
        ...,
        [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
           39.43      , -121.22      ],
        [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
           39.43      , -121.32      ],
        [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
           39.37      , -121.24      ]]),
 'target': array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]),
 'frame': None,
 'target_names': ['MedHouseVal'],
 'feature_names': ['MedInc',
  'HouseAge',
  'AveRooms',
  'AveBedrms',
  'Population',
  'AveOccup',
  'Latitude',
  'Longitude'],
 'DESCR': '.. _california_housing_dataset:\n

In [5]:
X,y = data.data,data.target

In [6]:
X.shape

(20640, 8)

In [7]:
y.shape

(20640,)

Lasso loss = min ||y-Wx||^2 + A/2 |w|

co ordinate descent, W = 1/L2norm *S(z,lembda)
S(z,lembda) = sign(z)*max(|z|-lembda,0)
z = Xj(Y - Y_pred_without_jth_feature)

In [8]:
def gradeint(x,y,w):
    return ((x.T@x)@w-(x.T@y))

In [9]:
def soft_threshold(x,z,lembda):
    if z>0:
        return max(z-lembda,0)/(np.linalg.norm(x,ord=2))**2
    else:
        return -max(np.abs(z)-lembda,0)/ (np.linalg.norm(x,ord=2))**2

In [10]:
def weight(x,y):
    c = np.dot(x.T,x)
    inv = np.linalg.pinv(c)
    comp = np.dot(x.T,y)
    w = np.dot(inv,comp)
    return w

In [11]:
def prediction_f(X_train_new,Y_train,f):
    X_n = np.concatenate([X_train_new[:,:f],X_train_new[:,f+1:]],axis=1)
    
    c = np.dot(X_n.T,X_n)
    inv = np.linalg.pinv(c)
    comp = np.dot(X_n.T,Y_train)
    w = np.dot(inv,comp)
    pred = np.dot(X_n,w)
    return pred

In [19]:
def co_ordinate_descent(X_train_new,Y_train,k=100,lembda=1e3):
    W_lasso  = np.ones(X_train_new.shape[1],dtype='float')
    
    for i in range(k):
        
        W_lasso_n = W_lasso.copy()
        for f in range(X_train_new.shape[1]):
            
            predict_without_f = prediction_f(X_train_new,Y_train,f)
            
        
            z = np.dot(X_train_new[:,f].T,(Y_train -predict_without_f))
            
            threshold = soft_threshold(X_train_new[:,f],z,lembda)
           
            W_lasso_n[f] += threshold 
            
            
        
        if np.linalg.norm(W_lasso_n  - W_lasso) < -1e-2:
           
            print(i)
            return W_lasso_n 
        W_lasso = W_lasso_n.copy()
        
            
    return W_lasso
    
    
    

In [21]:
X_train.shape

(16000, 8)

In [22]:
Y_train.shape

(16000,)

In [23]:
X_test,Y_test = X[16000:,],y[16000:]

In [24]:
X_test.shape

(4640, 8)

In [72]:
W =co_ordinate_descent(X_train,Y_train,k=100,lembda=1e10)

In [73]:
W.shape

(8,)

In [74]:
def prediction(X,w):
    return X@w

In [75]:
y_preds = prediction(X_train,W)

In [76]:
from sklearn.metrics import mean_squared_error,r2_score

In [77]:
print(mean_squared_error(Y_train,y_preds)**0.5)

1826.5166657520447


In [78]:
print(r2_score(Y_train,y_preds))

-2603067.6734371693


In [34]:
X_train

array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
          37.88      , -122.23      ],
       [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
          37.86      , -122.22      ],
       [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
          37.85      , -122.24      ],
       ...,
       [   9.3603    ,   51.        ,    6.76558603, ...,    2.71820449,
          37.75      , -122.47      ],
       [   6.1592    ,   46.        ,    6.8893617 , ...,    2.75106383,
          37.75      , -122.47      ],
       [   4.6071    ,   52.        ,    6.03018868, ...,    2.6       ,
          37.75      , -122.47      ]])

In [35]:
Y_train

array([4.526  , 3.585  , 3.521  , ..., 5.00001, 3.817  , 3.374  ])

In [43]:
y_preds

array([ 297.53869206, 2355.2612594 ,  483.03124181, ..., 1076.1116217 ,
       1271.23707234,  670.59276038])

In [79]:
X_std = X_train.std(axis=0)
fixed_std = np.where(X_std==0,1,X_std)
X_scaled = X_train - X_train.mean(axis=0)/fixed_std

In [80]:
Y_std = Y_train.std(axis=0)
fixed_stdy = np.where(Y_std==0,1,Y_std)
Y_scaled = Y_train-Y_train.mean()/fixed_stdy

In [82]:
W2 =co_ordinate_descent(X_scaled,Y_scaled,k=100,lembda=1e4)

In [191]:
intercept = Y_train.mean() - X_train.mean(axis=0).T@W2

In [192]:
intercept

5780.839885385227

In [93]:
y_preds2 = X_train@W2 + intercept

In [94]:
print(mean_squared_error(Y_train,y_preds2)**0.5)

1168.099408109506


In [95]:
print(r2_score(Y_train,y_preds2))

-1064627.173946721


In [96]:
from sklearn.datasets import load_diabetes
X, y = load_diabetes(return_X_y=True)

In [97]:
X.shape

(442, 10)

In [98]:
X_train,Y_train = X[:370,],y[:370]

In [99]:
X_test,Y_test = X[370:,],y[370:]

In [100]:
X_train.shape

(370, 10)

In [101]:
X_std = X_train.std(axis=0)
fixed_std = np.where(X_std==0,1,X_std)
X_scaled = X_train - X_train.mean(axis=0)/fixed_std

In [102]:
Y_std = Y_train.std(axis=0)
fixed_stdy = np.where(Y_std==0,1,Y_std)
Y_scaled = Y_train-Y_train.mean()/fixed_stdy

In [199]:
W2 =co_ordinate_descent(X_scaled,Y_scaled,k=10000,lembda=1e3)

In [200]:
intercept = Y_train.mean() - X_train.mean(axis=0).T@W2

In [201]:
intercept

153.14273066819771

In [202]:
y_preds2 = X_train@W2 + intercept

In [196]:
y_preds3 = X_train@W3

In [203]:
print(mean_squared_error(Y_train,y_preds2)**0.5)

77.02311646971725


In [204]:
print(r2_score(Y_train,y_preds2))

0.003254336031872307


In [144]:
from sklearn.linear_model import Lasso

In [187]:
lasso = Lasso(alpha=0.1, max_iter=10000)  # alpha = λ, regularization strength
lasso.fit(X_scaled, Y_scaled)

In [189]:
print("Intercept:", lasso.intercept_)
print("Coefficients:", lasso.coef_)
print("Training R^2:", lasso.score(X_scaled, Y_train))


Intercept: 158.8506943290304
Coefficients: [   0.         -174.24080201  519.63988321  255.70702875  -52.95665431
   -0.         -204.28142678    0.          454.27923877   93.35058091]
Training R^2: 0.4966631462542921
