In [1]:
import numpy as np
from numpy import ndarray
from typing import Dict,Tuple

In [2]:
def RSS(y_obs:ndarray,y_pred:ndarray):
    return np.sum(np.power(y_obs-y_pred,2))      # Residual sum of squares.

def mse(y_obs:ndarray,y_pred:ndarray):
    return np.mean(np.power(y_obs-y_pred,2))    # Mean Squared Error.


def rmse(y_obs:ndarray,y_pred:ndarray):
    return np.sqrt(mse(y_obs,y_pred))        # Root Mean Squared Error.

def mae(y_obs:ndarray,y_pred:ndarray):
    return np.mean(np.abs(y_obs-y_pred))    # Mean Absolute Error.

In [3]:
def Elastic_Net_Sandip(S:ndarray,y:ndarray,weights:Dict[str,ndarray],alpha:float)->Tuple[Dict[str,ndarray],float]:
    assert S.shape[0] == y.shape[0]
    assert S.shape[1] == weights['W'].shape[0]
    
    f1 = np.dot(S,weights['W'])
    f2 = f1 + weights['K']
    f3 = mse(y,f2) + alpha*np.sum(np.power(weights['W'],2)) + alpha*np.sum(np.abs(weights['W']))
    
    forward_info:Dict[str,ndarray] = {}
    
    forward_info['S'] = S
    forward_info['y'] = y
    forward_info['alpha'] = alpha
    forward_info['f1'] = f1
    forward_info['f2'] = f2
    
    return forward_info, f3

In [4]:
def permute_data(S:ndarray,K:ndarray):
    perm = np.random.permutation(S.shape[0])
    
    return S[perm], K[perm]

In [5]:
def S_grad(forward_info:Dict[str,ndarray],weights:Dict[str,ndarray])->Dict[str,ndarray]:
    S = forward_info['S']
    y = forward_info['y']
    f2 = forward_info['f2']
    alpha = forward_info['alpha']
    W = weights['W']
    
    grad_W = -2*np.dot(np.transpose(S),(y-f2))/S.shape[0] + 2*alpha*W + alpha*W/np.abs(W)
    partial_der_K = -2*np.sum(y-f2)/S.shape[0]
    
    gradients:Dict[str,ndarray] = {'W':grad_W,'K':partial_der_K}
        
    return gradients
    

In [6]:
def generate_batch(S:ndarray,y:ndarray,start:int = 0,batch_size:int = 100)->Tuple[ndarray,ndarray]:
    assert S.ndim == y.ndim == 2
    
    if start + batch_size > S.shape[0]:
        batch_size = S.shape[0] - start
        
    S_batch,y_batch = S[start:start+batch_size],y[start:start+batch_size]
    
    return S_batch, y_batch

In [7]:
def initializing_weights(dim_in:int)->Dict[str,ndarray]:
    
    W = np.random.randn(dim_in,1)
    K = np.random.randn(1,1)
    
    weights:Dict[str,ndarray] = {'W':W,'K':K}
    return weights

In [8]:
def train(S:ndarray,y:ndarray,alpha:float = 1., n_iter:int = 1000,lr_init:float = .01,batch_size:int = 100,
         return_losses:bool = False,return_weights:bool = True, seed:int =11)->None:
    if seed:
        np.random.seed(seed)
    start = 0
    
    weights = initializing_weights(S.shape[1])
    
    S,y = permute_data(S,y)
    
    if return_losses:
        losses = []
        
    for i in range(n_iter):
        if start >= S.shape[0]:
            S,y = permute_data(S,y)
            start = 0
        
        S_batch, y_batch = generate_batch(S,y,start,batch_size)
        start += batch_size
        
        forward_info,loss = Elastic_Net_Sandip(S_batch,y_batch,weights,alpha)
        
        if return_losses:
            losses.append(loss)
        
        loss_grads = S_grad(forward_info,weights)
        
        lr_k = learning_rate(lr_init,n_iter)
        for key in weights.keys():
            weights[key] -= lr_k*loss_grads[key]
            
    if return_weights:
        return losses, weights
    
    return None
        

In [9]:
def learning_rate(lr_init:float,k_itr:int):
    lr_T = lr_init/100                                    # A linear function to update the learning rate.
    if k_itr < 500:
        lr_k = (1-k_itr/500)*lr_init + k_itr/500*lr_T
        return lr_k
    else:
        return lr_T

    
        

In [10]:
def predict(S:ndarray,weights:Dict[str,ndarray]):
    return np.dot(S,weights['W']) + weights['K']

In [11]:
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [12]:
boston = load_boston()

In [13]:
data = boston.data
target = boston.target

In [14]:
data[:3]

array([[6.3200e-03, 1.8000e+01, 2.3100e+00, 0.0000e+00, 5.3800e-01,
        6.5750e+00, 6.5200e+01, 4.0900e+00, 1.0000e+00, 2.9600e+02,
        1.5300e+01, 3.9690e+02, 4.9800e+00],
       [2.7310e-02, 0.0000e+00, 7.0700e+00, 0.0000e+00, 4.6900e-01,
        6.4210e+00, 7.8900e+01, 4.9671e+00, 2.0000e+00, 2.4200e+02,
        1.7800e+01, 3.9690e+02, 9.1400e+00],
       [2.7290e-02, 0.0000e+00, 7.0700e+00, 0.0000e+00, 4.6900e-01,
        7.1850e+00, 6.1100e+01, 4.9671e+00, 2.0000e+00, 2.4200e+02,
        1.7800e+01, 3.9283e+02, 4.0300e+00]])

In [15]:
S = StandardScaler()

In [16]:
data = S.fit_transform(data)

In [17]:
data[:3]

array([[-0.41978194,  0.28482986, -1.2879095 , -0.27259857, -0.14421743,
         0.41367189, -0.12001342,  0.1402136 , -0.98284286, -0.66660821,
        -1.45900038,  0.44105193, -1.0755623 ],
       [-0.41733926, -0.48772236, -0.59338101, -0.27259857, -0.74026221,
         0.19427445,  0.36716642,  0.55715988, -0.8678825 , -0.98732948,
        -0.30309415,  0.44105193, -0.49243937],
       [-0.41734159, -0.48772236, -0.59338101, -0.27259857, -0.74026221,
         1.28271368, -0.26581176,  0.55715988, -0.8678825 , -0.98732948,
        -0.30309415,  0.39642699, -1.2087274 ]])

In [18]:
target.ndim

1

In [19]:
target = target.reshape(-1,1)

In [20]:
target.ndim

2

In [21]:
train_data,test_data,train_target,test_target = train_test_split(data,target,test_size = .2,random_state = 12)

In [22]:
train_data.shape,train_target.shape

((404, 13), (404, 1))

In [34]:
train_info = train(train_data,train_target,batch_size=200,n_iter=5000,return_losses = True,seed=1)

In [35]:
train_info

([783.7693410199117,
  752.4093088111599,
  1112.0717463727133,
  788.5889730167988,
  735.6074526562352,
  1451.6430009578382,
  709.7305356816128,
  799.8573346958108,
  1889.9206181571083,
  777.0848439179151,
  744.5608549783633,
  973.4184841844809,
  803.3731452741538,
  717.7969533473099,
  736.8333574381053,
  793.722913449387,
  726.9973329918383,
  575.7871698188434,
  819.7109192363619,
  700.1361539131667,
  412.9226410594683,
  707.9718844674447,
  806.2281627643205,
  532.0670864540948,
  761.7328390613258,
  741.8814217069026,
  868.3943022618553,
  774.5044650974708,
  737.351458495224,
  260.07004425899873,
  739.660487437311,
  753.0915182888579,
  1079.7377739369383,
  689.5447810592195,
  788.925047318018,
  1598.6587977923066,
  699.8620571924034,
  792.3248446067988,
  704.0683176336272,
  743.6261506587438,
  748.6431802862368,
  492.33280100321883,
  759.1974680606501,
  727.9100208788979,
  565.7209580268799,
  850.3417810155937,
  635.802135384898,
  451.46423

In [36]:
train_info[0][4999]

113.99002571068169

In [37]:
weights = train_info[1]

In [38]:
train_pred_target = predict(train_data,weights)

In [39]:
mse(train_target,train_pred_target)

108.0904851068238

In [40]:
test_pred_target = predict(test_data,weights)

In [30]:
mse(test_target,test_pred_target)

112.09216624659832

In [41]:
rmse(train_target,train_pred_target)

10.396657400666033

In [43]:
rmse(test_target,test_pred_target)

10.587358794647432