In [1]:
import numpy as np
import pandas as pd

## Data preprocessing


In [2]:
df = pd.read_csv('Boston.csv')

In [3]:
df.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [4]:
df.isna().sum()

crim       0
zn         0
indus      0
chas       0
nox        0
rm         0
age        0
dis        0
rad        0
tax        0
ptratio    0
black      0
lstat      0
medv       0
dtype: int64

In [5]:
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [6]:
# normal the data
from sklearn.preprocessing import MinMaxScaler

# initize a scaler
scaler_X = MinMaxScaler()

# fit and transform
X_scaled = scaler_X.fit_transform(X)


In [7]:
# split the dataset
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_scaled,
                                                    y,
                                                    test_size=0.33,
                                                    random_state=0)

## Gradient

In [58]:
class WEN():
    '''
    Implement weighted elastic net
    '''
    
    def __init__(self, l1= 0.5, l2= 0.5, weight =  None,
                 step_size = 1e-3, max_iter = 50, 
                 tol= 1e-4, random_state = 0):
        '''
        Args:
            l1 (float): A float between 0 and 1 for lasso regularization
            l2 (float): A float between 0 and 1 for riage regularization
            weight (array): A 2D weight with the suitable dimensions. Default is identify matrix
            step_size (float): A float that determines the step size
            max_iter (int): The maximun number of iterations
            tol (float): The tolerlance for the solution
            random_state (int): The random state
            
        Returns: None
        '''
        self._weight = weight
        self._max_iter = max_iter
        self._step_size = step_size
        self._tol = tol
        self._random_state = random_state
        self._coeff = None
        self._num_obersvations = None
        self._num_features = None
        self._max_peak = None
        self._min_peak = None
        self._l1 = l1
        self._l2 = l2
        self._gradient = None
        self._cost = None
        self._is_increased = False
        
    def _get_num_observations(self, X):
        '''
        Args:
            X (array): A matrix that contains data
        
        Returns: None
        '''
        self._num_obersvations = (np.array(X)).shape[0]
        
    def _get_num_features(self, X):
        '''
        Args:
            X (array): A matrix that contains data
            
        Returns: None
        '''
        self._num_features = (np.array(X)).shape[1]
    
    def _get_peaks(self, X):
        '''
        Args:
            X (array): A matrix that contains data
            
        Returns: None
        '''
        self._max_peak = np.max(np.array(X), axis = 0)
        self._min_peak = np.min(np.array(X), axis = 0)
        
    def _get_initial_coeff(self, X):
        '''
        Args:
            X (array): A matrix that contians data
            
        Reutrns: None
        '''
        np.random_state = self._random_state
        
        if not self._num_features:
            self._get_num_features(X)
        if (not self._min_peak) or (not self._max_peak):
            self._get_peaks(X)
        ranges = (self._max_peak - self._min_peak).reshape(self._num_features,1)
        self._coeff = ranges*np.random.random(size = (self._num_features, 1)) \
            + self._min_peak.reshape(self._num_features, 1)
    
    def _get_initial_weight(self, X):
        '''
        Args:
            X (array): A matrix that contains data
        
        Returns: None 
        '''
        if not self._num_obersvations:
            self._get_num_observations(X)
        if not self._weight:
            self._weight = np.identity(self._num_obersvations)
        
    def _should_stop(self, old_coeff, iters):
        '''
        Args:
            old_coeff (array): The old coefficients 
            iters: The number of iterations
        
        Returns:
            boolean: True for meeting stop conditions. Otherwise returns False
        '''
        if iters > self._max_iter:
            return True
        else:
            if old_coeff is None:
                return False
            else:
                if self._is_increased:
                    return False
                else:
                    return (np.abs(old_coeff - self._coeff).sum() < self._tol)
            
    def _gradient_abs_function(self):
        '''
        Args: None
            
        Returns: 
            array: a array with the same size of self._coeff
        '''
        sub_derivative = (self._coeff != 0)*np.sign(self._coeff) # when component != 0
        sub_derivative += (self._coeff == 0)*(2*np.random.random() - 1) # when component == 0
        return sub_derivative       
        
    def _calculate_gradient(self, X, y):
        '''
        Args:
            X (array): A matrix that contians data
            y (array): A array that contains labels
            
        Returns: 
            array: a array with the same size of self._coeff
        '''
        
        y = np.array(y).reshape(self._num_obersvations, 1)
        X = np.array(X)
        sub_derivative = self._gradient_abs_function()
        I = np.identity(self._num_features)
        
        g1 = 2*(np.transpose(X)@self._weight@X + self._l2*I)@self._coeff/(1 + self._l2)
        g2 = -2*np.transpose(X)@self._weight@y
        g3 = self._l1*sub_derivative
    
        self._gradient = g1 + g2 + g3

    def _calculate_cost(self, X, y):
        '''
        Args:
            X (array): A matrix that contians data
            y (array): A array that contains labels
            
        Returns: 
            float : a float represents the loss
        '''
        
        y = np.array(y).reshape(self._num_obersvations, 1)
        X = np.array(X)
        I = np.identity(self._num_features)
        
        lf1 = np.transpose(self._coeff)@(np.transpose(X)@self._weight@X + self._l2*I)/(1+ self._l2)@self._coeff
        lf2 = -2*np.transpose(y)@self._weight@X@self._coeff 
        lf3 = self._l1*np.linalg.norm(self._coeff, ord=1)
        
        self._cost = np.asscalar(lf1 + lf2)+ lf3
    
    def _assign_coeff(self, coeff):
        self._coeff = coeff
        
    def _assign_cost(self, cost):
        self._cost = cost 
    
    def _assign_gradient(self, gradient):
        self._gradient = gradient
        
    def _calculate_coeff(self, X, y):
        '''
        Args:
            X
            y
        
        '''
        self._coeff = self._coeff - self._step_size*self._gradient
        
        
    def fit(self, X, y):
        '''
        Args:
            X (array): array that contains data
            y (array): array that contains labels
    
        '''
        iters = 0
        old_coeff = None
        old_cost = None
        
        self._get_initial_coeff(X)
        self._get_initial_weight(X)
                
        while not self._should_stop(old_coeff, iters):
            iters += 1
            old_coeff = self._coeff
            old_cost = self._cost
            
            self._calculate_gradient(X, y)
            self._calculate_cost(X, y)
            self._calculate_coeff(X, y)
            #print(self._coeff)
            if old_cost:
                if self._cost >= old_cost:
                    self._is_increased = True
                    
                    self._assign_cost(old_cost)
                    self._assign_coeff(old_coeff \
                                       + 0.01*np.linalg.norm(old_cost)*np.random.random(size = self._coeff.shape))
                else:
                    self._is_increased = False
                #print(self._coeff)
            #print(iters, self._cost)
        

In [59]:
np.max(np.array(X_train), axis = 0).shape

(13,)

In [68]:
net1 = WEN(max_iter=5000, step_size=1.25e-3)

In [69]:
net1.fit(X_train, y_train)

1 -29206.7777427
2 -173379.750203
3 -230231.24457
4 -254226.557179
5 -265389.877091
6 -271237.61125
7 -274695.099748
8 -276963.980068
9 -278577.878501
10 -279798.323941
11 -280765.856407
12 -281562.562317
13 -282239.412707
14 -282829.391645
15 -283354.4441
16 -283829.468051
17 -284264.751474
18 -284667.477072
19 -285042.300591
20 -285393.599646
21 -285724.202686
22 -286036.285254
23 -286331.568565
24 -286611.453395
25 -286877.110693
26 -287129.543205
27 -287369.62764
28 -287598.143761
29 -287815.794673
30 -288023.221153
31 -288221.011966
32 -288409.711416
33 -288589.825038
34 -288761.823967
35 -288926.148421
36 -289083.158368
37 -289233.172957
38 -289376.683403
39 -289514.030144
40 -289645.533148
41 -289771.493361
42 -289892.194009
43 -290007.901772
44 -290118.867874
45 -290225.329069
46 -290327.50857
47 -290425.616898
48 -290519.852676
49 -290610.403368
50 -290697.445971
51 -290781.147653
52 -290861.666357
53 -290939.151359
54 -291013.743799
55 -291085.577162
56 -291154.777744
57 -291

645 -293584.797837
646 -293584.824995
647 -293584.85194
648 -293584.878674
649 -293584.905199
650 -293584.931515
651 -293584.957626
652 -293584.983532
653 -293585.009236
654 -293585.034738
655 -293585.060041
656 -293585.085147
657 -293585.110056
658 -293585.134771
659 -293585.159293
660 -293585.183623
661 -293585.207763
662 -293585.231716
663 -293585.255481
664 -293585.279062
665 -293585.302458
666 -293585.325673
667 -293585.348707
668 -293585.371561
669 -293585.394238
670 -293585.416738
671 -293585.439064
672 -293585.461215
673 -293585.483195
674 -293585.505004
675 -293585.526644
676 -293585.548116
677 -293585.569421
678 -293585.590561
679 -293585.611537
680 -293585.632351
681 -293585.653003
682 -293585.673495
683 -293585.693829
684 -293585.714005
685 -293585.734025
686 -293585.753891
687 -293585.773602
688 -293585.793161
689 -293585.81257
690 -293585.831828
691 -293585.850937
692 -293585.869899
693 -293585.888715
694 -293585.907385
695 -293585.925911
696 -293585.944295
697 -293585.96

1326 -293588.351705
1327 -293588.351881
1328 -293588.352056
1329 -293588.35223
1330 -293588.352402
1331 -293588.352574
1332 -293588.352744
1333 -293588.352912
1334 -293588.35308
1335 -293588.353246
1336 -293588.353412
1337 -293588.353576
1338 -293588.353738
1339 -293588.3539
1340 -293588.354061
1341 -293588.35422
1342 -293588.354378
1343 -293588.354535
1344 -293588.354691
1345 -293588.354846
1346 -293588.355
1347 -293588.355152
1348 -293588.355304
1349 -293588.355454
1350 -293588.355603
1351 -293588.355752
1352 -293588.355899
1353 -293588.356045
1354 -293588.35619
1355 -293588.356334
1356 -293588.356477
1357 -293588.356619
1358 -293588.35676
1359 -293588.3569
1360 -293588.357039
1361 -293588.357177
1362 -293588.357314
1363 -293588.35745
1364 -293588.357585
1365 -293588.357719
1366 -293588.357852
1367 -293588.357984
1368 -293588.358115
1369 -293588.358246
1370 -293588.358375
1371 -293588.358503
1372 -293588.358631
1373 -293588.358757
1374 -293588.358883
1375 -293588.359008
1376 -293588.