In [1]:
import numpy as np
import _pickle as cPickle

In [67]:
def l0_check(a: np.ndarray) -> np.ndarray:
    '''
    Indicator of the non-zero entries of an array.

    :param a: array-like of any shape; it is copied, never modified
    :return: new array with the shape and dtype of np.array(a), holding 1 where
             the input is non-zero and the original zero elsewhere
    '''
    b = np.array(a)  # np.array copies, so the caller's data stays untouched
    # A boolean mask replaces the per-element Python loop and also works for
    # inputs that are not one-dimensional.
    b[b != 0] = 1
    return b

In [74]:
FILE_FOR_MODEL = "05_class_Linear_regression_model.txt"


class LinearRegression:
    '''
    Linear regression fitted by batch gradient descent with optional
    L0 / L1 / L2 penalty terms added to the gradient.

    The weight vector is kept in ``self.w``. ``train`` also pickles it to
    FILE_FOR_MODEL, so an instance that has no weights in memory can still
    predict, test or warm-start from the last saved vector.
    '''

    def __init__(self, alpha: float, l0: float, l1: float, l2: float, stop_iter: float, stop_delta: float, verbose: bool, model: object):
        '''
        Linear Regression object constructor

        :param alpha: alpha param (step size) for gradient descent
        :param l0: L0 regularization coefficient
        :param l1: L1 regularization coefficient
        :param l2: L2 regularization coefficient
        :param stop_iter: maximum iterations of training
        :param stop_delta: stop iterating once the gradient norm drops below this value
        :param verbose: show verbose information
        :param model: get params from other model; when not None it must be a
                      dict shaped like the result of get_params(), and it
                      overrides every other argument
        '''
        if model is None:
            self.alpha = alpha
            self.l0 = l0
            self.l1 = l1
            self.l2 = l2
            self.stop_iter = stop_iter
            self.stop_delta = stop_delta
            self.verbose = verbose
            self.model = model
        else:
            self.alpha = model['alpha']
            self.l0 = model['l0']
            self.l1 = model['l1']
            self.l2 = model['l2']
            self.stop_iter = model['stop_iter']
            self.stop_delta = model['stop_delta']
            self.verbose = model['verbose']
            self.model = model['model']
        # No weights until train() runs or they are loaded from FILE_FOR_MODEL.
        self.w = None

    @staticmethod
    def _load_weights() -> np.ndarray:
        '''Read the most recently saved weight vector from FILE_FOR_MODEL.'''
        # NOTE(security): unpickling can execute arbitrary code; only load a
        # file that this notebook wrote itself.
        with open(FILE_FOR_MODEL, 'rb') as file:
            return cPickle.load(file)

    def _save_weights(self) -> None:
        '''Pickle the current weight vector to FILE_FOR_MODEL.'''
        with open(FILE_FOR_MODEL, 'wb') as file:
            cPickle.dump(self.w, file)

    def _current_weights(self) -> np.ndarray:
        '''Return this instance's weights, loading the saved file only if it has none.'''
        # Preferring the in-memory vector keeps two instances from silently
        # sharing weights through the single file on disk.
        if getattr(self, 'w', None) is None:
            self.w = self._load_weights()
        return self.w

    def train(self, X: np.array, y: np.array, warm_start: bool):
        '''
        Fit Linear Regression params

        Runs at most int(stop_iter) gradient steps, stops early when the
        gradient norm falls below stop_delta, then saves the weights to
        FILE_FOR_MODEL.

        :param X: training data, one row per sample
        :param y: training answers
        :param warm_start: must be set True to continue training, False to reset params
        '''
        X = np.asarray(X)
        y = np.asarray(y)

        if warm_start:
            # Continue from this instance's weights (or the saved file if it has none).
            self._current_weights()
        else:
            self.w = np.random.rand(X.shape[1])

        converged = False

        # Count from 1 so the verbose output reports completed iterations.
        for iteration in range(1, int(self.stop_iter) + 1):
            residual = X.dot(self.w) - y
            grad = (2*X.T.dot(residual)/len(y)
                    + self.l0*l0_check(self.w)
                    + self.l1*np.sign(self.w)
                    + self.l2*2*self.w)
            self.w = self.w - self.alpha*grad

            if self.verbose and iteration % 10000 == 0:
                print('iteration = ', iteration, 'vect =', self.w, 'grad =', grad)

            if np.linalg.norm(grad) < self.stop_delta:
                converged = True
                print('|grad| < stop_delta')
                break

        if not converged:
            print(self.stop_iter, 'iterations done')

        self._save_weights()
        print('train results saved in', FILE_FOR_MODEL)

    def get_params(self) -> dict:
        '''
        Return model params

        :return: dict of model params (hyperparameters only, the weight vector is not included)
        '''
        return {
            'alpha': self.alpha,
            'l0': self.l0,
            'l1': self.l1,
            'l2': self.l2,
            'stop_iter': self.stop_iter,
            'stop_delta': self.stop_delta,
            'verbose': self.verbose,
            'model': self.model,
        }

    def predict(self, X: np.array) -> np.array:
        '''
        Predict answers on given data

        Uses the weights held by this instance; if it has none, the vector
        saved in FILE_FOR_MODEL is loaded first.

        :param X: data, one row per sample
        :return: predicted answers
        '''
        return np.asarray(X).dot(self._current_weights())

    def test(self, X: np.array, y: np.array, metric=None) -> float:
        '''
        Test the model

        :param X: test data
        :param y: test answers
        :param metric: must be a function of 2 numpy arrays, called as
                       metric(y, predictions). If None, MSE is used.
        :return: metric value
        '''
        y = np.asarray(y)
        predictions = self.predict(X)
        if metric is not None:
            return metric(y, predictions)
        residual = predictions - y
        return residual.dot(residual)/len(y)

In [79]:
# Unregularized gradient-descent model: every penalty coefficient is zero.
lr = LinearRegression(
    alpha=1e-4,
    l0=0.,
    l1=0.,
    l2=0.,
    stop_iter=1e5,
    stop_delta=1e-16,
    verbose=False,
    model=None,
)

In [76]:
# First dataset: each row is [1, feature]; the constant 1 multiplies the first weight.
x_linear = np.array([[1, feature] for feature in (1, 2, 4, 9, -10)])
y_linear = np.array((8, 9, 4, 1, 100))

In [26]:
# Turn on progress output: with verbose set, train prints every 10000th iteration.
lr.verbose = True
# warm_start=False makes train start from freshly drawn random weights.
lr.train(X=x_linear, y=y_linear, warm_start=False)

iteration =  10000 vect = [ 26.72967878  -5.48876447] grad = [-8.51531806  0.25910966]
iteration =  20000 vect = [ 30.5047294   -5.60363418] grad = [-1.23950208  0.03771638]
iteration =  30000 vect = [ 31.05423125  -5.62035478] grad = [-0.18042372  0.00549005]
iteration =  40000 vect = [ 31.13421754  -5.62278865] grad = [-0.02626274  0.00079914]
iteration =  50000 vect = [ 31.14586046  -5.62314293] grad = [-0.00382284  0.00011632]
iteration =  60000 vect = [ 31.14755522  -5.6231945 ] grad = [ -5.56458521e-04   1.69322832e-05]
iteration =  70000 vect = [ 31.14780192  -5.62320201] grad = [ -8.09989112e-05   2.46468782e-06]
iteration =  80000 vect = [ 31.14783783  -5.6232031 ] grad = [ -1.17903192e-05   3.58763549e-07]
iteration =  90000 vect = [ 31.14784305  -5.62320326] grad = [ -1.71621601e-06   5.22222138e-08]
iteration =  100000 vect = [ 31.14784381  -5.62320328] grad = [ -2.49814831e-07   7.60157945e-09]
100000.0 iterations done


In [80]:
# Same training call as the previous cell, run again from a new random start.
lr.verbose = True
lr.train(X=x_linear, y=y_linear, warm_start=False)

iteration =  10000 vect = [ 26.69537333  -5.4877206 ] grad = [-8.5814364   0.26112155]
iteration =  20000 vect = [ 30.49973585  -5.60348223] grad = [-1.24912636  0.03800923]
iteration =  30000 vect = [ 31.05350439  -5.62033266] grad = [-0.18182465  0.00553268]
iteration =  40000 vect = [ 31.13411174  -5.62278543] grad = [-0.02646666  0.00080534]
iteration =  50000 vect = [ 31.14584506  -5.62314246] grad = [-0.00385253  0.00011723]
iteration =  60000 vect = [ 31.14755298  -5.62319443] grad = [ -5.60779219e-04   1.70637560e-05]
iteration =  70000 vect = [ 31.14780159  -5.623202  ] grad = [ -8.16278381e-05   2.48382534e-06]
iteration =  80000 vect = [ 31.14783778  -5.6232031 ] grad = [ -1.18818670e-05   3.61549081e-07]
iteration =  90000 vect = [ 31.14784305  -5.62320326] grad = [ -1.72954170e-06   5.26277574e-08]
iteration =  100000 vect = [ 31.14784381  -5.62320328] grad = [ -2.51754525e-07   7.66050903e-09]
100000.0 iterations done
train results saved in 05_class_Linear_regression_model.txt

In [28]:
# Display the hyperparameters stored on lr (the weight vector is not part of this dict).
lr.get_params()

{'alpha': 0.0001,
 'l0': 0.0,
 'l1': 0.0,
 'l2': 0.0,
 'model': None,
 'stop_delta': 1e-16,
 'stop_iter': 100000.0,
 'verbose': True}

In [29]:
# Build a second model from lr's hyperparameters; get_params() does not carry trained weights.
lr2=LinearRegression(**lr.get_params())

In [30]:
# Second dataset, same [1, feature] row layout as the first one.
x2_linear = np.array([[1, feature] for feature in (5, 2, 5, 7, -1)])
y2_linear = np.array((0, 5, -4, 1, 10))

In [31]:
# Fit lr2 on the first dataset from a fresh random start.
lr2.train(X=x_linear, y=y_linear, warm_start=False)

iteration =  10000 vect = [ 26.6071152   -5.48503503] grad = [-8.75154006  0.26629757]
iteration =  20000 vect = [ 30.48688887  -5.60309131] grad = [-1.2738869   0.03876266]
iteration =  30000 vect = [ 31.05163436  -5.62027576] grad = [-0.18542883  0.00564235]
iteration =  40000 vect = [ 31.13383954  -5.62277715] grad = [-0.02699129  0.00082131]
iteration =  50000 vect = [ 31.14580544  -5.62314126] grad = [-0.00392889  0.00011955]
iteration =  60000 vect = [ 31.14754722  -5.62319426] grad = [ -5.71895143e-04   1.74019986e-05]
iteration =  70000 vect = [ 31.14780075  -5.62320197] grad = [ -8.32458885e-05   2.53306010e-06]
iteration =  80000 vect = [ 31.14783766  -5.62320309] grad = [ -1.21173926e-05   3.68715803e-07]
iteration =  90000 vect = [ 31.14784303  -5.62320326] grad = [ -1.76382540e-06   5.36709024e-08]
iteration =  100000 vect = [ 31.14784381  -5.62320328] grad = [ -2.56745086e-07   7.81238327e-09]
100000.0 iterations done


In [32]:
# Mean squared error of lr2 on the second dataset (no metric argument is passed).
lr2.test(X=x2_linear, y=y2_linear)

216.45776749214892