In [14]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Regularization:
A group of techniques that provides additional information to our model to prevent it from overfitting and helps it to generalize well.
## Ridge Regularization (L2):
The L2 regularization keeps all the features in our model and ensures there coefficinet are evenly distributed. The L2 regularization is used where all the features are equally important and there is no need for feature selection. The alpha is used to control the strength of regularization its values ranges from 0 to inf.

In [15]:
class our_RidgeRegressor():
    def __init__(self,alpha= 0.1):
        self.alpha = alpha
        self.intercept_ = None
        self.coef_ = None
    def fit(self, X_train, y_train):
        X_train = np.insert(X_train,0,1,axis=1)
        betas = np.linalg.inv(np.dot(X_train.T,X_train)-self.alpha*np.identity(X_train.shape[1])).dot(X_train.T).dot(y_train)
        self.intercept_ = betas[0]
        self.coef_ = betas[1:]
    def predict(self,X_test):
        pred = np.dot(X_test,self.coef_) + self.intercept_
        return pred

In [16]:
from sklearn.datasets import load_diabetes
X,y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42) 

### Scikit-Learns Implementation

In [17]:
reg = Ridge(alpha=0.1)
reg.fit(X_train,y_train)
pred = reg.predict(X_test)
score = r2_score(y_test,pred)
print("r2 score= ",score)
print("coefficient=" ,reg.coef_)
print("Intercept=" ,reg.intercept_)

r2 score=  0.46085219464119254
coefficient= [  42.85566976 -205.49431899  505.08903304  317.0932049  -108.50026183
  -86.23673333 -190.36318008  151.70708637  392.28931896   79.9081772 ]
Intercept= 151.45857456679613


### Our Implementation

In [19]:
our_reg = our_RidgeRegressor()
our_reg.fit(X_train,y_train)
our_pred = our_reg.predict(X_test)
score = r2_score(y_test,our_pred)
print("r2 score= ",score)
print("coefficient=" ,our_reg.coef_)
print("Intercept=" ,our_reg.intercept_)

r2 score=  0.44104446740203906
coefficient= [  42.77436386 -282.9322093   629.32063045  371.30363733   -7.46605735
 -234.71864346 -298.84344422  110.01738796  437.98583952    9.48542789]
Intercept= 151.1586533987435
