In [1]:
from py_regression.custom import CustomRegressor
from py_regression.ridge_regression import RidgeRegression
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Ridge
import numpy as np
import pandas as pd
import os

In [2]:
# Synthetic regression problem: 2 features, only 1 of which is informative.
n_samples = 50

X, y, coef = datasets.make_regression(
    n_samples=n_samples,
    n_features=2,
    n_informative=1,
    noise=1,
    coef=True,  # also return the coefficients used to generate y
    random_state=2020,
)

# Hold out 20% of the samples for evaluation; the seed keeps the split fixed.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

In [8]:
# Quick look at the average of the training targets.
np.mean(Y_train)

0.8499747105516381

In [3]:
# Fit the ridge model on the training split.
model = RidgeRegression(l2_penality=0.01)
model.fit(X_train, Y_train)

# Score the fitted model on the held-out split.
Y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=Y_test, y_pred=Y_pred)
print("MSE:", mse)
print("Trained Coef:", model.W)

# Show targets and predictions side by side as the cell output.
(Y_test, Y_pred)

MSE: 120.41547249074092
Trained Coef: [4.15728916 0.69778998]


(array([ -4.51729397, -15.0777967 , -20.38069265,  13.01182383,
         -4.2350477 ,   7.4169711 ,  21.57988713,  15.83279275,
          4.7991907 ,  19.1217498 ]),
 array([-0.30479315, -3.52731077, -4.14718529,  3.35705103, -0.82156437,
         0.40466026,  5.26532884,  4.99500677,  2.1171584 ,  3.48159789]))

In [4]:
# Fit CustomRegressor (labelled "logcosh" in the printout below) on the same split.
cr = CustomRegressor()
cr.fit(X_train, Y_train)

# Score on the held-out split and report the learned weights.
Y_pred = cr.predict(X_test)
mse = mean_squared_error(y_true=Y_test, y_pred=Y_pred)
print("Loss: logcosh , MSE:", mse, "coef:", cr.W)

# Show targets and predictions side by side as the cell output.
(Y_test, Y_pred)

Loss: logcosh , MSE: 8.719136675263501 coef: [16.09341243  0.34675723]


(array([ -4.51729397, -15.0777967 , -20.38069265,  13.01182383,
         -4.2350477 ,   7.4169711 ,  21.57988713,  15.83279275,
          4.7991907 ,  19.1217498 ]),
 array([ -2.25365378, -12.97470176, -15.65948381,  10.3286596 ,
         -3.38457468,   4.54781352,  17.07425294,  14.10045139,
          3.90398208,  15.2363162 ]))

### The effect of extreme values can be reduced by using the log-cosh loss

In [15]:
# Inject a single extreme value by overwriting the first training target with 20.
# This mutates Y_train in place, so every fit on Y_train executed after this
# cell sees the modified target.
# NOTE(review): re-running the earlier training cells after this one would also
# train on the modified targets unless the data-generation cell is re-run first.
Y_train[0] = 20

In [16]:
# Refit the ridge model, now on the training targets containing the injected value.
# NOTE(review): this uses RidgeRegression's default settings, whereas the earlier
# ridge cell passes l2_penality = 0.01 -- confirm the default matches, otherwise
# the two runs differ in more than just the training data.
model = RidgeRegression()
model.fit(X_train, Y_train)

# Score on the (unchanged) held-out split.
Y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=Y_test, y_pred=Y_pred)
print("MSE:", mse)
print("Trained Coef:", model.W)
print("Original Coef:", coef)  # coefficients returned by make_regression

# Show targets and predictions side by side as the cell output.
(Y_test, Y_pred)

MSE: 143.8223687001078
Trained Coef: [ 3.80388182 -0.98474715]
Original Coef: [19.9916989  0.       ]


(array([ -4.51729397, -15.0777967 , -20.38069265,  13.01182383,
         -4.2350477 ,   7.4169711 ,  21.57988713,  15.83279275,
          4.7991907 ,  19.1217498 ]),
 array([-0.73365569, -2.4731708 , -3.23694894,  1.51854211, -0.60700127,
         2.71112963,  2.82226839,  1.24617838, -0.7361757 ,  4.68353053]))

In [17]:
# Refit CustomRegressor (labelled "logcosh" in the printout below) on the
# current Y_train.
cr = CustomRegressor()
cr.fit(X_train, Y_train)

# Score on the held-out split and report the learned weights.
Y_pred = cr.predict(X_test)
mse = mean_squared_error(y_true=Y_test, y_pred=Y_pred)
print("Loss: logcosh , MSE:", mse, "coef:", cr.W)

# Show targets and predictions side by side as the cell output.
(Y_test, Y_pred)

Loss: logcosh , MSE: 8.717530134744258 coef: [16.09386584  0.34665745]


(array([ -4.51729397, -15.0777967 , -20.38069265,  13.01182383,
         -4.2350477 ,   7.4169711 ,  21.57988713,  15.83279275,
          4.7991907 ,  19.1217498 ]),
 array([ -2.25374767, -12.9750161 , -15.65988706,  10.32884607,
         -3.38465998,   4.54809993,  17.0745996 ,  14.1006246 ,
          3.90391194,  15.23684686]))