! pip install numpy
! pip install pandas
! pip install matplotlib
! pip install seaborn

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from models import *
from regularizations import *

In [3]:
# Load the tab-separated data file. Because explicit column names are passed,
# every line of the file is read as data: six columns x1..x6 followed by y.
df = pd.read_csv(
    'dane.data',
    sep='\t',
    names=[*(f'x{i}' for i in range(1, 7)), 'y'],
)
def partition(train, valid, data=None, random_state=None):
    """Split a data frame into random train / validation / test subsets.

    Args:
        train: Fraction of all rows placed in the training subset.
        valid: Fraction of all rows placed in the validation subset.
        data: Frame to split. When omitted, the module-level ``df`` is used,
            which is what the original two-argument call did. Rows are
            removed by index label, so labels are expected to be unique.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            split can be reproduced. ``None`` keeps the unseeded behaviour.

    Returns:
        A ``(df_train, df_valid, df_test)`` tuple; the test subset receives
        every row that was not sampled into the other two.

    Raises:
        ValueError: If a fraction is negative or ``train + valid`` is not
            strictly below 1 (no rows would be left for the test subset).
    """
    # Explicit exception instead of ``assert``: asserts vanish under ``-O``.
    if train < 0 or valid < 0 or not (train + valid < 1):
        raise ValueError(
            "train and valid must be non-negative and sum to less than 1, "
            f"got train={train!r}, valid={valid!r}"
        )

    frame = df if data is None else data

    df_train = frame.sample(frac=train, random_state=random_state)
    remainder = frame.drop(df_train.index)
    # ``valid`` is a share of the whole frame, so rescale it to a share of
    # the rows still available after the training rows were removed.
    df_valid = remainder.sample(frac=valid / (1 - train), random_state=random_state)
    df_test = remainder.drop(df_valid.index)

    return df_train, df_valid, df_test

# 75% training, 18.75% validation; the remaining 6.25% becomes the test set.
df_train, df_valid, df_test = partition(train=3/4, valid=3/16)
for label, frame in (
    ("train: ", df_train),
    ("validate: ", df_valid),
    ("test: ", df_test),
):
    print(label, frame.shape)

train:  (1499, 7)
validate:  (375, 7)
test:  (125, 7)


In [4]:
class MinMaxScaler:
    """Min-max scaler that is fitted once and can then be reused.

    The constructor stores the minimum (``shift``) and the range (``factor``)
    of the array it is given; ``scale`` applies ``(A - shift) / factor`` with
    those stored statistics to any array passed later.
    """

    def __init__(self, A, axis=None):
        """Record the minimum and range of ``A``.

        Args:
            A: Object exposing ``min`` and ``max`` methods (the notebook
                passes NumPy arrays).
            axis: ``None`` (default) calls ``A.min()`` / ``A.max()`` without
                arguments, exactly as before; for a NumPy array that is one
                global statistic over all entries. An integer is forwarded
                as ``axis=``; ``axis=0`` on a 2-D array gives one statistic
                per column.
        """
        if axis is None:
            low, high = A.min(), A.max()
        else:
            low, high = A.min(axis=axis), A.max(axis=axis)

        span = high - low
        self.shift = low
        # A zero range would make scale() divide by zero and yield NaN/inf.
        # Adding the boolean mask bumps exactly those entries from 0 to 1, so
        # constant input scales to 0; non-zero ranges are left untouched.
        self.factor = span + (span == 0)

    def scale(self, A):
        """Return ``A`` shifted and divided by the stored statistics."""
        return (A - self.shift) / self.factor
    
def get_data(df):
    """Turn a frame with a ``y`` column into a design matrix and a target.

    Returns a pair ``(X, y)``: ``X`` holds every column except ``y`` with a
    leading ``x0`` column of ones prepended, and ``y`` is the ``y`` column
    reshaped to a single-column 2-D array. The input frame is not modified.
    """
    targets = df['y'].to_numpy()
    n_rows = targets.shape[0]

    # drop() returns a new frame, so inserting the column of ones below does
    # not touch the caller's data.
    features = df.drop(columns=['y'])
    features.insert(0, 'x0', [1] * len(df))

    return features.to_numpy(), targets.reshape([n_rows, 1])

# Convert each split into its (design matrix, target column) pair.
(train_X, train_y), (valid_X, valid_y), (test_X, test_y) = map(
    get_data, (df_train, df_valid, df_test)
)
train_X.shape, train_y.shape

((1499, 7), (1499, 1))

In [5]:
# Both scalers are fitted on the training split only and then reused for the
# validation and test splits, so those splits do not influence the statistics.
# train_X is a NumPy array here, so MinMaxScaler takes one min/max over all of
# its entries, the constant x0 column included.
scale_X = MinMaxScaler(train_X)
train_X, valid_X, test_X = (
    scale_X.scale(part) for part in (train_X, valid_X, test_X)
)

scale_y = MinMaxScaler(train_y)
train_y, valid_y, test_y = (
    scale_y.scale(part) for part in (train_y, valid_y, test_y)
)

In [6]:
# class BaseModel:
#     def __init__(self, X, y):
#         self.X = X
#         self.y = y
# 
#     def pred(self, X, theta):        
#         return np.dot(X, theta)
# 
#     def train(self, num_epochs, step_size=0.1):
#         N, D = self.X.shape
#         theta = np.zeros([D, 1])
#         loss=0
#         for epoch in range(num_epochs):
#             ypred = self.pred(self.X, theta)
#             loss = self._mse(self.y, ypred, theta)
#             gradient = self._grad(self.X, self.y, theta)
#             theta = self._step(theta, gradient, step_size)
#         return theta, loss
#     
#     def _mse(self, y, ypred, theta):
#        raise NotImplementedError
#     
#     def _grad(self, X, y, theta):
#         raise NotImplementedError
#     
#     def _step(self, theta, grad, step_size):
#         raise NotImplementedError
#     
#     def analytical(self):
#         raise NotImplementedError
#     

In [7]:
# class BaseRegularization:
#     def cost(self, theta):
#         raise NotImplementedError
#     
#     def grad(self, theta):
#         raise NotImplementedError

In [8]:
# class RidgeRegularization(BaseRegularization):
#     def __init__(self, l):
#         self.l = l
#         
#     def cost(self, theta):
#         return self.l * np.sum(theta**2)
#     
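#     def grad(self, theta):
#         # NOTE(review): cost() is self.l * sum(theta**2), whose derivative is
#         # 2*self.l*theta; the factor self.l is missing below, so the penalty
#         # strength has no effect on the gradient.
#         return 2*theta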
        

In [9]:
# class LassoRegularization(BaseRegularization):
#     def __init__(self, l):
#         self.l = l
# 
#     def cost(self, theta):
#         return self.l * np.sum(np.abs(theta))
# 
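#     def grad(self, theta):
#         # NOTE(review): cost() is self.l * sum(|theta|), whose (sub)gradient is
#         # self.l*sign(theta); the factor self.l is missing below.
#         return np.sign(theta)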


In [10]:
# class LeastSquaresModel(BaseModel, BaseRegularization):
#     def _mse(self, y, ypred, _theta=None):
#         return np.mean((y - ypred) ** 2)
# 
#     def _grad(self, X, y, theta):
#         return np.dot(X.T, self.pred(X, theta)-y)/y.shape[0]
# 
#     def _step(self, theta, grad, step_size):
#         return theta - step_size * grad
#     
#     def analytical(self):
#         A = np.dot(self.X.T, self.X)
#         A = np.linalg.inv(A)
#         A = np.dot(A, self.X.T)
#         A = np.dot(A, self.y)
#         A = A.reshape([A.shape[0],1])
#         return A, self._mse(self.y, self.pred(self.X, A), A)

In [11]:
# class RidgeLSModel(BaseModel, RidgeRegularization):
#     def __init__(self, X, y, l):
#         super().__init__(X, y)
#         super(BaseModel, self).__init__(l)
#         self.reg = super(BaseModel, self)
#     
#     def _mse(self, y, ypred, theta):
#         return np.mean((y - ypred) ** 2) + self.reg.cost(theta)
# 
#     def _grad(self, X, y, theta):
#         return np.dot(X.T, self.pred(X, theta)-y)/y.shape[0] + self.reg.grad(theta)
# 
#     def _step(self, theta, grad, step_size):
#         return theta - step_size * grad
# 
#     def analytical(self):
#         A = np.dot(self.X.T, self.X)
#         A += self.l*np.identity(A.shape[0], np.int64)
#         A = np.linalg.inv(A)
#         A = np.dot(A, self.X.T)
#         A = np.dot(A, self.y)
#         A = A.reshape([A.shape[0],1])
#         return A, self._mse(self.y, self.pred(self.X, A), A)
#     

In [12]:
# class LassoLSModel(BaseModel, LassoRegularization):
#     def __init__(self, X, y, l):
#         super().__init__(X, y)
#         super(BaseModel, self).__init__(l)
#         self.reg = super(BaseModel, self)
# 
#     def _mse(self, y, ypred, theta):
#         return np.mean((y - ypred) ** 2) + self.reg.cost(theta)
# 
#     def _grad(self, X, y, theta):
#         return np.dot(X.T, self.pred(X, theta)-y)/y.shape[0] + self.reg.grad(theta)
# 
#     def _step(self, theta, grad, step_size):
#         return theta - step_size * grad
# 
#     def analytical(self):
#         ls_model = LeastSquaresModel(self.X, self.y)
#         ls_theta, _ = ls_model.analytical()
#         theta =  np.sign(ls_theta)*np.maximum(np.abs(ls_theta)-self.l/2, 0)
#         y_pred = self.pred(self.X, theta)
#         cost = self._mse(self.y, y_pred, theta)
#         return theta, cost

In [13]:
# class CustomModel(BaseModel):
#     def __init__(self, X, y, mse, grad, step):
#         super().__init__(X, y)
#         self._mse = mse
#         self._grad = grad
#         self._step = step


In [14]:
# Fit the plain least-squares model on the scaled training split.
# LeastSquaresModel comes from the star-import of `models`; presumably it
# matches the commented-out reference class in this notebook, where train()
# runs gradient descent and returns (theta, last loss) — confirm in models.py.
# NOTE(review): the loss printed below (~0.247) is far above the ~0.051 of
# analytical(), so 1000 epochs at step_size=0.0001 has not converged.
ls_model = LeastSquaresModel(train_X, train_y)
ls_model.train(1000, step_size=0.0001)

(array([[0.00736792],
        [0.0292366 ],
        [0.03669123],
        [0.00974029],
        [0.00616608],
        [0.00633138],
        [0.00605438]]),
 0.24730439527745235)

In [15]:
# Closed-form fit, shown for comparison with the gradient-descent result.
# Presumably the normal-equation solution, as in the commented-out reference
# class — confirm in models.py.
ls_model.analytical()

(array([[ 5.06630971],
        [-0.35137794],
        [ 0.07492408],
        [-0.30186809],
        [-0.05181084],
        [-0.23423101],
        [-0.30686608]]),
 0.05124387683337048)

In [16]:
# Same experiment with the ridge-regularised model. The third constructor
# argument (0.1) is presumably the regularisation weight `l`, as in the
# commented-out reference class — confirm in models.py.
rls_model = RidgeLSModel(train_X, train_y, 0.1)
rls_model.train(1000, step_size=0.0001)

(array([[0.00669146],
        [0.02655645],
        [0.03332332],
        [0.00884606],
        [0.00560003],
        [0.00575016],
        [0.00549895]]),
 0.2517791134016754)

In [17]:
# Closed-form ridge fit for comparison.
# NOTE(review): the second value printed below (~1.46) is presumably the MSE
# plus the ridge penalty, as in the commented-out reference class, so it is
# not directly comparable to the unregularised loss — confirm in models.py.
rls_model.analytical()

(array([[ 3.72061152],
        [-0.32325427],
        [ 0.15217269],
        [ 0.19847099],
        [-0.01644203],
        [-0.10632055],
        [-0.26884405]]),
 1.4609569299012064)

In [18]:
# Same experiment with the lasso-regularised model; 0.1 is presumably the
# regularisation weight `l` — confirm in models.py.
lls_model = LassoLSModel(train_X, train_y, 0.1)
lls_model.train(1000, step_size=0.0001)

(array([[ 1.07266196e-04],
        [ 1.89239319e-05],
        [ 4.62722808e-06],
        [ 2.24099506e-05],
        [ 3.61089249e-05],
        [ 1.12863770e-05],
        [-6.98146507e-05]]),
 0.29656591076953714)

In [19]:
# "Analytical" lasso fit for comparison.
# NOTE(review): the coefficients printed below equal the least-squares
# coefficients from ls_model.analytical() moved 0.05 (= l/2) towards zero,
# i.e. soft-thresholding. That is the exact lasso solution only for an
# orthonormal design matrix, so treat this result as an approximation.
lls_model.analytical()

(array([[ 5.01630971e+00],
        [-3.01377944e-01],
        [ 2.49240759e-02],
        [-2.51868088e-01],
        [-1.81084428e-03],
        [-1.84231007e-01],
        [-2.56866083e-01]]),
 0.6554196582186247)