In [205]:
import collections
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
%matplotlib inline

In [None]:
# Linear Regression Class
class Linear_Regression():
    """Least-squares linear regression fitted on a pandas DataFrame.

    The last column of ``data`` is the response; every preceding column is a
    feature. A column of ones is prepended to the feature matrix, so
    ``param[0]`` is the intercept and ``param`` has shape ``(p + 1, 1)``.
    """

    def __init__(self, data):
        """Build the design matrix, the response vector and a random ``param``.

        Parameters
        ----------
        data : pandas.DataFrame
            ``n`` rows by ``p + 1`` columns; the last column is the response.
        """
        self.data = data # Pandas data frame
        self.n = self.data.shape[0]
        self.p = self.data.shape[1]-1
        self.X = self.data.iloc[:, :self.p].to_numpy()
        # Prepend the intercept column of ones -> X is n-by-(p+1).
        self.X = np.insert(self.X, 0, np.ones(self.n), axis=1)
        self.Y = self.data.iloc[:, -1].to_numpy().reshape(-1, 1)
        # Random starting point, each coefficient drawn uniformly from [-1, 1).
        self.param = np.random.uniform(low=-1, high=1, size=[self.p+1, 1])
        print("Params Dim: ", self.p)
        print("Data Size: ", self.n)

        # Pre-Computed results (X'X and X'Y are reused by every grad() call)
        self.XTX = np.matmul(np.transpose(self.X), self.X)
        self.XTY = np.matmul(np.transpose(self.X), self.Y)

    def h_func(self):
        """Return the n-by-1 predictions ``X @ param`` on the training data."""
        return np.matmul(self.X, self.param)

    def MSE(self):
        """Return the mean squared error of the current ``param`` on the training data."""
        return ((self.h_func() - self.Y) ** 2).sum() / self.n

    def loss(self):
        """Return the training objective, ``MSE / 2``."""
        return self.MSE() / 2

    def grad(self):
        """Return the gradient of ``loss`` w.r.t. ``param``: ``(X'X param - X'Y) / n``."""
        return (np.matmul(self.XTX, self.param) - self.XTY) / self.n

    def evaluate(self, newdata):
        """Predict responses for new feature rows.

        Parameters
        ----------
        newdata : array-like
            Feature values only (no intercept column). Reshaped to ``(-1, p)``,
            so its size must be a multiple of ``p``; otherwise numpy raises
            ``ValueError``.

        Returns
        -------
        numpy.ndarray
            Predictions with shape ``(m, 1)``.
        """
        # np.asarray lets lists and DataFrames through; np.insert already
        # returns a fresh array, so the caller's data is never modified.
        new_X = np.asarray(newdata).reshape(-1, self.p)
        new_X = np.insert(new_X, 0, np.ones(new_X.shape[0]), axis=1)
        return np.matmul(new_X, self.param)

    def cheat(self):
        """Print the closed-form least-squares optimum and its MSE.

        Solved with ``np.linalg.lstsq`` on the design matrix rather than by
        inverting ``X'X``: explicit inversion fails on collinear features and
        squares the condition number of the problem. ``param`` is not modified.
        """
        opt_param, *_ = np.linalg.lstsq(self.X, self.Y, rcond=None)
        opt_MSE = ((np.matmul(self.X, opt_param) - self.Y) ** 2).sum() / self.n
        print("Theoretical Optimized MSE: ", opt_MSE)
        print("Theoretical Optimized Params: ", '\n',opt_param)

        
    
class GD():
    """Gradient-descent driver.

    Works with any model object that exposes a ``param`` array, an integer
    ``p`` (number of features) and the methods ``grad()`` and ``loss()``.
    """

    def __init__(self, alpha, model):
        self.alpha = alpha # Learning Rate
        self.model = model # Model being optimised; its ``param`` is updated in place

    def model_reset(self):
        """Replace the model's parameters with fresh uniform draws from [-1, 1)."""
        n_coef = self.model.p + 1
        self.model.param = np.random.uniform(-1, 1, size=(n_coef, 1))

    def step(self):
        """Take one descent step: ``param -= alpha * grad``."""
        update = self.alpha * self.model.grad()
        self.model.param -= update

    def train(self, max_iter, log_interval = 1):
        """Run ``max_iter`` steps and print the loss every ``log_interval`` iterations.

        The loss printed for iteration ``k`` is measured after that
        iteration's step has been applied.
        """
        for k in range(max_iter):
            self.step()
            if not k % log_interval:
                print("Iteration: ", k, "Loss: ", self.model.loss())
        
    

In [208]:
# Read the wine-quality CSV (path is relative to the notebook's working directory).
dat = pd.read_csv("winequality-red.csv")
# Bare name as the last expression: Jupyter renders the frame as the cell output.
dat

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
1,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5
2,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5
3,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
...,...,...,...,...,...,...,...,...,...,...,...,...
1594,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,5
1595,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6
1596,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5


In [209]:
# Initialization
dat = pd.read_csv("winequality-red.csv")
lm = Linear_Regression(dat)
# Use GD, which is defined alongside Linear_Regression. The SGD class is only
# defined in a later cell, so referencing it here raises NameError when the
# notebook is executed top to bottom on a fresh kernel.
optimizer = GD(1e-5, lm)
# Train
optimizer.model_reset()
optimizer.train(10000000, log_interval = 1000000)
print("Parameters: ",'\n', optimizer.model.param)
print("MSE: ", optimizer.model.MSE())
# Print the closed-form optimum for comparison with the iterative result.
optimizer.model.cheat()

Params Dim:  11
Data Size:  1599
Iteration:  0 Loss:  569.632708345379
Iteration:  1000000 Loss:  0.23797679757334844
Iteration:  2000000 Loss:  0.23075987671370193
Iteration:  3000000 Loss:  0.22630731875015456
Iteration:  4000000 Loss:  0.22338985794194444
Iteration:  5000000 Loss:  0.22136789889602762
Iteration:  6000000 Loss:  0.2198953130833707
Iteration:  7000000 Loss:  0.21877775979612774
Iteration:  8000000 Loss:  0.21790139863228047
Iteration:  9000000 Loss:  0.21719630782179883
Parameters:  
 [[-9.32631714e-02]
 [ 4.63891272e-02]
 [-9.21517069e-01]
 [ 3.42736529e-02]
 [-4.29593279e-04]
 [ 5.19908644e-01]
 [ 4.67079740e-03]
 [-2.76123961e-03]
 [ 1.21035357e+00]
 [ 2.41734938e-01]
 [ 6.77708553e-01]
 [ 3.23866568e-01]]
MSE:  0.4332347448601869
Theoretical Optimized MSE:  0.41676716722140816
Theoretical Optimized Params:  
 [[ 2.19652085e+01]
 [ 2.49905527e-02]
 [-1.08359026e+00]
 [-1.82563948e-01]
 [ 1.63312698e-02]
 [-1.87422516e+00]
 [ 4.36133331e-03]
 [-3.26457970e-03]
 [-1.

In [179]:
# Validation: residuals (prediction minus label) on the data the model was trained on
label = dat.iloc[:, -1].to_numpy()
pred = optimizer.model.evaluate(dat.iloc[:, :-1].to_numpy()).ravel()
print(pred - label)

[ 0.13764471  0.23640751  0.2775234  ... -0.11981331  0.39685045
 -0.39787425]


In [None]:
# ------------------------- Bonus: Ridge Regression -------------------------

In [200]:
# Ridge Regression Class
class Ridge_Regression():
    """Linear regression with an L2 (ridge) penalty, fitted on a pandas DataFrame.

    The last column of ``data`` is the response; every preceding column is a
    feature. A column of ones is prepended to the feature matrix, so
    ``param[0]`` is the intercept and ``param`` has shape ``(p + 1, 1)``.

    Objective: ``MSE / 2 + lamda / 2 * ||param||^2``. The penalty is applied to
    every entry of ``param``, including the intercept.
    """

    def __init__(self, data,lamda=0.05):
        """Build the design matrix, the response vector and a random ``param``.

        Parameters
        ----------
        data : pandas.DataFrame
            ``n`` rows by ``p + 1`` columns; the last column is the response.
        lamda : float, default 0.05
            Weight of the L2 penalty.
        """
        self.data = data # Pandas data frame
        self.n = self.data.shape[0]
        self.lamda =lamda
        self.p = self.data.shape[1]-1
        self.X = self.data.iloc[:, :self.p].to_numpy()
        # Prepend the intercept column of ones -> X is n-by-(p+1).
        self.X = np.insert(self.X, 0, np.ones(self.n), axis=1)
        self.Y = self.data.iloc[:, -1].to_numpy().reshape(-1, 1)
        # Random starting point, each coefficient drawn uniformly from [-1, 1).
        self.param = np.random.uniform(low=-1, high=1, size=[self.p+1, 1])
        print("Params Dim: ", self.p)
        print("Data Size: ", self.n)

        # Pre-Computed results (X'X and X'Y are reused by every grad() call)
        self.XTX = np.matmul(np.transpose(self.X), self.X)
        self.XTY = np.matmul(np.transpose(self.X), self.Y)

    def h_func(self):
        """Return the n-by-1 predictions ``X @ param`` on the training data."""
        return np.matmul(self.X, self.param)

    def MSE(self):
        """Return the (unpenalised) mean squared error on the training data."""
        return ((self.h_func() - self.Y) ** 2).sum() / self.n

    def loss(self):
        """Return the ridge objective ``MSE / 2 + lamda / 2 * sum(param ** 2)``."""
        # Sum of squares computed directly: np.linalg.norm(..., ord=2) on a 2-D
        # column takes the matrix-norm (SVD) path just to be squared again.
        return self.MSE() / 2 + self.lamda / 2 * np.sum(self.param ** 2)

    def grad(self):
        """Return the gradient of ``loss``: ``(X'X param - X'Y) / n + lamda * param``."""
        return (np.matmul(self.XTX, self.param) - self.XTY) / self.n + self.lamda*self.param

    def evaluate(self, newdata):
        """Predict responses for new feature rows.

        Parameters
        ----------
        newdata : array-like
            Feature values only (no intercept column). Reshaped to ``(-1, p)``,
            so its size must be a multiple of ``p``; otherwise numpy raises
            ``ValueError``.

        Returns
        -------
        numpy.ndarray
            Predictions with shape ``(m, 1)``.
        """
        # np.asarray lets lists and DataFrames through; np.insert already
        # returns a fresh array, so the caller's data is never modified.
        new_X = np.asarray(newdata).reshape(-1, self.p)
        new_X = np.insert(new_X, 0, np.ones(new_X.shape[0]), axis=1)
        return np.matmul(new_X, self.param)

    def cheat(self):
        """Print the closed-form ridge optimum and its (unpenalised) MSE.

        Setting ``grad()`` to zero gives ``(X'X + n * lamda * I) param = X'Y``.
        The system is solved with ``np.linalg.solve`` instead of forming an
        explicit inverse. ``param`` is not modified.
        """
        ridge_gram = self.XTX + self.n * self.lamda * np.identity(self.p+1)
        opt_param = np.linalg.solve(ridge_gram, self.XTY)
        opt_MSE = ((np.matmul(self.X, opt_param) - self.Y) ** 2).sum() / self.n
        print("Theoretical Optimized MSE: ", opt_MSE)
        print("Theoretical Optimized Params: ", '\n',opt_param)

        
        
    
class SGD():
    """Iterative optimiser that repeatedly applies ``param -= alpha * model.grad()``.

    No mini-batch sampling happens in this class: every step uses whatever
    gradient ``model.grad()`` returns. The model must provide ``param``, ``p``,
    ``grad()`` and ``loss()``.
    """

    def __init__(self, alpha, model):
        # alpha: learning rate; model: object whose ``param`` array is optimised
        self.model, self.alpha = model, alpha

    def step(self):
        """Perform a single in-place parameter update."""
        direction = self.model.grad()
        scaled = direction * self.alpha
        self.model.param -= scaled

    def model_reset(self):
        """Re-initialise the model's ``param`` uniformly in [-1, 1) with shape ``(p + 1, 1)``."""
        shape = [self.model.p + 1, 1]
        self.model.param = np.random.uniform(low=-1, high=1, size=shape)

    def train(self, max_iter, log_interval = 1):
        """Run ``max_iter`` updates; print the post-step loss whenever the
        iteration index is a multiple of ``log_interval``."""
        for it in range(max_iter):
            self.step()
            if it % log_interval != 0:
                continue
            print("Iteration: ", it, "Loss: ", self.model.loss())
        

In [201]:
# Initialization: ridge model (lamda = 0.05) optimised with learning rate 1e-5
dat = pd.read_csv("winequality-red.csv")
lm = Ridge_Regression(dat, lamda=0.05)
optimizer = SGD(alpha=1e-5, model=lm)
# Train from a fresh random start, logging the loss ten times over the run
optimizer.model_reset()
optimizer.train(max_iter=1_000_000, log_interval=100_000)
# Report the fitted model (optimizer.model is lm) next to the closed-form optimum
print("Parameters: ",'\n', lm.param)
print("MSE: ", lm.MSE())
lm.cheat()

Params Dim:  11
Data Size:  1599
Iteration:  0 Loss:  1898.9463224292754
Iteration:  100000 Loss:  0.3514074787756891
Iteration:  200000 Loss:  0.31066671155820125
Iteration:  300000 Loss:  0.3004928267405232
Iteration:  400000 Loss:  0.29248747892279864
Iteration:  500000 Loss:  0.28570659730451925
Iteration:  600000 Loss:  0.2799092524310344
Iteration:  700000 Loss:  0.27492118282495415
Iteration:  800000 Loss:  0.27060635392339377
Iteration:  900000 Loss:  0.2668570009225045
Parameters:  
 [[-0.19574729]
 [ 0.06195677]
 [ 0.22061364]
 [ 0.63702315]
 [-0.01061737]
 [-0.15446372]
 [ 0.00870548]
 [-0.00347562]
 [ 0.00635119]
 [ 0.36865838]
 [ 0.19469945]
 [ 0.35795426]]
MSE:  0.486039782944652
Theoretical Optimized MSE:  0.4545180872783981
Theoretical Optimized Params:  
 [[ 0.11890313]
 [ 0.08292512]
 [-0.35321197]
 [ 0.06124528]
 [-0.00408968]
 [-0.02174472]
 [ 0.00663976]
 [-0.00255687]
 [ 0.11737472]
 [ 0.24945799]
 [ 0.32387003]
 [ 0.37039913]]
