<a href="https://colab.research.google.com/github/Matrix7043/Machine_learning101/blob/main/RigidRegression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd

In [3]:
def Linear(x):
    return x

In [4]:
def MSE(prediction, y, w):
    loss = 1/2*np.power(prediction - y, 2)
    loss_r = np.mean(loss) + 1/2*np.mean(np.power(w, 2))
    return np.mean(loss), loss_r

In [108]:
class RigidRegression:

    def __init__(self, x, y, split, activation, loss, batch_size = None):
        self.x = x
        self.y = y
        ones = np.ones((self.x.shape[0],1))
        self.x = np.hstack((self.x,ones))

        split_ = int(len(x)*split)

        xt = self.x[::]
        yt = self.y[::]

        self.x = xt[:split_]
        self.y = yt[:split_]

        self.x_t = xt[split_:]
        self.y_t = yt[split_:]

        self.y_pred = np.zeros((self.y_t.shape[0], 1))

        self.w = np.random.rand(self.x.shape[1],1)
        self.w_reg = self.w[:-1,:]
        self.w[-1][0] = 1
        self.batch_size = batch_size if batch_size else len(self.x)
        self.activation = activation
        self.loss = loss

    def forward(self, x_batch, y_batch):
        n = np.dot(x_batch, self.w)
        p = self.activation(n)

        c, c_r = self.loss(p, y_batch, self.w_reg)
        return x_batch, y_batch, p, c

    def backward(self, x_batch, y_batch, p, alpha):
        dJdW = np.mean((p - y_batch) * x_batch, axis=0).reshape(-1, 1)
        reg = alpha*self.w_reg
        reg = np.vstack((reg, [0]))
        return dJdW + reg

    def fit(self, epoch, learning_rate, alpha=0.1):
        for i in range(len(self.x)//self.batch_size):
            x_batch = self.x[self.batch_size*i: self.batch_size*(i+1)]
            y_batch = self.y[self.batch_size*i: self.batch_size*(i+1)]
            for _ in range(epoch):
                x_batch, y_batch, p, l = self.forward(x_batch, y_batch)
                if epoch % 10 == 0:
                    print(l)
                dJ_dw = self.backward(x_batch, y_batch, p, alpha)
                self.w = self.w - learning_rate * dJ_dw

    def predict(self):
        xt = self.x_t
        yt = self.y_t
        self.y_pred = self.forward(xt, yt)[2]
        return self.y_pred

    def train_dev(self):
        xt = self.x
        yt = self.y
        self.y_pred = self.forward(xt, yt)[2]
        return self.y_pred

    def metrics(self, test=1):
        if test: yt = self.y_t
        else: yt = self.y
        error = {}
        error['rmse'] = np.power(np.mean(np.power(self.y_pred - yt, 2)), 1/2)
        error['mae'] = np.mean(np.abs(self.y_pred - yt))
        return error


In [52]:
x = pd.read_csv(r'/content/sample_data/california_housing_train.csv')
y_train = x['median_house_value']
x_train = x.drop('median_house_value', axis=1)
x_train = x_train.to_numpy()
y_train = y_train.to_numpy().reshape(-1, 1)


In [53]:
X = np.array([1.25, 1.  , 0.75, 1.5, 1.75, 1.5 , 0.75])
X = np.expand_dims(X,1)
y = np.array([40. , 42. , 46. , 37., 40. , 38. , 39.8])
y = np.expand_dims(y , 1)


In [109]:
lin = RigidRegression(x_train, y_train, 0.9, Linear, MSE)

In [110]:
lin.fit(10000, 0.000000001)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
12942608228.8488
12942509871.280767
12942411516.538187
12942313164.620209
12942214815.525988
12942116469.254673
12942018125.805416
12941919785.177374
12941821447.369696
12941723112.381536
12941624780.212051
12941526450.860392
12941428124.32572
12941329800.607187
12941231479.70395
12941133161.615164
12941034846.33999
12940936533.877586
12940838224.227108
12940739917.387712
12940641613.358564
12940543312.13882
12940445013.727642
12940346718.124191
12940248425.32763
12940150135.337114
12940051848.151814
12939953563.770887
12939855282.1935
12939757003.418816
12939658727.445997
12939560454.274216
12939462183.902624
12939363916.3304
12939265651.556707
12939167389.580711
12939069130.401575
12938970874.018476
12938872620.430576
12938774369.637047
12938676121.637054
12938577876.429773
12938479634.01437
12938381394.39002
12938283157.555893
12938184923.511158
12938086692.254988
12937988463.78656
12937890238.105045
12937792015.209616

In [111]:
yp = lin.train_dev()

In [112]:
er = lin.metrics(test=0)

In [113]:
er

{'rmse': 157964.36492271634, 'mae': 114720.55296368334}

In [114]:
yp = lin.predict()

In [115]:
e = lin.metrics()

In [116]:
e

{'rmse': 180166.2013879806, 'mae': 140813.65481378467}