In [2]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# One seed for everything stochastic in this notebook: the synthetic data, the
# train/validation split, and the global NumPy RNG (np.random.normal is used
# for weight initialisation in the models defined further down).
SEED = 42
np.random.seed(SEED)

# Synthetic regression problem: 30k samples, 100 features, Gaussian noise with std 5.
X, y = make_regression(n_samples=30_000, n_features=100, noise=5, random_state=SEED)
# 50/50 split; random_state makes the split reproducible across kernel restarts.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.5, random_state=SEED
)

Задача 1

In [3]:
class LinearRegression_castom():
    """Linear regression trained with full-batch gradient descent.

    Each iteration computes the per-sample MSE gradient ``2 * (y_pred - y_true)``,
    clips it element-wise to ``[-GRAD_CLIP, GRAD_CLIP]`` and then updates

    * the weights with the *sum* over samples of ``clipped_grad_i * x_i``
    * the bias with the *mean* of the clipped gradients.

    Parameters
    ----------
    learning_rate : float
        Step size used for both the weight and the bias update.
    max_itter : int
        Number of full-batch iterations performed by ``train``.
    log : bool
        When true, ``train`` prints the MSE of every iteration (measured
        before that iteration's update).
    random_state : int or None
        Seed for the weight initialisation. ``None`` (default) draws from the
        global ``np.random`` state, as the original implementation did.

    Attributes set by ``train``
    ---------------------------
    weights : ndarray of shape (1, n_features)
    b : float
    """

    # Bound applied to the per-sample loss gradients before the update step.
    GRAD_CLIP = 1

    def __init__(
        self,
        learning_rate=0.1,
        max_itter=10,
        log=True,
        random_state=None,
    ):
        self.learning_rate = learning_rate
        self.max_itter = max_itter
        self.log = log
        self.random_state = random_state

    @staticmethod
    def __init_weights(n_in, n_out, rng=None):
        """
        Normal Xavier initialization.

        Returns an ``(n_out, n_in)`` array drawn from N(0, 2 / (n_in + n_out)).
        ``rng`` is any object exposing ``normal(loc, scale, size)``; when it is
        ``None`` the global ``np.random`` module is used.
        """
        if rng is None:
            rng = np.random
        return rng.normal(loc=0.0, scale=np.sqrt(2 / (n_in + n_out)), size=(n_out, n_in))

    @staticmethod
    def get_loss(y_true, y_pred):
        """
        Get MSE loss.

        ``y_true`` is reshaped to the shape of ``y_pred`` first, so that e.g.
        ``(n,)`` targets against ``(n, 1)`` predictions are compared
        element-wise instead of being broadcast to an ``(n, n)`` matrix.
        """
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        assert y_true.size == y_pred.size
        residual = y_true.reshape(y_pred.shape) - y_pred
        return np.sum(residual ** 2) / y_true.size

    @staticmethod
    def get_loss_grad(y_true, y_pred):
        """
        Get the per-sample gradient of the squared error w.r.t. the prediction.

        Returns ``2 * (y_pred - y_true)`` with the shape of ``y_pred``; no
        averaging over samples is done here.
        """
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        assert y_true.size == y_pred.size
        return -2 * (y_true.reshape(y_pred.shape) - y_pred)

    def train(self, X, y):
        """Fit ``weights`` and ``b`` on ``X`` (n_samples, n_features) and ``y``.

        Re-initialises the parameters on every call. Returns ``None``.
        """
        n_features = X.shape[1]
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        self.b = 0
        self.weights = self.__init_weights(n_in=n_features, n_out=1, rng=rng)

        for _ in range(self.max_itter):
            # (n, 1) -> (n,) so that gradients are one value per sample.
            predictions = (X @ self.weights.T + self.b).flatten()
            grads = np.clip(
                self.get_loss_grad(y, predictions), -self.GRAD_CLIP, self.GRAD_CLIP
            )
            # grads @ X has shape (n_features,) and broadcasts against (1, n_features).
            self.weights = self.weights - self.learning_rate * (grads @ X)
            self.b = self.b - self.learning_rate * np.mean(grads)
            if self.log:
                # Loss of the predictions made before this iteration's update.
                print(self.get_loss(y, predictions))

    def predict(self, X):
        """Return predictions as a column of shape (n_samples, 1)."""
        return (X @ self.weights.T + self.b)

In [4]:
# Reference solution: scikit-learn's closed-form least squares.
clf = LinearRegression()
clf.fit(X_train, y_train)
# mean_squared_error expects (y_true, y_pred) in that order.
print(mean_squared_error(y_val, clf.predict(X_val)))

25.43345560250734


In [5]:
# Small step size and many iterations: the weight update sums over all samples.
model = LinearRegression_castom(learning_rate=0.0001, max_itter=10000, log=False)
model.train(X_train, y_train)
# mean_squared_error expects (y_true, y_pred) in that order.
mean_squared_error(y_val, model.predict(X_val))

25.50977941086595

In [18]:
class MLPLinearRegression():
    """Fully connected ReLU network for regression, trained by full-batch backprop.

    The network has one ReLU layer per entry of ``hidden_layer_sizes`` followed
    by a linear layer with a single output. An empty ``hidden_layer_sizes``
    gives a plain linear model.

    Parameters
    ----------
    hidden_layer_sizes : sequence of int
        Width of each hidden layer, in order.
    learning_rate : float
        Step size for weight and bias updates.
    max_itter : int
        Number of full-batch iterations performed by ``train``.
    log : bool
        When true (default), ``train`` prints ``iteration loss`` every
        ``LOG_EVERY`` iterations.
    random_state : int or None
        Seed for the weight initialisation. ``None`` (default) draws from the
        global ``np.random`` state.

    Attributes
    ----------
    weights : list of ``[index, bias, weight_matrix, activation]``
        Set by ``train``. ``bias`` has shape (1, n_out), ``weight_matrix``
        (n_in, n_out), ``activation`` is ``"RELU"`` or ``None``.
    later_prom_res : list of ``[pre_activation, activation]``
        Per-layer values cached by the most recent ``forward`` call and read
        by ``backward``.
    """

    # Element-wise bound applied to the gradients in ``backward``.
    GRAD_CLIP = 10
    # ``train`` reports the loss every LOG_EVERY iterations.
    LOG_EVERY = 200

    def __init__(
        self,
        hidden_layer_sizes=(100, 200, 300),
        learning_rate=0.1,
        max_itter=10,
        log=True,
        random_state=None,
    ):
        self.hidden_layer_sizes = hidden_layer_sizes
        self.learning_rate = learning_rate
        self.max_itter = max_itter
        self.log = log
        self.random_state = random_state

    @staticmethod
    def __init_weights(n_in, n_out, rng=None):
        """
        Normal Xavier initialization.

        Returns an ``(n_in, n_out)`` array drawn from N(0, 2 / (n_in + n_out)).
        ``rng`` is any object exposing ``normal(loc, scale, size)``; when it is
        ``None`` the global ``np.random`` module is used.
        """
        if rng is None:
            rng = np.random
        return rng.normal(loc=0.0, scale=np.sqrt(2 / (n_in + n_out)), size=(n_in, n_out))

    @staticmethod
    def get_loss(y_true, y_pred):
        """
        Get MSE loss.

        ``y_true`` is reshaped to the shape of ``y_pred`` so that ``(n,)``
        against ``(n, 1)`` is compared element-wise rather than broadcast.
        """
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        assert y_true.size == y_pred.size
        return np.mean((y_pred - y_true.reshape(y_pred.shape)) ** 2)

    @staticmethod
    def get_loss_grad(y_true, y_pred):
        """
        Get the per-sample gradient of the squared error w.r.t. the prediction.

        Returns ``2 * (y_pred - y_true)`` with the shape of ``y_pred``.
        """
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        assert y_true.size == y_pred.size
        return 2 * (y_pred - y_true.reshape(y_pred.shape))

    @staticmethod
    def get_relu(x):
        """Element-wise ``max(0, x)``; returns a new array."""
        return np.maximum(0, x)

    @staticmethod
    def get_relu_diff(x):
        """Derivative of ReLU: 0 where ``x <= 0``, 1 elsewhere."""
        return np.where(x <= 0, 0, 1)

    def forward(self, X):
        """Run ``X`` through all layers and return the output of the last one.

        Side effect: ``self.later_prom_res`` is replaced by the list of
        ``[pre_activation, activation]`` pairs of this pass.

        Raises
        ------
        ValueError
            If a layer carries an activation tag other than ``"RELU"`` or ``None``.
        """
        activation = X
        self.later_prom_res = []
        for _, bias, layer_weights, func in self.weights:
            pre_activ = activation @ layer_weights + bias
            if func == "RELU":
                activation = self.get_relu(pre_activ)
            elif func is None:
                activation = pre_activ
            else:
                raise ValueError(f"Unknown activation: {func!r}")
            # Nothing mutates these arrays in place, so no defensive copies are needed.
            self.later_prom_res.append([pre_activ, activation])
        return activation

    def train(self, X, y):
        """Initialise the layers and run ``max_itter`` forward/backward passes.

        ``X`` is (n_samples, n_features); ``y`` is any array-like with
        n_samples values. Parameters are re-initialised on every call.
        Returns ``None``.
        """
        y_col = np.asarray(y).reshape(-1, 1)
        n_features = X.shape[1]
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        self.weights = []
        prev_size = n_features
        for index, curr_size in enumerate(self.hidden_layer_sizes):
            weight = self.__init_weights(n_in=prev_size, n_out=curr_size, rng=rng)
            bias = self.__init_weights(n_in=1, n_out=curr_size, rng=rng)
            self.weights.append([index, bias, weight, "RELU"])
            prev_size = curr_size

        # Linear output layer. Its index comes from the layer count, not from the
        # loop variable, which is undefined when there are no hidden layers.
        last_layer = self.__init_weights(n_in=prev_size, n_out=1, rng=rng)
        bias = self.__init_weights(n_in=1, n_out=1, rng=rng)
        self.weights.append([len(self.hidden_layer_sizes), bias, last_layer, None])

        for i in range(self.max_itter):
            result_forward = self.forward(X)
            if self.log and i % self.LOG_EVERY == 0:
                print(i, self.get_loss(y_col, result_forward.reshape(-1, 1)))
            self.backward(X, y_col)

    def predict(self, X):
        """Return predictions as a column of shape (n_samples, 1)."""
        return self.forward(X).reshape(-1, 1)

    def backward(self, X, y):
        """Backpropagate through the values cached by the last ``forward`` call.

        ``X`` must be the input that was passed to that ``forward`` call and
        ``y`` the matching targets. Updates ``self.weights`` in place.
        """
        y_predicted = self.later_prom_res[-1][1].reshape(-1, 1)

        # Deltas are collected from the output layer backwards, then reversed so
        # that deltas[k] belongs to layer k.
        deltas = [self.get_loss_grad(y, y_predicted)]
        for layer_index in range(len(self.weights) - 2, -1, -1):
            next_weights = self.weights[layer_index + 1][2]
            delta = deltas[-1] @ next_weights.T
            delta = delta * self.get_relu_diff(self.later_prom_res[layer_index][0])
            deltas.append(delta)
        deltas.reverse()

        # All deltas were computed with the pre-update weights, so the update
        # order does not matter.
        clip = self.GRAD_CLIP
        for layer_index in range(len(deltas) - 1, -1, -1):
            layer_input = self.later_prom_res[layer_index - 1][1] if layer_index > 0 else X
            delta = deltas[layer_index]
            # Weight gradient: summed over samples, then clipped element-wise.
            weight_grad = np.clip(layer_input.T @ delta, -clip, clip)
            # Bias gradient: per-sample deltas clipped first, then averaged.
            bias_grad = np.mean(np.clip(delta, -clip, clip), axis=0, keepdims=True)
            self.weights[layer_index][2] = self.weights[layer_index][2] - self.learning_rate * weight_grad
            self.weights[layer_index][1] = self.weights[layer_index][1] - self.learning_rate * bias_grad


In [19]:
# Three small hidden layers (5 -> 10 -> 10) trained for 10k full-batch steps;
# the loss is printed periodically by train().
mlpLinReg = MLPLinearRegression(hidden_layer_sizes=(5, 10, 10), learning_rate=0.0003, max_itter=10000)
mlpLinReg.train(X_train, y_train)

0 34675.97471965502
200 19389.96535419596
400 952.4779159160378
600 388.49941316246463
800 303.15514350074284
1000 240.01893035497937
1200 210.84419252500572
1400 186.61656063769848
1600 165.13508166907206
1800 146.7059773918965
2000 130.43096566832827
2200 117.36227249541639
2400 105.46618402690866
2600 96.34341646020519
2800 88.81202606442464
3000 82.89066557904634
3200 78.32117732717037
3400 74.41940569260372
3600 71.68585745828963
3800 68.51164742116904
4000 66.44502339392078
4200 64.64856517614345
4400 63.198467590491305
4600 62.486781023875096
4800 61.97113955415869
5000 61.27812346020093
5200 60.86962075522099
5400 60.62897940699146
5600 60.349601787962456
5800 60.26279342357896
6000 60.158051225685206
6200 60.08983529147338
6400 59.93687838489051
6600 59.81669131393653
6800 59.73957405184829
7000 59.63379362960576
7200 59.5653721165485
7400 59.41294413596589
7600 59.34403224788187
7800 59.169361776882894
8000 59.045203703786136
8200 58.98756080471048
8400 58.90284214758398
8600

In [21]:
# Validation MSE of the MLP; mean_squared_error expects (y_true, y_pred) in that order.
mean_squared_error(y_val, mlpLinReg.predict(X_val))

59.991064033952334

# Useful resources:
* http://neuralnetworksanddeeplearning.com/chap2.html
* https://hackmd.io/@machine-learning/blog-post-cnnumpy-slow (use VPN)
* https://ml-cheatsheet.readthedocs.io/en/latest/backpropagation.html#