## Bài tập thực hành Nhập môn máy học - Lab 08
## MSSV: 18110014 - Họ tên: Nguyễn Phú Thành

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## 1. Hãy cài đặt thêm một module để chọn ra được bộ weights sao cho accuracy trên tập validation là tốt nhất

In [2]:
def one_hot_vector(y):
    out = np.zeros((y.shape[0], y.max() + 1))
    for i in range(y.shape[0]):
        out[i, y[i]] = 1
    return out

In [3]:
train = pd.read_csv("https://raw.githubusercontent.com/huynhthanh98/ML/master/lab-08/bt_train.csv")
valid = pd.read_csv("https://raw.githubusercontent.com/huynhthanh98/ML/master/lab-08/bt_valid.csv")

In [4]:
X_train, y_train = train.iloc[:, :2].to_numpy(), train.iloc[:, -1].to_numpy()
X_valid, y_valid = valid.iloc[:, :2].to_numpy(), valid.iloc[:, -1].to_numpy()
mean, std = np.mean(X_train, axis = 0), np.std(X_train, axis = 0)

In [5]:
X_train = (X_train - mean)/std
y_train = one_hot_vector(y_train)
X_valid = (X_valid - mean)/std

In [6]:
X_train, X_valid = X_train.T, X_valid.T
y_train = y_train.T

In [7]:
class InputLayer:
    def __init__(self, inputs):
        self.inputs = inputs.copy()
        self.ac_X = self.inputs

class HiddenLayer:
    def __init__(self, activation_func, derivative_activation, input_shape, output_shape, weights = None, bias = None, random_state = 0):
        self.activation, self.derivative = activation_func, derivative_activation
        self.inp, self.outp = input_shape, output_shape
        if weights is None:
            rnd = np.random.RandomState(random_state)
            self.weights = rnd.normal(loc = 0, scale = 1, size = (self.outp, self.inp))
        else:
            self.weights = weights.copy()
        
        if bias is None:
            rnd = np.random.RandomState(random_state)
            self.bias = rnd.normal(loc = 0, scale = 1, size = (self.outp, 1))
        else:
            self.bias = bias.copy()
    
    def forward(self, X):
        assert X.shape[0] == self.inp, f'{X.shape} and {self.weights.shape} do not match'
        self.z_X = self.weights @ X + self.bias
        self.ac_X = self.activation(self.z_X)
        return self.ac_X
    
    def update(self, weight_derivative, bias_derivative, lr = 1e-3):
        self.weights -= lr * weight_derivative
        self.bias -= lr * bias_derivative

In [8]:
# Implementing delta rule based on: http://cs229.stanford.edu/notes-spring2019/backprop.pdf
class NeuralNetwork:
    
    def __init__(self, layers):
        
        assert isinstance(layers[0], InputLayer), 'First layer in layers list must be an instance of InputLayer'
        for i, layer in enumerate(layers[1:]):
            assert isinstance(layer, HiddenLayer), f'{i + 1}th layer is not an instance of HiddenLayer'
        self.layers, self.number_layers = layers, len(layers)
    
    def forward_fit(self):
        return self.forward(self.layers[0].inputs)
    
    def forward(self, X):
        current_X = X
        for layer in self.layers[1:]:
            current_X = layer.forward(current_X)
        outputs = current_X.copy()
        return outputs
    
    def __backpropagation(self, deltas, lr = 1e-3):
        
        assert len(deltas) == 1 and isinstance(deltas, list)
        
        for l in range(self.number_layers - 2, 0, -1):
            delta_l = (self.layers[l + 1].weights.T @ deltas[-1]) * self.layers[l].derivative(self.layers[l].z_X)
            deltas.append(delta_l)
        
        deltas = [None,] + deltas[::-1]
        
        for l in range(1, self.number_layers):
            weights_gradient = deltas[l] @ self.layers[l - 1].ac_X.T
            bias_gradient = np.sum(deltas[l], axis = 1).reshape(-1, 1)
            self.layers[l].update(weights_gradient, bias_gradient, lr)
    
    def backward(self, lr, deltas = None):
        self.__backpropagation(deltas, lr = lr)
        return self

In [9]:
def relu(X):
    return np.where(X >= 0, X, 0).reshape(X.shape)
def relu_deriv(X):
    return np.where(X >= 0, 1, 0).reshape(X.shape)

def softmax(X):
    return np.exp(X)/np.sum(np.exp(X), axis = 0)

def softmax_deriv(X):
    return softmax(X) * (1 - softmax(X))

def crossEntropyLoss(y_pred, y):
    return -np.mean(y * np.log(y_pred))

In [10]:
# Xavier/He Weights Initialization
rnd = np.random.RandomState(0)
W_1 = rnd.normal(loc = 0, scale = np.sqrt(np.sqrt(2/7)), size = 10).reshape(5, 2)
W_2 = rnd.normal(loc = 0, scale = np.sqrt(np.sqrt(1/5)), size = 25).reshape(5, 5)
W_3 = rnd.normal(loc = 0, scale = np.sqrt(np.sqrt(1/4)), size = 15).reshape(3, 5)

In [11]:
inputs = InputLayer(X_train)

hidden_layer_1 = HiddenLayer(
    relu, relu_deriv, 
    input_shape = 2, 
    output_shape = 5, 
    weights = W_1,
    bias = np.zeros((5, 1))
)

hidden_layer_2 = HiddenLayer(
    relu, relu_deriv, 
    input_shape = 5, output_shape = 5,
    weights = W_2,
    bias = np.zeros((5, 1))
)

outputs_layer = HiddenLayer(
    softmax, softmax_deriv, 
    input_shape = 5, output_shape = 3,
    weights = W_3,
    bias = np.zeros((3, 1))
)

layers = [inputs, hidden_layer_1, hidden_layer_2, outputs_layer]

In [12]:
model = NeuralNetwork(layers)
learning_rate = 1e-5

In [13]:
for epoch in range(10000):
    y_predict = model.forward_fit()
    err = crossEntropyLoss(y_predict, y_train)
    deltas = [(y_predict - y_train), ]
    model.backward(learning_rate, deltas = deltas)
    
    if epoch % 1000 == 0:
        print(f'Loss at epoch {epoch + 1} = {err}')

Loss at epoch 1 = 0.9054156080014535
Loss at epoch 1001 = 0.09903687116188255
Loss at epoch 2001 = 0.09252157060749003
Loss at epoch 3001 = 0.0900481598230015
Loss at epoch 4001 = 0.08802110390296078
Loss at epoch 5001 = 0.08691862834524443
Loss at epoch 6001 = 0.08637856676708464
Loss at epoch 7001 = 0.08607412398314228
Loss at epoch 8001 = 0.08582498628969219
Loss at epoch 9001 = 0.08560409543555238


In [14]:
# Validation accuracy
np.sum(np.argmax(model.forward(X_valid), axis = 0) == y_valid)/y_valid.shape[0]

0.6366666666666667

In [15]:
model.layers[1].weights

array([[ 1.37718074,  0.19974955],
       [ 0.75031617,  1.57350461],
       [ 1.66724353, -0.67486532],
       [ 0.72086112, -0.26184145],
       [-1.30794726,  0.17208461]])

In [16]:
model.layers[1].bias

array([[0.1255196 ],
       [0.63187546],
       [0.02784213],
       [0.78581558],
       [0.28105539]])

In [17]:
model.layers[2].weights

array([[ 0.46761282,  1.12426381,  1.03605959,  0.38676033,  0.03701571],
       [-0.09157087,  0.57934273, -0.32959694,  0.26076841, -0.73564545],
       [-1.61618643,  0.79291794,  0.51021437, -0.67565562,  1.83027268],
       [-1.13362207, -0.37797591, -0.21898696,  1.17646538,  1.23182072],
       [ 0.10853918,  0.26951828, -0.59277399, -1.32434061, -0.22336907]])

In [18]:
model.layers[2].bias

array([[-0.04152722],
       [-0.02346769],
       [ 0.05036443],
       [ 0.63380937],
       [ 0.00355501]])

## 2. Từ bộ dữ liệu bên dưới hãy cài đặt backpropagation cho bài toán phân biệt ung thư vú. Hãy tự chọn số layers và số nodes mà mình cho là thích hợp, cũng như là nêu ra số layers và số nodes của mỗi layer mà mình đã chọn. Tính accuracy trên tập training

In [19]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

In [20]:
breast_cancer = datasets.load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target

X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [21]:
X_mean = np.mean(X_train, axis = 0).reshape(1, -1)
X_std = np.std(X_train, axis = 0).reshape(1, -1)

X_valid = (X_valid - X_mean)/X_std
X_train = (X_train - X_mean)/X_std

In [22]:
X_train, X_valid = X_train.T, X_valid.T
y_train = one_hot_vector(y_train).T

<ul>
    <li>
        <h3> Số lượng layers: 5 (bao gồm Input layer) </h3>
        <ol>
            <li>
                Layer 1: Input layer
                <ul>
                    <li> Số node: 30 </li>
                </ul>
            </li>
            <li>
                Layer 2: Hiddent layer 1
                <ul>
                    <li> Số node: 20 </li>
                    <li> Activation function: Linear </li>
                </ul>
            </li>
            <li>
                Layer 3: Hidden layer 2
                <ul>
                    <li> Số node: 20 </li>
                    <li> Activation function: Softmax </li>
                </ul>
            </li>
            <li>
                Layer 4: Hidden layer 3
                <ul>
                    <li> Số node: 10 </li>
                    <li> Activation function: Linear </li>
                </ul>
            </li>
            <li>
                Layer 5: Output layer
                <ul>
                    <li> Số node: 2 </li>
                    <li> Activation function: Softmax </li>
                </ul>
            </li>
        </ol>
    </li>
</ul>

In [23]:
def linearFunc(X):
    return X.copy()

def linearDeriv(X):
    return np.ones(X.shape, dtype = X.dtype)

In [24]:
# Xavier Weights Initialization for linear/sigmoid activation function
rnd = np.random.RandomState(0)
W_1 = rnd.uniform(low = -1/np.sqrt(30), high = 1/np.sqrt(30), size = 600).reshape(20, 30)
W_2 = rnd.uniform(low = -1/np.sqrt(20), high = 1/np.sqrt(20), size = 400).reshape(20, 20)
W_3 = rnd.uniform(low = -1/np.sqrt(20), high = 1/np.sqrt(20), size = 200).reshape(10, 20)
W_4 = rnd.uniform(low = -1/np.sqrt(10), high = 1/np.sqrt(10), size = 20).reshape(2, 10)

In [25]:
inputs = InputLayer(X_train)

hidden_layer_1 = HiddenLayer(
    linearFunc, linearDeriv, 
    input_shape = 30, output_shape = 20, 
    weights = W_1,
    bias = np.zeros((20, 1))
)

hidden_layer_2 = HiddenLayer(
    softmax, softmax_deriv, 
    input_shape = 20, output_shape = 20,
    weights = W_2,
    bias = np.zeros((20, 1))
)

hidden_layer_3 = HiddenLayer(
    linearFunc, linearDeriv,
    input_shape = 20, output_shape = 10,
    weights = W_3,
    bias = np.zeros((10, 1))
)

outputs_layer = HiddenLayer(
    softmax, softmax_deriv, 
    input_shape = 10, output_shape = 2,
    weights = W_4,
    bias = np.zeros((2, 1))
)

layers = [
    inputs, 
    hidden_layer_1, 
    hidden_layer_2, 
    hidden_layer_3, 
    outputs_layer
]

model = NeuralNetwork(layers)

In [26]:
learning_rate = 1e-5
for epoch in range(10000):
    y_predict = model.forward_fit()
    err = crossEntropyLoss(y_predict, y_train)
    deltas = [y_predict - y_train, ]
    model.backward(learning_rate, deltas = deltas)
    
    if epoch % 1000 == 0:
        print(f'Loss at epoch {epoch + 1} = {err}')

Loss at epoch 1 = 0.34485360860141645
Loss at epoch 1001 = 0.32666028467515945
Loss at epoch 2001 = 0.32112527887156944
Loss at epoch 3001 = 0.3006582985820205
Loss at epoch 4001 = 0.17096822138709328
Loss at epoch 5001 = 0.07947332241577691
Loss at epoch 6001 = 0.05378556595997626
Loss at epoch 7001 = 0.0424174313694559
Loss at epoch 8001 = 0.03649459736942651
Loss at epoch 9001 = 0.033091324766509216


In [27]:
# Training accuracy
np.sum(np.argmax(model.forward(X_train), axis = 0) == np.argmax(y_train, axis = 0))/y_train.shape[1]

0.9802197802197802

In [28]:
# Testing accuracy
np.sum(np.argmax(model.forward(X_valid), axis = 0) == y_valid)/y_valid.shape[0]

0.9912280701754386

In [29]:
np.argmax(model.forward(X_train), axis = 0)

array([1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0,
       1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1,
       0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1,
       0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1,
       1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
       0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0,
       1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0,
       0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1,

In [30]:
np.argmax(y_train, axis = 0)

array([1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0,
       1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1,
       0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1,
       0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1,
       1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0,
       1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
       0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0,
       1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0,
       0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1,

In [31]:
np.argmax(model.forward(X_valid), axis = 0)

array([1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 0])

In [32]:
y_valid

array([1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 0])