<a href="https://colab.research.google.com/github/F1ameX/Modern-Methods-of-Deep-Machine-Learning/blob/main/3_multilayer_perceptron/3_multilayer_perceptron.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import Modules

In [32]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from numpy.typing import ArrayLike
from tqdm.notebook import tqdm

## Distribution

In [33]:
def make_circles(n_samples : int = 100,
                 shuffle : bool = True,
                 noise : float = None,
                 random_state : int = None,
                 factor : float = 0.8):
    """Generate two concentric circles as a binary classification dataset.

    The outer circle (label 0) has radius 5 and receives ``n_samples // 2``
    points; the inner circle (label 1) has radius ``5 * factor`` and receives
    the remaining points. Points are evenly spaced along each circle.

    Args:
        n_samples: Total number of points.
        shuffle: Whether to apply a random permutation to the rows.
        noise: Standard deviation of Gaussian noise added to every
            coordinate; ``None`` disables the noise.
        random_state: Seed for the NumPy random generator.
        factor: Ratio of the inner radius to the outer radius.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(n_samples, 2)`` and ``y``
        holds the integer labels.
    """
    generator = np.random.default_rng(seed = random_state)

    outer_count = n_samples // 2
    inner_count = n_samples - outer_count

    outer_angles = np.linspace(0, 2 * np.pi, outer_count, endpoint = False)
    inner_angles = np.linspace(0, 2 * np.pi, inner_count, endpoint = False)

    # Each ring is built as an (n, 2) array of (x1, x2) coordinates.
    outer_ring = np.column_stack(
        [np.cos(outer_angles) * 5, np.sin(outer_angles) * 5]
    )
    inner_ring = np.column_stack(
        [np.cos(inner_angles) * 5 * factor, np.sin(inner_angles) * 5 * factor]
    )

    # Outer points come first, followed by the inner ones.
    X = np.concatenate([outer_ring, inner_ring])
    y = np.concatenate(
        [np.zeros(outer_count, dtype = int), np.ones(inner_count, dtype = int)]
    )

    # The noise is drawn before the permutation so the random stream is
    # consumed in a fixed order for a given seed.
    if noise is not None:
        X += generator.normal(scale = noise, size = X.shape)

    if shuffle:
        order = generator.permutation(n_samples)
        X, y = X[order], y[order]

    return X, y


def make_xor(n_samples : int = 100,
             noise : float = 0.2,
             scale : float = 1.0,
             centers : ArrayLike | None = None,
             shuffle : bool = True,
             random_state : int = 42,
             return_centers : bool = False):

    rng = np.random.default_rng(seed = random_state)

    if centers is None:
        centers = np.array(
            [
                (scale, scale),
                (scale, -scale),
                (-scale, scale),
                (-scale, -scale),
             ],
            dtype = float
            )

    n_centers = centers.shape[0]

    base = n_samples // n_centers
    extra = n_samples % n_centers

    clusters = np.zeros(n_centers, dtype = int)

    for k in range(n_centers):
        if k < extra:
            clusters[k] = base + 1
        else:
            clusters[k] = base

    X = np.empty((n_samples, 2))
    y = np.empty(n_samples, dtype = int)

    pos = 0
    for k in range(n_centers):
        normal_matrix = rng.normal(loc = 0, scale = 1, size = (clusters[k], 2))
        X_k = centers[k] + noise * normal_matrix

        if k in [0, 3]:
            y[pos : pos + clusters[k]] = 0
        else:
            y[pos : pos + clusters[k]] = 1

        X[pos : pos + clusters[k]] = X_k
        pos += clusters[k]

    if shuffle:
        permutation = rng.permutation(n_samples)
        X = X[permutation]
        y = y[permutation]

    if return_centers:
        return X, y, centers
    return X, y


def make_blobs(n_samples : int = 100,
               n_features : int = 2,
               centers : int | ArrayLike | None = None,
               cluster_std : float | ArrayLike = 1.0,
               center_box : tuple[float, float] = (-1.0, 1.0),
               shuffle : bool = True,
               random_state : int = 42,
               return_centers : bool = False):

    rng = np.random.default_rng(seed = random_state)

    if centers is None:
        n_centers = 2

        centers = rng.uniform(
            center_box[0], center_box[1], size = (n_centers, n_features)
        )

    elif isinstance(centers, int):
        n_centers = centers
        centers = rng.uniform(
            center_box[0], center_box[1], size = (n_centers, n_features)
        )

    else:
        centers = np.asarray(centers, dtype = float)
        n_centers = centers.shape[0]

    base = n_samples // n_centers
    extra = n_samples % n_centers

    clusters = np.zeros(n_centers, dtype = int)

    for k in range(n_centers):
        if k < extra:
            clusters[k] = base + 1
        else:
            clusters[k] = base

    if isinstance(cluster_std, int | float):
        cluster_std_sigma = cluster_std
        cluster_std = np.repeat(cluster_std_sigma, n_centers)

    X = np.empty((n_samples, n_features))
    y = np.empty(n_samples, dtype = int)

    pos = 0

    for k in range(n_centers):
        normal_matrix = rng.normal(loc = 0, scale = 1, size = (clusters[k], n_features))
        X_k = centers[k] + cluster_std[k] * normal_matrix

        y[pos : pos + clusters[k]] = k
        X[pos : pos + clusters[k]] = X_k
        pos += clusters[k]

    if shuffle:
        permutation = rng.permutation(n_samples)
        X = X[permutation]
        y = y[permutation]

    if return_centers:
        return X, y, centers

    return X, y


def make_spiral(n_samples : int = 100,
                turns : int = 2,
                radius : float = 0.0,
                sweep : float = 0.15,
                shuffle : bool = True,
                noise : float = None,
                random_state : int = 42):
    """Generate two interleaved spiral arms as a binary classification dataset.

    Both arms follow ``r = radius + sweep * theta`` for ``theta`` evenly
    spaced in ``[0, turns * 2 * pi)``. The second arm (label 1) is the first
    arm (label 0) with the angle shifted by ``pi``. The first arm receives
    ``n_samples // 2`` points, the second arm the rest.

    Args:
        n_samples: Total number of points.
        turns: Number of full revolutions of each arm.
        radius: Distance from the origin at ``theta = 0``.
        sweep: Growth of the distance per radian.
        shuffle: Whether to apply a random permutation to the rows.
        noise: Standard deviation of Gaussian noise added to every
            coordinate; ``None`` disables the noise.
        random_state: Seed for the NumPy random generator.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(n_samples, 2)`` and ``y``
        holds the integer labels.
    """
    generator = np.random.default_rng(seed = random_state)

    first_count = n_samples // 2
    second_count = n_samples - first_count

    max_angle = turns * 2 * np.pi
    first_theta = np.linspace(0, max_angle, first_count, endpoint = False)
    second_theta = np.linspace(0, max_angle, second_count, endpoint = False)

    first_r = radius + sweep * first_theta
    second_r = radius + sweep * second_theta

    # Polar -> Cartesian; the second arm is rotated by half a revolution.
    first_arm = np.column_stack(
        [first_r * np.cos(first_theta), first_r * np.sin(first_theta)]
    )
    second_arm = np.column_stack(
        [second_r * np.cos(second_theta + np.pi),
         second_r * np.sin(second_theta + np.pi)]
    )

    X = np.concatenate([first_arm, second_arm])
    y = np.concatenate(
        [np.zeros(first_count, dtype = int), np.ones(second_count, dtype = int)]
    )

    # The noise is drawn before the permutation so the random stream is
    # consumed in a fixed order for a given seed.
    if noise is not None:
        X += generator.normal(scale = noise, size = X.shape)

    if shuffle:
        order = generator.permutation(n_samples)
        X, y = X[order], y[order]

    return X, y


def split_data(X: np.ndarray, y: np.ndarray, ratio: float = 0.30, random_state: int = 42):
    """Return shuffled row indices for a train/test split.

    Args:
        X: Feature array; only its number of rows is used.
        y: Target array; accepted for a symmetric call signature, not read.
        ratio: Fraction of the rows assigned to the test part.
        random_state: Seed for the NumPy random generator.

    Returns:
        Tuple ``(train_idx, test_idx)`` of index arrays. The train part holds
        ``floor((1 - ratio) * n_rows)`` indices, the test part the rest.
    """
    rng = np.random.default_rng(seed=random_state)
    n_rows = X.shape[0]
    idx = np.arange(n_rows)
    rng.shuffle(idx)

    # Plain ``int(...)`` truncation is sensitive to floating-point undershoot:
    # (1 - 0.9) * 10 evaluates to 0.9999999999999998 and would yield 0 rows.
    # A tiny tolerance before flooring restores the intended count.
    left_share = int(np.floor((1 - ratio) * n_rows + 1e-9))

    train_idx = idx[:left_share]
    test_idx = idx[left_share:]

    return train_idx, test_idx

## MLP

In [34]:
class MultiLayerPerceptron(nn.Module):
    """Fully connected network with a single output unit.

    The network consists of ``layers`` hidden linear layers of ``neurons``
    units, each followed by the chosen activation, and a final linear layer
    with one output. For classification a sigmoid is appended to the output
    and binary cross-entropy is used as the loss; for regression the output
    is left linear and mean squared error is used.
    """

    def __init__(self,
                 activation : str = 'sigmoid',
                 layers : int = 4,
                 neurons : int = 3,
                 task : str = 'classification',
                 in_features : int = 2):
        """Build the layer stack and select the loss function.

        Args:
            activation: Hidden activation: 'sigmoid', 'tanh' or 'relu'.
            layers: Number of hidden layers.
            neurons: Number of units in every hidden layer.
            task: 'classification' or 'regression'.
            in_features: Number of input features.

        Raises:
            ValueError: If ``activation`` or ``task`` is not supported.
        """
        super().__init__()

        activations = {
            'sigmoid' : nn.Sigmoid,
            'tanh' : nn.Tanh,
            'relu' : nn.ReLU,
        }
        losses = {
            'classification' : nn.BCELoss,
            'regression' : nn.MSELoss,
        }

        # Explicit exceptions instead of ``assert``: assertions are removed
        # when Python runs with ``-O``.
        if activation not in activations:
            raise ValueError(
                f"activation must be one of {sorted(activations)}, got {activation!r}"
            )
        if task not in losses:
            raise ValueError(
                f"task must be one of {sorted(losses)}, got {task!r}"
            )

        activation_function = activations[activation]
        # Remembered so that ``predict`` can tell the two tasks apart.
        self.task = task
        self.loss_function = losses[task]()

        dims = [in_features] + [neurons] * layers + [1]
        last = len(dims) - 2

        modules = []
        for i, (fan_in, fan_out) in enumerate(zip(dims[:-1], dims[1:])):
            modules.append(nn.Linear(fan_in, fan_out))
            if i != last:
                modules.append(activation_function())
            elif task == 'classification':
                # BCELoss expects probabilities, hence the output sigmoid.
                modules.append(nn.Sigmoid())

        self.network = nn.Sequential(*modules)

    def forward(self, X) -> torch.Tensor:
        """Return the raw network output for ``X``."""
        return self.network(X)

    def fit(self,
            X: torch.Tensor,
            y: torch.Tensor,
            learning_rate : float = 0.01,
            n_epochs : int = 100,
            batch_size : int = 16,
            random_state : int = 42,
            verbose : bool = False,
            ):
        """Train the network with Adam on shuffled mini-batches.

        Args:
            X: Input tensor with one row per sample.
            y: Targets; reshaped to a column to match the network output.
            learning_rate: Learning rate passed to Adam.
            n_epochs: Number of passes over the data.
            batch_size: Number of rows per optimisation step.
            random_state: Seed for the per-epoch row permutations.
            verbose: Show a progress bar over the epochs.

        Returns:
            The fitted model itself.
        """
        rng = np.random.default_rng(seed = random_state)

        y = y.reshape(-1, 1)
        n_rows = X.shape[0]

        optimizer = torch.optim.Adam(self.parameters(), lr = learning_rate)

        for _ in tqdm(
                range(n_epochs),
                desc = "Fitting MLP",
                unit = "epoch",
                total = n_epochs,
                dynamic_ncols = True,
                leave = True,
                mininterval = 0.1,
                disable = not verbose):

            self.train()
            # Indexing with a permutation already produces new tensors, so
            # the inputs are never modified and no explicit clone is needed.
            permutation = torch.from_numpy(rng.permutation(n_rows))
            _X = X[permutation]
            _y = y[permutation]

            for batch_start in range(0, n_rows, batch_size):
                _X_batch = _X[batch_start : batch_start + batch_size]
                _y_batch = _y[batch_start : batch_start + batch_size]

                preds = self(_X_batch)
                loss = self.loss_function(preds, _y_batch)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        return self

    def predict(self, X: torch.Tensor) -> torch.Tensor:
        """Predict targets for ``X`` without tracking gradients.

        Returns:
            For classification, an int32 tensor of 0/1 labels obtained by
            thresholding the output probability at 0.5. For regression, the
            raw network output.
        """
        self.eval()
        with torch.no_grad():
            output = self(X)

        if self.task == 'regression':
            # Thresholding would destroy a continuous prediction.
            return output

        return (output >= 0.5).to(torch.int32)

## Classification part

In [35]:
# Concentric-circles data set with a 75/25 train/test split.
X, y = make_circles(n_samples = 500, noise = 0.5, factor = 0.25, random_state = 52)
train_idx, test_idx = split_data(X, y, ratio = 0.25, random_state = 52)

# Convert both parts to float32 tensors; targets become column vectors.
X_train, X_test = (
    torch.from_numpy(X[part]).float() for part in (train_idx, test_idx)
)
y_train, y_test = (
    torch.from_numpy(y[part]).float().view(-1, 1) for part in (train_idx, test_idx)
)

# One accuracy table per activation: rows are hidden-layer counts (2..4),
# columns are neurons per hidden layer (1..5).
for activation in ('sigmoid', 'tanh', 'relu'):
    stat_table = pd.DataFrame(
        index = pd.Index(list(range(2, 5)), name = "Layers"),
        columns = pd.Index(list(range(1, 6)), name = "Neurons"),
    )

    for layers in range(2, 5):
        for neurons in range(1, 6):
            # ``fit`` returns the model, so construction and training chain.
            model = MultiLayerPerceptron(
                activation = activation,
                layers = layers,
                neurons = neurons,
                task = 'classification',
            ).fit(
                X_train, y_train,
                learning_rate = 0.01,
                n_epochs = 500,
                batch_size = 25,
                random_state = 52,
            )

            # Share of test rows whose predicted label equals the target.
            accuracy = round(float((model.predict(X_test) == y_test).float().mean()), 3)
            stat_table.at[layers, neurons] = accuracy

    print(f"Таблица accuracy для функции активации {activation}")
    display(stat_table)
    print('\n\n')

Таблица accuracy для функции активации sigmoid


Neurons,1,2,3,4,5
Layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.696,0.816,1.0,1.0,1.0
3,0.648,0.848,1.0,1.0,1.0
4,0.648,0.864,0.672,1.0,1.0





Таблица accuracy для функции активации tanh


Neurons,1,2,3,4,5
Layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.648,0.904,1.0,1.0,1.0
3,0.672,0.896,1.0,1.0,1.0
4,0.648,0.896,1.0,1.0,1.0





Таблица accuracy для функции активации relu


Neurons,1,2,3,4,5
Layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.632,0.856,1.0,1.0,1.0
3,0.496,0.856,1.0,1.0,1.0
4,0.496,0.496,0.936,1.0,1.0







In [36]:
# Two Gaussian blobs with a 75/25 train/test split. The centers are given
# as scalars, which make_blobs broadcasts across both features.
X, y = make_blobs(n_samples = 500, centers = [-2.5, 2.5], cluster_std = 1.2)
train_idx, test_idx = split_data(X, y, ratio = 0.25, random_state = 52)

# Convert both parts to float32 tensors; targets become column vectors.
X_train, X_test = (
    torch.from_numpy(X[part]).float() for part in (train_idx, test_idx)
)
y_train, y_test = (
    torch.from_numpy(y[part]).float().view(-1, 1) for part in (train_idx, test_idx)
)

# One accuracy table per activation: rows are hidden-layer counts (2..4),
# columns are neurons per hidden layer (1..5).
for activation in ('sigmoid', 'tanh', 'relu'):
    stat_table = pd.DataFrame(
        index = pd.Index(list(range(2, 5)), name = "Layers"),
        columns = pd.Index(list(range(1, 6)), name = "Neurons"),
    )

    for layers in range(2, 5):
        for neurons in range(1, 6):
            # ``fit`` returns the model, so construction and training chain.
            model = MultiLayerPerceptron(
                activation = activation,
                layers = layers,
                neurons = neurons,
                task = 'classification',
            ).fit(
                X_train, y_train,
                learning_rate = 0.01,
                n_epochs = 500,
                batch_size = 25,
                random_state = 52,
            )

            # Share of test rows whose predicted label equals the target.
            accuracy = round(float((model.predict(X_test) == y_test).float().mean()), 3)
            stat_table.at[layers, neurons] = accuracy

    print(f"Таблица accuracy для функции активации {activation}")
    display(stat_table)
    print('\n\n')

Таблица accuracy для функции активации sigmoid


Neurons,1,2,3,4,5
Layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.984,0.984,0.976,0.976,0.976
3,0.984,0.984,0.976,0.976,0.976
4,0.984,0.984,0.976,0.976,0.976





Таблица accuracy для функции активации tanh


Neurons,1,2,3,4,5
Layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.984,0.984,0.976,0.976,0.984
3,0.984,0.968,0.976,0.976,0.976
4,0.984,0.984,0.976,0.976,0.976





Таблица accuracy для функции активации relu


Neurons,1,2,3,4,5
Layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.432,0.984,0.968,0.968,0.984
3,0.432,0.968,0.984,0.968,0.968
4,0.432,0.432,0.984,0.968,0.984







In [37]:
# XOR-style clusters with a 75/25 train/test split.
X, y = make_xor(n_samples = 500, scale = 2.75, noise = 1.0)
train_idx, test_idx = split_data(X, y, ratio = 0.25, random_state = 52)

# Convert both parts to float32 tensors; targets become column vectors.
X_train, X_test = (
    torch.from_numpy(X[part]).float() for part in (train_idx, test_idx)
)
y_train, y_test = (
    torch.from_numpy(y[part]).float().view(-1, 1) for part in (train_idx, test_idx)
)

# One accuracy table per activation: rows are hidden-layer counts (2..4),
# columns are neurons per hidden layer (1..5).
for activation in ('sigmoid', 'tanh', 'relu'):
    stat_table = pd.DataFrame(
        index = pd.Index(list(range(2, 5)), name = "Layers"),
        columns = pd.Index(list(range(1, 6)), name = "Neurons"),
    )

    for layers in range(2, 5):
        for neurons in range(1, 6):
            # ``fit`` returns the model, so construction and training chain.
            model = MultiLayerPerceptron(
                activation = activation,
                layers = layers,
                neurons = neurons,
                task = 'classification',
            ).fit(
                X_train, y_train,
                learning_rate = 0.01,
                n_epochs = 500,
                batch_size = 25,
                random_state = 52,
            )

            # Share of test rows whose predicted label equals the target.
            accuracy = round(float((model.predict(X_test) == y_test).float().mean()), 3)
            stat_table.at[layers, neurons] = accuracy

    print(f"Таблица accuracy для функции активации {activation}")
    display(stat_table)
    print('\n\n')

Таблица accuracy для функции активации sigmoid


Neurons,1,2,3,4,5
Layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.72,0.688,0.68,0.976,1.0
3,0.736,0.944,1.0,0.744,0.992
4,0.792,0.68,0.992,0.984,0.992





Таблица accuracy для функции активации tanh


Neurons,1,2,3,4,5
Layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.728,0.68,1.0,0.688,0.992
3,0.736,0.688,1.0,0.992,1.0
4,0.784,1.0,0.992,0.992,1.0





Таблица accuracy для функции активации relu


Neurons,1,2,3,4,5
Layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.736,0.752,0.952,1.0,1.0
3,0.72,0.752,1.0,0.688,1.0
4,0.44,0.744,0.44,1.0,1.0







In [38]:
# Two-arm spiral (3.5 turns) with a 75/25 train/test split.
X, y = make_spiral(n_samples = 500, sweep = 0.25, turns = 3.5, noise = 0.2, radius = 0.05)
train_idx, test_idx = split_data(X, y, ratio = 0.25, random_state = 52)

# Convert both parts to float32 tensors; targets become column vectors.
X_train, X_test = (
    torch.from_numpy(X[part]).float() for part in (train_idx, test_idx)
)
y_train, y_test = (
    torch.from_numpy(y[part]).float().view(-1, 1) for part in (train_idx, test_idx)
)

# One accuracy table per activation: rows are hidden-layer counts (2..4),
# columns are neurons per hidden layer (1..5).
for activation in ('sigmoid', 'tanh', 'relu'):
    stat_table = pd.DataFrame(
        index = pd.Index(list(range(2, 5)), name = "Layers"),
        columns = pd.Index(list(range(1, 6)), name = "Neurons"),
    )

    for layers in range(2, 5):
        for neurons in range(1, 6):
            # ``fit`` returns the model, so construction and training chain.
            model = MultiLayerPerceptron(
                activation = activation,
                layers = layers,
                neurons = neurons,
                task = 'classification',
            ).fit(
                X_train, y_train,
                learning_rate = 0.01,
                n_epochs = 500,
                batch_size = 25,
                random_state = 52,
            )

            # Share of test rows whose predicted label equals the target.
            accuracy = round(float((model.predict(X_test) == y_test).float().mean()), 3)
            stat_table.at[layers, neurons] = accuracy

    print(f"Таблица accuracy для функции активации {activation}")
    display(stat_table)
    print('\n\n')

Таблица accuracy для функции активации sigmoid


Neurons,1,2,3,4,5
Layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.528,0.512,0.448,0.44,0.432
3,0.528,0.432,0.528,0.472,0.512
4,0.528,0.528,0.528,0.528,0.432





Таблица accuracy для функции активации tanh


Neurons,1,2,3,4,5
Layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.424,0.424,0.504,0.52,0.432
3,0.432,0.528,0.432,0.52,0.48
4,0.512,0.456,0.536,0.472,0.512





Таблица accuracy для функции активации relu


Neurons,1,2,3,4,5
Layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.432,0.424,0.48,0.448,0.536
3,0.432,0.432,0.496,0.456,0.456
4,0.432,0.464,0.536,0.536,0.512





