In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.autonotebook import tqdm
from scipy.optimize import minimize
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, train_test_split

  from tqdm.autonotebook import tqdm


In [2]:
# Load the project data set; pandas reads the CSV straight from the raw
# GitHub URL, so no local copy of the file is needed.
url = 'https://raw.githubusercontent.com/DavGev/OMDS_project/master/data.txt'
data = pd.read_csv(url)

In [3]:
# Colab-only step: mount Google Drive at /content/drive. The cross-validation
# cell further down writes its results CSV under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
# Restrict the problem to the two classes 'A' and 'G' (binary classification).
_is_a_or_g = data.Y.isin(['A', 'G'])
_is_feature_column = data.columns != 'Y'

# Features: every column except the label 'Y', as a plain NumPy array.
X = data.loc[_is_a_or_g, _is_feature_column].to_numpy()

# Target: the 'G' indicator column of the one-hot encoded label, i.e.
# truthy for 'G' samples and falsy for 'A' samples.
y = pd.get_dummies(data.loc[_is_a_or_g, 'Y'])['G'].to_numpy()


# PART 1 - MLP

- $ E(ω;π) = - \frac{1}{P} ∑_{i=1}^P {[y_i \ln(p_i) + (1 - y_i) \ln(1 - p_i)]} + ρ \|ω\|^2 $
- $ \rho = 10^{-4} $
- $ S(v)_j = \frac{e^{v_j}}{∑_{h=1}^n e^{v_h}} $
- The activation function $g(t) := \tanh(t)$

### Hyperparameters
- the number H of hidden layers (max. 4) (only for question 1)
- the number of neurons N of the hidden layers
- the spread $σ > 0$ in the activation function $g$ ($g$ is available in Python with $σ = 1$: `numpy.tanh`)

### Tasks
- Write a program which implements the regularized training error function $E(v,w,b)$
- **Question 1. (grade up to 20)** Use an optimization algorithm from `scipy.optimize` that uses the gradient to determine the parameters $v_j ,w_{ji}, b_j$ which minimize the error.
- **Question 2. (grade up to 10)** Develop an RBF neural network trained by implementing the decomposition method studied in class.

| Ex | H | N | $σ$ | $ρ$ | Optimization | Message | Init train error | Final train error | Final test error | f/grad evaluations | Time |
| -|-|-|-|-|-|-|-|-|-|-|-|
| Q1 Full MLP |
| Q2 RBF |


\* optimization: with parameters (optimality accuracy, max number of iterations etc)

\* message: in output (successful optimization or others, number of iterations, number of function/gradient evaluations, starting/final value of the objective function, starting/final accuracy etc)

In [5]:
class Sigmoid:
    """Logistic sigmoid activation, ``s(x) = 1 / (1 + exp(-x))``."""

    def __call__(self, x):
        """Return the element-wise sigmoid of ``x``.

        The naive formula evaluates ``exp(-x)``, which overflows for large
        negative ``x`` (NumPy then emits a RuntimeWarning). Here everything is
        expressed through ``exp(-|x|)``, which always lies in [0, 1], so no
        intermediate value can overflow:

        * ``x >= 0``:  ``1 / (1 + exp(-x))``
        * ``x <  0``:  ``exp(x) / (1 + exp(x))``

        x: scalar or array-like of pre-activations.
        Returns an array of the same shape (a scalar for scalar input).
        """
        x = np.asarray(x)
        decay = np.exp(-np.abs(x))
        result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
        # Indexing with an empty tuple turns a 0-d result back into a scalar
        # and leaves n-d arrays untouched.
        return result[()]

    def grad(self, s_x):
        """Return ds/dx expressed through the sigmoid *output* ``s_x``.

        Note that the argument is ``s(x)``, not ``x`` itself.
        """
        return s_x * (1 - s_x)


class Tanh:
    """Hyperbolic-tangent activation with spread ``sigma``: ``tanh(sigma * x)``."""

    def __init__(self, sigma):
        # Multiplier applied to the pre-activation before taking tanh.
        self.sigma = sigma

    def __call__(self, x):
        """Return ``tanh(sigma * x)`` element-wise."""
        scaled = self.sigma * x
        return np.tanh(scaled)

    def grad(self, th_x):
        """Return the derivative w.r.t. ``x`` given the activation *output*.

        ``th_x`` is the value previously returned by ``__call__``; the
        derivative of ``tanh(sigma * x)`` is ``sigma * (1 - th_x ** 2)``.
        """
        one_minus_square = 1 - th_x ** 2
        return self.sigma * one_minus_square


class Linear:
    """Identity activation: the output equals the input."""

    # Derivative of the identity map; the same for every input.
    _SLOPE = 1

    def __call__(self, x):
        """Return ``x`` unchanged."""
        return x

    def grad(self, x):
        """Return the constant derivative 1 (``x`` is ignored)."""
        return self._SLOPE



In [6]:
class MLPLayer():
    """One fully connected layer: ``output = activation([1, input] @ w.T)``.

    Column 0 of ``w`` multiplies the constant 1 that ``Forward`` prepends to
    the input, i.e. it holds the biases.
    """

    def __init__(self, input_size, output_size, activation):
        """
        input_size: number of columns of ``w``; because ``Forward`` prepends a
            constant 1, this must already include that extra column.
        output_size: number of rows of ``w`` (neurons in this layer).
        activation: callable exposing ``grad(output)``.
        """
        # Uniform random draw, rescaled so the whole matrix has unit
        # Frobenius norm.
        raw_weights = np.random.random((output_size, input_size)) * 0.01
        frobenius_norm = (raw_weights ** 2).sum() ** 0.5
        self.w = raw_weights / frobenius_norm
        self.activation = activation

        # Caches written by Forward / Backward.
        self.input = None
        self.output = None
        self.grad_w = None
        self.grad_input = None


    def Forward(self, input):
        """Compute and cache the layer output for ``input``."""
        with_bias = np.insert(input, 0, 1, axis=-1)
        self.input = with_bias
        pre_activation = with_bias @ self.w.T
        self.output = self.activation(pre_activation)


    def Backward(self, grad_output):
        """Back-propagate ``grad_output`` (gradient w.r.t. this layer's output).

        Stores the gradient w.r.t. the weights in ``grad_w`` and the gradient
        w.r.t. the layer input (bias column excluded) in ``grad_input``.
        Relies on the values cached by the preceding ``Forward`` call.
        """
        delta = self.activation.grad(self.output) * grad_output
        self.grad_w = delta.T @ self.input
        # Skip column 0: the prepended constant has no upstream gradient.
        self.grad_input = delta @ self.w[:, 1:]


In [14]:
class MLP():
    """Multi-layer perceptron with Tanh hidden layers and a Sigmoid output.

    The training objective is the mean binary cross-entropy plus the L2
    penalty ``rho * ||w||^2`` over all weights (biases included).
    """

    # Predicted probabilities are clipped to [_EPS, 1 - _EPS] before any
    # logarithm or reciprocal is taken. Without this, a prediction that
    # saturates to exactly 0 or 1 produces 0 * log(0) = NaN in the error and
    # 0 / 0 = NaN in the gradient, which derails the optimizer.
    _EPS = np.finfo(float).eps

    def __init__(self, N, sigma=10):
        '''
        N: array of numbers of neurons in the input layer,
        each hidden layer and the output layer

        For example, if the data is 10-dimensional and we want two hidden
        layers with 5 neurons each and a single output neuron (as used with
        the binary cross-entropy in `error`), then N = [10, 5, 5, 1]

        sigma: spread passed to the Tanh activation of the hidden layers
        '''
        self.rho = 1e-4
        self.layers = [
            MLPLayer(
                input_size = N[i] + 1,  # +1 for the bias column
                output_size = N[i+1],
                activation = Tanh(sigma)
            ) for i in range(len(N) - 1)
        ]
        # The last layer outputs a probability, so it uses a Sigmoid.
        self.layers[-1].activation = Sigmoid()


    def assign_w(self, w):
        '''
        Distribute a flat weight vector over the layers, in layer order.

        w: 1d array of all the weights

        Raises ValueError if `w` does not contain exactly as many entries as
        the layers hold; previously surplus entries were silently ignored.
        '''
        flat = np.asarray(w).ravel()
        expected_size = sum(layer.w.size for layer in self.layers)
        if flat.size != expected_size:
            raise ValueError(
                f'expected {expected_size} weights, got {flat.size}'
            )

        start = 0
        for layer in self.layers:
            end = start + layer.w.size
            # Copy so the layer never aliases the caller's (optimizer's)
            # buffer, which may be modified after this call returns.
            layer.w = flat[start : end].reshape(layer.w.shape).copy()
            start = end


    def get_flat(self, what):
        '''
        Concatenate a per-layer array into one flat vector, in layer order.

        what: 'w' for the weights or 'grad_w' for their gradients

        Raises ValueError for any other value of `what`.
        '''
        if what not in ('w', 'grad_w'):
            raise ValueError(f"what must be 'w' or 'grad_w', got {what!r}")
        return np.concatenate(
            [getattr(layer, what).flatten() for layer in self.layers]
        )


    def predict(self, X):
        '''
        Run a forward pass and return the output of the last layer.

        Each layer's `Forward` is called in turn, so the layers are left
        holding the state that `gradient` back-propagates through.
        '''
        activations = np.asarray(X)
        for layer in self.layers:
            layer.Forward(activations)
            activations = layer.output
        return activations


    def error(self, X, y):
        '''
        Regularized error: mean binary cross-entropy + rho * ||w||^2.

        X: samples, one per row
        y: 1d array of 0/1 (or boolean) targets
        '''
        y = np.asarray(y).reshape(-1, 1)
        p = np.clip(self.predict(X), self._EPS, 1 - self._EPS)
        error = - (y * np.log(p) + (1-y) * np.log(1-p)).mean()
        error += self.rho * (self.get_flat('w') ** 2).sum()
        return error


    def gradient(self, X, y):
        '''
        Gradient of `error` w.r.t. the flat weight vector (same ordering as
        `get_flat('w')`), computed by back-propagation.
        '''
        y = np.asarray(y).reshape(-1, 1)
        p = np.clip(self.predict(X), self._EPS, 1 - self._EPS)
        # `error` averages over every element of p, so divide by p.size
        # (equal to the number of samples when there is one output neuron).
        grad_output = - (y / p - (1-y) / (1-p)) / p.size
        for layer in self.layers[::-1]:
            layer.Backward(grad_output)
            grad_output = layer.grad_input

        grad = self.get_flat('grad_w')
        grad += 2 * self.rho * self.get_flat('w')
        return grad


    def fit(self, X, y, method='BFGS', tol=None, options=None):
        '''
        Minimize `error` on (X, y) with `scipy.optimize.minimize`, starting
        from the current weights and supplying the analytic gradient.

        method: solver name passed to `minimize`
        tol, options: forwarded unchanged to `minimize` (e.g.
            options={'maxiter': 500}); None keeps the solver defaults

        Returns the result object of `minimize`; the optimized weights are
        also stored in the layers.
        '''

        def fun(w):
            self.assign_w(w)
            return self.error(X, y)

        def jac(w):
            self.assign_w(w)
            return self.gradient(X, y)

        w0 = self.get_flat('w')
        message = minimize(fun=fun, jac=jac, x0=w0, method=method,
                           tol=tol, options=options)
        # The last point evaluated by the solver is not necessarily the
        # returned optimum, so store the optimum explicitly.
        self.assign_w(message.x)
        return message


## Cross Validation

In [None]:
# Hold out 20% of the samples as the final test set; the remaining 80% is
# what the cross-validation below splits into training and validation folds.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Grid-search cross-validation over the number of hidden layers, the number
# of neurons per hidden layer and the Tanh spread sigma.
CV_SEED = 42
RESULTS_PATH = '/content/drive/MyDrive/Colab Notebooks/models_data.csv'

# The layers draw their initial weights from NumPy's global generator, so seed
# it once to make a re-run of this cell reproduce the same results.
np.random.seed(CV_SEED)

# Take the input width from the data instead of hard-coding it.
n_features = X_train_val.shape[1]

# One seeded splitter shared by all configurations: with an integer
# random_state every call to split() yields the same folds, so every
# hyperparameter combination is scored on identical train/validation splits.
kf = KFold(n_splits=5, shuffle=True, random_state=CV_SEED)

models_data = []
for N_layers in tqdm(range(5), desc='hidden layers'):
    # Without hidden layers there is no layer width to sweep.
    neuron_grid = range(4, 17, 4) if N_layers != 0 else range(1)
    for N_neurons in tqdm(neuron_grid, desc='neurons', leave=False):
        N = [n_features] + [N_neurons] * N_layers + [1]

        # NOTE: with N_layers == 0 the only layer uses the Sigmoid output
        # activation, so sigma has no effect on those fits; the sweep is kept
        # so that every configuration contributes the same number of rows.
        for log_sigma in tqdm(range(-2, 3), desc='log10(sigma)', leave=False):
            sigma = 10 ** log_sigma

            for train_index, valid_index in kf.split(X_train_val):
                X_train = X_train_val[train_index]
                y_train = y_train_val[train_index]
                X_val = X_train_val[valid_index]
                y_val = y_train_val[valid_index]

                # Fit the scaler on the training fold only, to avoid leaking
                # validation statistics into training.
                scaler = StandardScaler()
                X_train_scaled = scaler.fit_transform(X_train)
                X_val_scaled = scaler.transform(X_val)

                model = MLP(N, sigma)
                model.fit(X_train_scaled, y_train)

                # NOTE: model.error includes the rho * ||w||^2 penalty, so this
                # is the regularized validation error.
                error = model.error(X_val_scaled, y_val)

                models_data.append({'N_layers': N_layers,
                                    'N_neurons': N_neurons,
                                    'sigma': sigma,
                                    'error': error})

            # Checkpoint after each completed configuration so an interrupted
            # Colab session loses at most one configuration's folds.
            pd.DataFrame(models_data).to_csv(RESULTS_PATH)

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

In [24]:
# Sanity check: the log10(sigma) exponents swept by the cross-validation loop.
print([*range(-2, 3)])

[-2, -1, 0, 1, 2]
