In [None]:
import sys
sys.path.insert(0, '../../..')
from metrics import evaluate_classification
from plots import plot_decision_boundary, plot_data
from matplotlib import pyplot as plt
from sklearn.datasets import make_classification, make_moons
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

In [None]:
class NeuralNet:
    """Fully connected feed-forward classifier trained with full-batch RMSprop.

    Internally, data is laid out as (features, samples): `fit` and
    `predict_proba` transpose the (samples, features) input they receive.

    Hidden layers always use the sigmoid activation. The output layer uses
    softmax when it has several units and a single sigmoid unit otherwise.
    The single-unit case occurs for binary targets, which LabelBinarizer
    encodes as one column; a softmax over one unit would be constantly 1.

    Parameters
    ----------
    hidden_layer_sizes : sequence of int
        Number of units in each hidden layer. Input and output layer sizes
        are derived from the data inside `fit`.
    activ_funcs : sequence
        Stored on the instance, but not consulted by the current
        implementation (hidden activations are always sigmoid).
    normalize : bool
        If True, features are standardized with the training mean/std.
    learning_rate : float
        Step size of the RMSprop update.
    num_iter : int
        Maximum number of full-batch iterations.
    eps : float
        Training stops early once two consecutive logged costs differ by
        less than this value.
    beta : float
        Decay rate of the RMSprop running average of squared gradients.
    """

    def __init__(self, hidden_layer_sizes, activ_funcs, normalize = True, learning_rate = 0.01, num_iter = 30000, eps = 10**-2, beta = 0.999):
        # Keep a private copy so that fit() never mutates the caller's list
        # and can be called more than once on the same instance.
        self.hidden_layer_sizes = list(hidden_layer_sizes)
        # Replaced by [n_inputs, *hidden, n_outputs] once fit() has seen the data.
        self.layer_sizes = list(self.hidden_layer_sizes)
        # Number of weight layers: every hidden layer plus the output layer.
        self.layers_count = len(self.hidden_layer_sizes) + 1
        self.activ_funcs = activ_funcs
        self.normalize = normalize
        self.learning_rate = learning_rate
        self.num_iter = num_iter
        self.eps = eps
        self.beta = beta
        # The cost is logged every COST_APPEND_T iterations.
        self.COST_APPEND_T = 1

    def __normalize(self, X, mean = None, std = None):
        """Standardize each feature (row) of X; return (X_new, mean, std).

        When `mean` / `std` are omitted they are computed from X itself.
        """
        m = mean
        if m is None:
            m = np.array([np.mean(X, axis=1)]).T
        s = std
        if s is None:
            s = np.array([np.std(X, axis=1)]).T
        # A constant feature has zero std; dividing by 1 instead avoids NaN/inf.
        s = np.where(s == 0, 1.0, s)
        X_new = (X - m) / s
        return X_new, m, s

    def __sigmoid(self, Z):
        """Element-wise logistic function 1 / (1 + exp(-Z))."""
        # exp(-log(1 + exp(-Z))) is the same value without overflow in exp().
        return np.exp(-np.logaddexp(0, -Z))

    def __tanh(self, Z):
        """Element-wise hyperbolic tangent."""
        # np.tanh avoids the inf/inf = NaN of the explicit exp() formula.
        return np.tanh(Z)

    def __relu(self, Z):
        """Element-wise max(0, Z)."""
        return np.maximum(0, Z)

    def __sigmoid_derivative(self, A):
        """Derivative of the sigmoid expressed through its output A = sigmoid(Z)."""
        return np.multiply(A, 1 - A)

    def __softmax(self, Z):
        """Column-wise softmax (one column per sample)."""
        # Subtracting the column maximum leaves the result unchanged
        # mathematically but keeps exp() from overflowing.
        shifted = Z - np.max(Z, axis=0, keepdims=True)
        ex = np.exp(shifted)
        return ex / np.sum(ex, axis=0, keepdims = True)

    def __initialize_parameters(self):
        """Create weights (scaled by sqrt(2 / fan_in)), zero biases and zero RMSprop state."""
        self.parameters = {}
        self.rmsprop = {}
        n_i = self.layer_sizes
        for i in range(1, self.layers_count + 1):
            self.parameters[f"W{i}"] = np.random.randn(n_i[i], n_i[i - 1]) * np.sqrt(2/n_i[i - 1])
            self.parameters[f"b{i}"] = np.zeros((n_i[i], 1))
            self.rmsprop[f"SdW{i}"] = np.zeros((n_i[i], n_i[i - 1]))
            self.rmsprop[f"Sdb{i}"] = np.zeros((n_i[i], 1))

    def __forward_propagation(self, X):
        """Run the network on X; return (output activations, cache of all Z/A)."""
        cache = {"A0" : X}
        last = self.layers_count
        for i in range(1, last + 1):
            Z = np.dot(self.parameters[f"W{i}"], cache[f"A{i - 1}"]) + self.parameters[f"b{i}"]
            cache[f"Z{i}"] = Z
            if i < last:
                cache[f"A{i}"] = self.__sigmoid(Z)
            elif Z.shape[0] == 1:
                # Single output unit (binary target): softmax would be constantly 1.
                cache[f"A{i}"] = self.__sigmoid(Z)
            else:
                cache[f"A{i}"] = self.__softmax(Z)

        return cache[f"A{last}"], cache

    def compute_cost(self, A, Y):
        """Cross-entropy between predictions A and targets Y, averaged over the columns of Y."""
        m = Y.shape[1]
        # Keep probabilities strictly inside (0, 1) so log() never yields -inf/NaN.
        tiny = 1e-12
        A_safe = np.clip(A, tiny, 1 - tiny)
        res = Y * np.log(A_safe) + (1 - Y) * np.log(1 - A_safe)
        J = -(1 / m) * np.sum(res)
        return J

    def __backward_propagation(self, X, Y, cache):
        """Back-propagate through all layers; return a dict with dZ/dW/db per layer."""
        m = X.shape[1]
        gradients = {}
        for i in reversed(range(1, self.layers_count + 1)):
            if i == self.layers_count:
                # Output layer: A - Y (holds for sigmoid and softmax with cross-entropy).
                gradients[f"dZ{i}"] = cache[f"A{i}"] - Y
            else:
                dAi = np.dot(self.parameters[f"W{i + 1}"].T, gradients[f"dZ{i + 1}"])
                gradients[f"dZ{i}"] = np.multiply(dAi, self.__sigmoid_derivative(cache[f"A{i}"]))

            gradients[f"dW{i}"] = (1/m) * np.dot (gradients[f"dZ{i}"], cache[f"A{i - 1}"].T)
            gradients[f"db{i}"] = (1/m) * np.sum(gradients[f"dZ{i}"], axis = 1, keepdims = True)

        return gradients

    def __update_parameters(self, gradients):
        """Apply one RMSprop step to every layer's weights and biases."""
        for i in range(1, self.layers_count + 1):
            dW, db = gradients[f"dW{i}"], gradients[f"db{i}"]
            # Exponentially decayed average of the squared gradients.
            self.rmsprop[f"SdW{i}"] = self.beta * self.rmsprop[f"SdW{i}"] + (1 - self.beta) * dW**2
            self.rmsprop[f"Sdb{i}"] = self.beta * self.rmsprop[f"Sdb{i}"] + (1 - self.beta) * db**2
            # 1e-8 guards against division by zero.
            self.parameters[f"W{i}"] -= self.learning_rate * dW / (np.sqrt(self.rmsprop[f"SdW{i}"]) + 1e-8)
            self.parameters[f"b{i}"] -= self.learning_rate * db / (np.sqrt(self.rmsprop[f"Sdb{i}"]) + 1e-8)

    def fit(self, X_vert, Y_vert, print_cost = True):
        """Train the network.

        Parameters
        ----------
        X_vert : array of shape (samples, features)
        Y_vert : array of class labels, binarized with LabelBinarizer
        print_cost : bool
            If True, print the cost every 1000 iterations and plot the
            cost curve when training ends.
        """
        lb = LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)
        lb.fit(Y_vert)
        # Exposed so callers can map indicator columns back to labels.
        self.classes_ = lb.classes_
        X, Y = X_vert.T, lb.transform(Y_vert).T

        # Rebuilt from the stored hidden sizes on every call, so repeated
        # fits do not keep growing the architecture.
        self.layer_sizes = [X.shape[0], *self.hidden_layer_sizes, Y.shape[0]]

        if self.normalize:
            X, self.__mean, self.__std = self.__normalize(X)

        self.__initialize_parameters()

        costs = [] # Cost log
        for i in range(self.num_iter):
            A, cache = self.__forward_propagation(X)

            cost = self.compute_cost(A, Y)

            gradients = self.__backward_propagation(X, Y, cache)

            self.__update_parameters(gradients)

            if print_cost and i % 1000 == 0:
                print("{}-th iteration: {}".format(i, cost))

            if i % self.COST_APPEND_T == 0:
                costs.append(cost)

            # Early stopping once the logged cost has stopped changing.
            if(i>=self.COST_APPEND_T*2):
                if(abs(costs[-1] - costs[-2]) < self.eps):
                    break

        if print_cost:
            plt.plot(costs)
            plt.ylabel("Cost")
            plt.xlabel(f"Iteration, *{self.COST_APPEND_T}")
            plt.show()

    def __gradient(self, parameters, gradients, AL, X, Y, cost_function, eps=1e-7):
        """Return a relative difference between `gradients` and a finite-difference estimate.

        NOTE(review): the finite difference perturbs the targets Y, not the
        model parameters, and `parameters` / `X` are unused, so this does not
        look like a valid gradient check. It is not called anywhere in the
        visible code; kept unchanged pending confirmation of the intent.
        """
        theta_plus = np.copy(Y).astype(float)
        theta_plus += eps
        J_plus = cost_function(AL, theta_plus)

        theta_minus = np.copy(Y).astype(float)
        theta_minus -= eps
        J_minus = cost_function(AL, theta_minus)

        gradapprox = (J_plus - J_minus) / (2 * eps)

        numerator = np.linalg.norm(gradients - gradapprox)
        denominator = np.linalg.norm(gradients) + np.linalg.norm(gradapprox)
        difference = numerator / denominator

        return difference

    def predict_proba(self, X_vert):
        """Return output activations of shape (samples, output units) for X_vert."""
        X = X_vert.T
        if self.normalize:
            X, _, _ = self.__normalize(X, self.__mean, self.__std)

        probabilities = self.__forward_propagation(X)[0]
        return probabilities.T

    def predict(self, X_vert):
        """Return a 0/1 indicator matrix of shape (samples, output units).

        With one output unit the probability is thresholded at 0.5; with
        several units the maximum of each row is marked with 1.
        """
        probs = self.predict_proba(X_vert)
        if probs.shape[1] == 1:
            # Taking "the maximum" of a single column would mark every sample as 1.
            return (probs >= 0.5).astype(int)
        results_bin = (probs == probs.max(axis=1)[:, None]).astype(int)
        return results_bin

# Симульовані дані 1

In [None]:
# Binary, two-feature synthetic dataset (fixed seed).
X, y = make_classification(n_samples = 200, n_classes = 2, n_features = 2,
                           n_informative=2, n_redundant=0, random_state = 42,
                           flip_y=0.02, class_sep=0.8)
plot_data(X, y)
plt.show()
# Hold out 20% of the samples; labels are passed as a column vector.
X_train, X_test, y_train, y_test = train_test_split(X, y.reshape(-1,1), test_size=0.2, random_state=42)

# NOTE: NeuralNet.fit adds the input and output layers itself, so every entry
# below (including X.shape[1] and the trailing 1) is treated as a hidden layer.
hidden_layers_1 = [X.shape[1], 20, 20, 20, 20, 20, 1]
activ_funcs_1 = ['relu', 'relu', 'relu', 'relu', 'relu', 'sigmoid']

nn = NeuralNet(hidden_layers_1, activ_funcs_1)
nn.fit(X_train, y_train)

y_test_pred = nn.predict(X_test)

# The NeuralNet hyperparameters (normalize, learning_rate, ...) were passed to
# evaluate_classification by mistake; every other call in this notebook
# passes only the true and predicted labels.
accuracy, report, confusion = evaluate_classification(y_test, y_test_pred)
print(f"Accuracy: \n{accuracy}")
print(f"Report: \n{report}")
print(f"Confusion: \n{confusion}")
plot_decision_boundary(nn, X_test, y_test)
plt.show()

# Симульовані дані 2

In [None]:
# Two-moons toy dataset: 500 noisy samples generated with a fixed seed.
X, y = make_moons(n_samples=500, noise=0.2, random_state=42)

plot_data(X, y)
plt.show()

# Hold out 20% of the samples; labels are passed as a column vector.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y.reshape(-1, 1),
    test_size=0.2,
    random_state=42,
)

hidden_layers_1 = [X.shape[1], 10, 10, 5]
activ_funcs_1 = ['relu', 'relu', 'relu', 'sigmoid']

nn = NeuralNet(hidden_layers_1, activ_funcs_1, learning_rate=0.5)
nn.fit(X_train, y_train)

# Evaluate on the held-out split.
y_test_pred = nn.predict(X_test)
accuracy, report, confusion = evaluate_classification(y_test, y_test_pred)
print(
    f"Accuracy: \n{accuracy}",
    f"Report: \n{report}",
    f"Confusion: \n{confusion}",
    sep="\n",
)

plot_decision_boundary(nn, X_test, y_test)
plt.show()

# Tests dataset

In [None]:
# Columns of tests.csv used as features and as the target.
data_columns = ["test1", "test2"]
target_column = "passed"

df = pd.read_csv("tests.csv")
X = df[data_columns].values
y = df[target_column].values

plot_data(X, y)
plt.show()

# Hold out 20% of the samples; labels are passed as a column vector.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y.reshape(-1, 1),
    test_size=0.2,
    random_state=42,
)

In [None]:
# Same architecture as for the moons data, now trained on the tests.csv split.
hidden_layers_1 = [X.shape[1], 10, 10, 5]
activ_funcs_1 = ['relu', 'relu', 'relu', 'sigmoid']

nn = NeuralNet(hidden_layers_1, activ_funcs_1, learning_rate=0.5)
nn.fit(X_train, y_train)

# Evaluate on the held-out split.
y_test_pred = nn.predict(X_test)
accuracy, report, confusion = evaluate_classification(y_test, y_test_pred)
print(
    f"Accuracy: \n{accuracy}",
    f"Report: \n{report}",
    f"Confusion: \n{confusion}",
    sep="\n",
)

plot_decision_boundary(nn, X_test, y_test)
plt.show()

In [None]:
# Dump the weight matrices (W{i}) and bias vectors (b{i}) of the most recently fitted NeuralNet.
print(nn.parameters)

Зверніть увагу на межу прийняття рішень. Модель намагається побудувати складну криву, що може свідчити про її перенавчання. Порівняйте отримані результати з класом `MLPClassifier` зі sklearn. Спробуйте додати нові шари до нашого класу та знову порівняти результати. Поекспериментуйте з гіперпараметрами обох класів.

In [None]:
from sklearn.neural_network import MLPClassifier

# Reference model from scikit-learn: a single hidden layer of 20 units.
# fit() returns the estimator itself, so construction and training are chained.
clf = MLPClassifier(
    hidden_layer_sizes=(20,),
    max_iter=10000,
).fit(X_train, y_train.ravel())

# Evaluate on the held-out split.
y_test_pred = clf.predict(X_test)
accuracy, report, confusion = evaluate_classification(y_test, y_test_pred)
print(
    f"Accuracy: \n{accuracy}",
    f"Report: \n{report}",
    f"Confusion: \n{confusion}",
    sep="\n",
)

plot_decision_boundary(clf, X_test, y_test)
plt.show()

In [None]:
# Show how many iterations the scikit-learn solver ran before stopping.
clf.n_iter_