<a href="https://colab.research.google.com/github/Gennadiy-Korobeynikov/iris_classification_interactive/blob/main/iris_classification_interactive.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import random
from sklearn import datasets
from sklearn import model_selection # for split

def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``.

    ``z`` is passed to ``np.exp``, so the function is applied element-wise
    when ``z`` is an array.
    """
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator

def sigmoid_prime(z):
    """Return the derivative of the sigmoid with respect to its input.

    Computed as ``s * (1 - s)`` where ``s = sigmoid(z)``.
    """
    s = sigmoid(z)
    return s * (1 - s)

def cost_function(network, test_data, onehot=True):
    """Return the mean squared-error cost of ``network`` over ``test_data``.

    Args:
        network: Object exposing ``feedforward(example)`` that returns the
            output activations for one example.
        test_data: Sized iterable of ``(example, y)`` pairs.
        onehot: ``True`` when each ``y`` is already a target vector that can
            be subtracted from the network output. ``False`` when ``y`` is a
            class index; it is then expanded to a one-hot column whose
            height matches the network output.

    Returns:
        The sum of squared differences between target and output, averaged
        over ``len(test_data)``.
    """
    total = 0
    for example, y in test_data:
        yhat = network.feedforward(example)
        if not onehot:
            # Size the one-hot column from the actual output instead of a
            # fixed 3, so networks with any number of classes work.
            # k=-int(y) places the single 1 on row int(y).
            y = np.eye(np.shape(yhat)[0], 1, k=-int(y))
        total += np.sum((y - yhat) ** 2)
    return total / len(test_data)

In [None]:
class Network:
    """Fully connected feed-forward network with sigmoid activations.

    Parameters are trained by mini-batch stochastic gradient descent, with
    gradients of the quadratic cost obtained by backpropagation.
    """

    def __init__(self, sizes, output=True):
        """Create a network with randomly initialised weights and biases.

        Args:
            sizes: Number of neurons per layer, input layer first.
            output: Stored as ``self.output``. No method of this class reads
                it at present; the parameter is kept so existing callers
                that pass it continue to work.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # One bias column per non-input layer, one weight matrix per pair of
        # adjacent layers (rows: next layer, columns: previous layer).
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]
        self.output = output

    def feedforward(self, a):
        """Return the network output for input activations ``a``."""
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            test_data=None):
        """Train the network with mini-batch stochastic gradient descent.

        Args:
            training_data: Iterable of ``(x, y)`` pairs accepted by
                ``backprop``. It is copied before shuffling, so the caller's
                sequence is left in its original order.
            epochs: Number of passes over the training data.
            mini_batch_size: Number of examples per gradient step.
            eta: Learning rate.
            test_data: Optional sized collection of ``(x, label)`` pairs
                accepted by ``evaluate``.

        Returns:
            The fraction of ``test_data`` classified correctly after the
            last epoch, or ``None`` when ``test_data`` is ``None``.
        """
        # random.shuffle works in place; shuffle a private copy so training
        # does not reorder the caller's data as a side effect.
        samples = list(training_data)
        n = len(samples)
        for _ in range(epochs):
            random.shuffle(samples)
            for start in range(0, n, mini_batch_size):
                self.update_mini_batch(
                    samples[start:start + mini_batch_size], eta)
        if test_data is None:
            return None
        return self.evaluate(test_data) / len(test_data)

    def update_mini_batch(self, mini_batch, eta):
        """Apply one gradient-descent step averaged over ``mini_batch``.

        Args:
            mini_batch: Non-empty sequence of ``(x, y)`` training pairs.
            eta: Learning rate.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # Accumulate the per-example gradients over the whole batch.
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        # Dividing the learning rate by the batch size turns the summed
        # gradient into an average.
        eps = eta / len(mini_batch)
        self.weights = [w - eps * nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - eps * nb for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Return the cost gradient for a single training example.

        The activations and weighted inputs of the forward pass are kept on
        the instance as ``self.ass`` and ``self.zs``.

        Args:
            x: Input activations.
            y: Target output.

        Returns:
            A tuple ``(nabla_b, nabla_w)`` of per-layer lists shaped like
            ``self.biases`` and ``self.weights``.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # Forward pass: record every weighted input z and activation a.
        a = x
        self.ass = [x]
        self.zs = []
        for b, w in zip(self.biases, self.weights):
            z = w @ a + b
            self.zs.append(z)
            a = sigmoid(z)
            self.ass.append(a)

        # Backward pass: error of the output layer.
        delta = self.cost_derivative(a, y) * sigmoid_prime(self.zs[-1])
        nabla_b[-1] = delta  # gradient w.r.t. the output-layer biases
        nabla_w[-1] = delta @ self.ass[-2].T  # ... and output-layer weights

        # Here layer = 1 means the last layer, layer = 2 the one before it,
        # and so on, so negative indices walk the network backwards.
        for layer in range(2, self.num_layers):
            delta = (self.weights[-layer + 1].T @ delta) \
                * sigmoid_prime(self.zs[-layer])
            nabla_b[-layer] = delta
            nabla_w[-layer] = delta @ self.ass[-layer - 1].T
        return nabla_b, nabla_w

    def evaluate(self, test_data):
        """Return how many examples in ``test_data`` are classified correctly.

        The predicted class is the index of the largest output activation.

        Args:
            test_data: Iterable of ``(x, label)`` pairs. ``label`` is either
                a class index or a one-hot vector; a label with more than
                one element is reduced to the index of its largest entry.
        """
        correct = 0
        for x, y in test_data:
            predicted = np.argmax(self.feedforward(x))
            expected = np.argmax(y) if np.size(y) > 1 else y
            correct += int(predicted == expected)
        return correct

    def cost_derivative(self, output_activations, y):
        """Return ``output_activations - y``, the term ``backprop`` uses as
        the cost derivative with respect to the output activations."""
        return (output_activations - y)

In [None]:
# Load the Iris data set: feature matrix X and class labels y.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for testing.
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size = 0.20)
# Turn the label vectors into columns so they can be stacked next to the features.
y_train = y_train[:, np.newaxis]
y_test = y_test[:, np.newaxis]
train = np.hstack([x_train, y_train])
test = np.hstack([x_test, y_test])
# Each row is now [features..., label]. Slice the features with d[:-1] rather
# than a module-level feature count, so this cell does not depend on a name
# that is only assigned in a later cell.
# Training targets are one-hot columns (k=-label puts the 1 on row `label`);
# test targets stay as plain class labels.
train = [(d[:-1][:, np.newaxis], np.eye(3, 1, k=-int(d[-1]))) for d in train]
test =  [(d[:-1][:, np.newaxis], d[-1]) for d in test]

In [None]:
# Layer widths of the network trained below.
input_count = 4
hidden_count = 6
output_count = 3

# Train ten freshly initialised networks and print their mean test accuracy.
accuracies = []
for _ in range(10):
    nn = Network([input_count, hidden_count, output_count])
    accuracy = nn.SGD(training_data=train, epochs=500, mini_batch_size=10,
                      eta=0.05, test_data=test)
    accuracies.append(accuracy)
r = np.array(accuracies)
print(np.mean(r))

0.9566666666666667


In [None]:
import matplotlib.pyplot as plt
from ipywidgets import *
@interact(layer1=IntSlider(min=0, max=10, continuous_update=False, description="1й внутренний слой: ", value=6),
          layer2=IntSlider(min=0, max=10, continuous_update=False, description="2й внутренний слой:"),
          layer3=IntSlider(min=0, max=10, continuous_update=False, description="3й внутренний слой: "),
          batch_size=BoundedIntText(min=1, max=40, value=10, description="Batch size: "),
          learning_rate=Dropdown(options=["0.01", "0.05", "0.1", "0.5", "1", "5", "10"],
                                 description="Learning rate: ")
         )
def learning_curve_by_network_structure(layer1, layer2, layer3, batch_size, learning_rate):
    """Train a network for 500 epochs and plot its learning curves.

    Args:
        layer1, layer2, layer3: Hidden-layer widths; a width of 0 drops
            that layer from the network.
        batch_size: Mini-batch size passed to ``Network.SGD``.
        learning_rate: Learning rate as a string (the dropdown's options
            are strings); converted to ``float`` before use.

    Draws two plots: the share of misclassified test examples per epoch,
    and the cost on the training and test sets per epoch.
    """
    candidate_sizes = (input_count, layer1, layer2, layer3, output_count)
    layers = [size for size in candidate_sizes if size > 0]
    nn = Network(layers, output=False)
    eta = float(learning_rate)

    CER = []
    cost_train = []
    cost_test = []
    # One SGD call per epoch so the metrics can be sampled after each pass.
    for _epoch in range(500):
        nn.SGD(training_data=train, epochs=1, mini_batch_size=batch_size, eta=eta)
        error_rate = 1 - nn.evaluate(test) / len(test)
        CER.append(error_rate)
        # Test labels are class indices, training labels are one-hot vectors.
        cost_test.append(cost_function(nn, test, onehot=False))
        cost_train.append(cost_function(nn, train, onehot=True))

    fig = plt.figure(figsize=(15, 5))

    # Left plot: classification error rate on the test set.
    fig.add_subplot(1, 2, 1)
    plt.ylim(0, 1)
    plt.plot(CER)
    plt.title("Ошибка классификации")
    plt.ylabel("Доля неверно классифицированных объектов")
    plt.xlabel("Количество эпох")

    # Right plot: cost on the training and test sets.
    fig.add_subplot(1, 2, 2)
    plt.plot(cost_train, label="Отклонение при обучении", color="orange")
    plt.plot(cost_test, label="Отклонение при тестировании", color="blue")
    plt.title("Кривая обучения")
    plt.ylabel("Функция потерь")
    plt.xlabel("Количество эпох")
    plt.legend()
    plt.show()

interactive(children=(IntSlider(value=6, continuous_update=False, description='1й внутренний слой: ', max=10),…