# Hebbian learning
- Basic neuron
  - load XML, rescale
  - generate random points in the unit square $(0, 1)^2 \subset \mathbb{R}^2$ that are linearly separable
- plot points
- learn
- plot points with neuron's line

In [2]:
import numpy as np
import math
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt

In [3]:
# Select matplotlib's "ggplot" style sheet for the figures drawn by this notebook.
plt.style.use("ggplot")

In [4]:
def parse_xml(filename):
    """Read a perceptron task description from an XML file.

    The document root is expected to contain a ``perceptron`` element with
    ``inputDescriptions`` children (each holding ``minimum``, ``maximum`` and
    ``name``), plus ``TrainSet`` and ``TestSet`` elements made of ``element``
    children. Every ``element`` carries its feature values under
    ``inputs/value``; training elements additionally carry an ``output``.

    Returns:
        A tuple ``(inputs, (train_x, train_y), test)`` where ``inputs`` is a
        list of ``{'min', 'max', 'name'}`` dicts, ``train_x`` and ``test`` are
        lists of float lists, and ``train_y`` is a list of floats.
    """
    def read_features(dataset):
        # One list of floats per <element>, in document order.
        return [
            [float(value.text) for value in element.find("inputs").findall("value")]
            for element in dataset.findall("element")
        ]

    def read_targets(dataset):
        # The expected output of every <element>, in document order.
        return [
            float(element.find("output").text)
            for element in dataset.findall("element")
        ]

    root = ET.parse(str(filename)).getroot()

    descriptions = [
        {
            'min': float(node.find('minimum').text),
            'max': float(node.find('maximum').text),
            'name': node.find('name').text,
        }
        for node in root.find("perceptron").findall("inputDescriptions")
    ]

    train_set = root.find("TrainSet")
    training = (read_features(train_set), read_targets(train_set))
    testing = read_features(root.find("TestSet"))

    return (descriptions, training, testing)

In [291]:
def plot_train(x, y):
    """Scatter the labelled training points on the current axes.

    Points whose label equals 0 are drawn as red circles ("train 0"); all
    other points are drawn as blue crosses ("train 1").

    Args:
        x: sequence of points; only the first two coordinates of each point
            are plotted.
        y: sequence of labels, paired with ``x`` position by position.
    """
    zero_first, zero_second = [], []  # coordinates of points labelled 0
    one_first, one_second = [], []    # coordinates of every other point
    for point, label in zip(x, y):
        if label == 0:
            zero_first.append(point[0])
            zero_second.append(point[1])
        else:
            one_first.append(point[0])
            one_second.append(point[1])

    plt.scatter(zero_first, zero_second, marker="o", label="train 0", c="r")
    plt.scatter(one_first, one_second, marker="x", label="train 1", c="b")
    
def plot_test(x):
    """Scatter the unlabelled test points as large black question marks.

    Only the first two coordinates of each point in ``x`` are plotted.
    """
    first_coords = [point[0] for point in x]
    second_coords = [point[1] for point in x]
    plt.scatter(
        first_coords,
        second_coords,
        s=100,
        marker="$?$",
        label="test",
        c="k",
    )
    
def plot_predicted(x, y):
    """Scatter points together with the labels predicted for them.

    Points whose predicted label equals 0 are drawn as large red circles
    ("predict 0"); all other points are drawn as large blue crosses
    ("predict 1").

    Args:
        x: sequence of points; only the first two coordinates of each point
            are plotted.
        y: sequence of predicted labels, paired with ``x`` position by
            position.
    """
    zero_first, zero_second = [], []  # coordinates of points predicted as 0
    one_first, one_second = [], []    # coordinates of every other point
    for point, label in zip(x, y):
        if label == 0:
            zero_first.append(point[0])
            zero_second.append(point[1])
        else:
            one_first.append(point[0])
            one_second.append(point[1])

    plt.scatter(zero_first, zero_second, marker="o", label="predict 0", c="r", s=100)
    plt.scatter(one_first, one_second, marker="x", label="predict 1", c="b", s=100)

In [6]:
def rescale(inputs, dataset):
    """Min-max scale every record of ``dataset`` using the ranges in ``inputs``.

    The i-th value of a record is mapped to
    ``(value - inputs[i]["min"]) / (inputs[i]["max"] - inputs[i]["min"])``.

    Args:
        inputs: sequence of dicts with ``"min"`` and ``"max"`` keys, one per
            feature.
        dataset: iterable of records (sequences of numbers).

    Returns:
        A new list of lists holding the scaled values; the arguments are not
        modified.

    Raises:
        IndexError: if a record has more values than ``inputs`` has entries.
    """
    scaled_dataset = []
    for record in dataset:
        scaled_record = []
        for i, value in enumerate(record):
            lower = inputs[i]["min"]
            span = inputs[i]["max"] - lower
            if span == 0:
                # Degenerate range (max == min): dividing would raise
                # ZeroDivisionError, so fall back to a unit range.
                span = 1
            scaled_record.append((value - lower) / span)
        scaled_dataset.append(scaled_record)
    return scaled_dataset

In [292]:
class Neuron:
    """Single threshold neuron with ``n`` inputs and a bias weight.

    ``w`` holds ``n + 1`` weights: ``w[0]`` multiplies a constant input of 1
    (the bias) and ``w[1:]`` multiply the actual inputs. The output is 1 when
    the weighted sum is strictly positive and 0 otherwise.
    """

    def __init__(self, n):
        """Create a neuron with ``n`` inputs.

        Input weights are drawn uniformly from [0, 1); the bias weight is
        set to 1.
        """
        self.n = n
        self.w = np.random.rand(n + 1)
        self.w[0] = 1

    def train(self, data_x, data_y, learning_rate=0.2, max_epochs=None):
        """Fit the weights with the error-correction rule ``w += lr * delta * x``.

        Whole passes over the data are repeated until one pass classifies
        every sample correctly. A progress line is printed after each pass.

        Args:
            data_x: sequence of input vectors (each of length ``n``).
            data_y: sequence of expected outputs (0 or 1), paired with
                ``data_x`` position by position.
            learning_rate: step size of each weight correction.
            max_epochs: upper bound on the number of passes, or ``None`` for
                no bound. Without a bound the loop never ends when the data
                cannot be separated by the neuron.

        Returns:
            ``True`` if a pass without any misclassification was reached,
            ``False`` if ``max_epochs`` passes ran out first.
        """
        # Prepend the constant bias input once instead of on every epoch.
        samples = [
            (np.append([1], features), y_expected)
            for features, y_expected in zip(data_x, data_y)
        ]
        total = len(data_x)

        epoch = 0
        while max_epochs is None or epoch < max_epochs:
            epoch += 1
            wrong = 0
            for x, y_expected in samples:
                delta = y_expected - self._predict(x)
                if delta != 0:
                    wrong += 1
                    # Only misclassified samples move the weights; a zero
                    # delta would add nothing.
                    self.w = self.w + learning_rate * delta * x

            print(f"epoch = {epoch}, score={total - wrong}/{total}")
            if wrong == 0:
                return True
        return False

    def predict(self, inputs):
        """Return the list of outputs (0 or 1) for every vector in ``inputs``."""
        return [self._predict(np.append([1], features)) for features in inputs]

    def _predict(self, x):
        """Classify one bias-augmented input vector ``x``."""
        return self._sign(np.dot(self.w, x))

    def _sign(self, x):
        """Step activation: 0 for ``x <= 0``, otherwise 1."""
        return 0 if x <= 0 else 1

In [319]:
# Quick run on a single data set with a two-input neuron.
# Note: the parsed training data is passed to train() as-is, without rescale().
neuron = Neuron(2)

inputs, (train_x, train_y), test = parse_xml("obdelnik_rozsah.xml")

neuron.train(train_x, train_y)

# Last expression of the cell: shows the learned weights (bias weight first).
neuron.w

epoch = 1, score=2/6
epoch = 2, score=1/6
epoch = 3, score=6/6


array([ 1.2       ,  2.40590853, -2.81865757])

In [None]:
def solve(filename):
    """Train a neuron on the data set stored in ``filename``.

    The file is parsed with ``parse_xml``, the training and test inputs are
    rescaled with ``rescale``, and a ``Neuron`` with one input per described
    feature is trained on the rescaled training set.

    When the data set has exactly two inputs, a three-panel figure is drawn
    and shown: the original data, the rescaled data, and the rescaled
    training data together with the predictions for the test points. For any
    other number of inputs nothing is plotted.
    """
    inputs, (train_x, train_y), test = parse_xml(filename)

    input_dim = len(inputs)
    plottable = input_dim == 2  # the scatter helpers only draw 2-D points

    def start_panel(position, title):
        # Make the given cell of the 1x3 grid current and give it a title.
        plt.subplot(1, 3, position)
        plt.title(title)

    if plottable:
        fig = plt.figure(figsize=(24, 8))
        fig.suptitle(filename)
        start_panel(1, "original")
        plot_train(train_x, train_y)
        plot_test(test)
        plt.legend()

    train_x = rescale(inputs, train_x)
    test = rescale(inputs, test)

    if plottable:
        start_panel(2, "rescaled")
        plot_train(train_x, train_y)
        plot_test(test)
        plt.legend()

    neuron = Neuron(input_dim)
    neuron.train(train_x, train_y)

    if plottable:
        start_panel(3, "result")
        plot_train(train_x, train_y)
        plot_predicted(test, neuron.predict(test))
        plt.legend()
        # TODO: draw the neuron's decision line a*x + b*y + c = 0, where
        # c, a, b = neuron.w. It crosses the axes at x = -c / a and
        # y = -c / b, which is undefined when a or b is zero.
        plt.show()

# Run the whole pipeline on one data set; the second call is kept disabled.
solve("obdelnik_rozsah.xml")
#solve("t2r.xml")

In [None]:
# Run the whole pipeline on every listed data set, one after another.
# NOTE(review): Neuron.train only stops after an error-free pass, so a file
# whose training set is not linearly separable would block this loop.
files = ["obdelnik_rozsah.xml", "t1r.xml", "t2r.xml", "t3r.xml", "t4r.xml", "t5r.xml", "t6r.xml", "t7r.xml"]
for file in files:
    solve(file)