### Bibliotekos ir random seed nustatymas

In [162]:
import random
import math
import numpy as np
import pandas
# One shared seed for both generators: the stdlib `random` module and
# NumPy's legacy global RNG are seeded with the same value so a fresh
# kernel run draws the same random numbers.
SEED = 5
for seed_rng in (random.seed, np.random.seed):
    seed_rng(SEED)

### SOM klasė
Naudojama Gauso kaimynystės funkcija, Euklido atstumo funkcija ir mokymo greitis $\alpha(t, T) = 1 - t / T$.

In [163]:
class SOM:
    """Self-organizing map on a rectangular grid of neurons.

    Each neuron holds a weight vector. Training repeatedly finds the neuron
    closest to a sample (Euclidean distance) and pulls every neuron towards
    that sample, scaled by a Gaussian of the grid distance to the winner and
    by a learning rate that decays linearly as ``1 - t / T``.

    Attributes are plain instance fields:
        map_size: the ``(rows, columns)`` pair passed to the constructor.
        map: NumPy array of weights indexed as ``map[row][column]``.
    """

    def __init__(self, map_size, input_dimensions) -> None:
        """Create the grid with every weight drawn from ``random.random()``.

        Args:
            map_size: indexable pair ``(rows, columns)``.
            input_dimensions: number of components in each weight vector.
        """
        self.map_size = map_size
        # Weights come from the stdlib `random` module, so `random.seed`
        # (not `np.random.seed`) controls the initial map.
        initial_weights = [
            [
                [random.random() for _ in range(input_dimensions)]
                for _ in range(map_size[1])
            ]
            for _ in range(map_size[0])
        ]
        self.map = np.array(initial_weights)

    def train(self, inputs, iteration_count):
        """Train for ``iteration_count`` passes, each visiting every sample in order.

        Because the learning rate is ``1 - t / T``, the last pass (t == T) has
        a rate of zero and leaves the map unchanged.

        Args:
            inputs: iterable of sample vectors.
            iteration_count: number of passes over ``inputs``.
        """
        for t in range(1, iteration_count + 1):
            for sample in inputs:
                self._update_weights(sample, t, iteration_count)

    def train_alternative(self, inputs, iteration_count):
        """Train for ``iteration_count`` steps, each using one randomly picked sample.

        The sample index is drawn with ``random.randrange``.

        Args:
            inputs: indexable, sized collection of sample vectors.
            iteration_count: number of single-sample update steps.

        Raises:
            ValueError: raised by ``random.randrange`` when ``inputs`` is empty
                and ``iteration_count`` is at least 1.
        """
        for t in range(1, iteration_count + 1):
            sample = inputs[random.randrange(0, len(inputs))]
            self._update_weights(sample, t, iteration_count)

    def _update_weights(self, sample, iteration, iteration_count):
        """Move every neuron towards ``sample`` (one update step shared by both trainers)."""
        x, y, _ = self.get_winner(sample)

        for i in range(self.map_size[0]):
            for j in range(self.map_size[1]):
                # Chebyshev ring index around the winner (1 for the winner itself);
                # it is used as the Gaussian width for this neuron.
                # NOTE(review): the width grows with the distance from the winner,
                # and the Euclidean grid distance is at most sqrt(2) times the ring
                # index, so the Gaussian factor never drops below exp(-1): every
                # neuron is always pulled noticeably towards each sample and the
                # neighbourhood never shrinks. Confirm this is intended.
                neighbor_number = max(abs(x - i), abs(y - j)) + 1
                distance_between_neuron_indexes = self.__euclidean_distance([x, y], [i, j])

                step = self.__gaussian_neighborhood_func(
                    distance_between_neuron_indexes, iteration, iteration_count, neighbor_number
                )
                # `self.map[i][j]` is a view, so this updates the map in place.
                self.map[i][j] += step * (sample - self.map[i][j])

    def get_mapped_inputs(self, inputs, classes):
        """Group class labels by the winning neuron of their sample.

        Args:
            inputs: iterable of sample vectors.
            classes: iterable of labels, paired with ``inputs`` by position.

        Returns:
            Nested list ``result[row][column]`` holding the labels of all
            samples whose winner is that neuron (empty list if none).
        """
        mapped_inputs = [[[] for _ in range(self.map_size[1])] for _ in range(self.map_size[0])]

        for sample, clss in zip(inputs, classes):
            x, y, _ = self.get_winner(sample)
            mapped_inputs[x][y].append(clss)

        return mapped_inputs

    def get_quantization_error(self, inputs):
        """Return the mean distance between each sample and its winning neuron.

        Args:
            inputs: sized iterable of sample vectors.

        Raises:
            ValueError: if ``inputs`` is empty (the mean is undefined).
        """
        if len(inputs) == 0:
            raise ValueError("inputs must contain at least one sample")

        # Accumulated with a plain loop (not `sum`) to keep the exact
        # left-to-right floating-point addition order.
        distances_sum = 0
        for sample in inputs:
            _, _, distance = self.get_winner(sample)
            distances_sum += distance

        return distances_sum / len(inputs)

    def __euclidean_distance(self, x, y):
        """Return the Euclidean distance between two equally long sequences."""
        entries_sum = 0
        for x_i, y_i in zip(x, y):
            entries_sum += (x_i - y_i)**2

        return math.sqrt(entries_sum)

    def get_winner(self, input):
        """Find the neuron whose weights are closest to ``input``.

        Ties are resolved in favour of the first neuron in row-major order,
        because a candidate only replaces the current winner when it is
        strictly closer.

        Returns:
            Tuple ``(row, column, distance)`` of the winning neuron.
        """
        winner_info = (0, 0, self.__euclidean_distance(self.map[0][0], input))

        for i in range(self.map_size[0]):
            for j in range(self.map_size[1]):
                distance = self.__euclidean_distance(self.map[i][j], input)
                if distance < winner_info[2]:
                    winner_info = (i, j, distance)

        return winner_info

    def __gaussian_neighborhood_func(self, distance, iteration, iteration_count, neighbor_number):
        """Return learning rate times ``exp(-distance^2 / (2 * neighbor_number^2))``."""
        return self.__learning_function(iteration, iteration_count) * math.exp((-distance ** 2) / (2 * (neighbor_number ** 2)))

    def __learning_function(self, iteration, iteration_count):
        """Return the linearly decaying learning rate ``1 - iteration / iteration_count``."""
        return 1 - (iteration / iteration_count)

In [164]:
# Columns 0-3 of each row are parsed as floats (the default dtype) and
# become the training vectors.
inputs = np.loadtxt("iris.data", delimiter=",", usecols=range(0, 4))
# Column 4 is read as text. `np.str_` is the same dtype as the old
# `np.unicode_` alias, which was removed in NumPy 2.0.
classes = np.loadtxt("iris.data", delimiter=",", usecols=range(4, 5), dtype=np.str_)

#zipped = list(zip(inputs, classes))
#np.random.shuffle(zipped)
#inputs, classes = zip(*zipped)


In [165]:
# Build a 10x10 map sized to the sample vectors and run 1000 training passes.
som = SOM(map_size=(10, 10), input_dimensions=len(inputs[0]))
som.train(inputs, iteration_count=1000)

# Group the class labels by winning neuron, then report the mean
# sample-to-winner distance.
mapped_inputs = som.get_mapped_inputs(inputs, classes)
quantization_error = som.get_quantization_error(inputs)
print("Quantization error:", quantization_error)


Quantization error: 2.2271434414825877


In [166]:
# Collapse each cell's label list into a comma-separated string of its
# distinct labels (np.unique also sorts them); empty cells become "".
# Note: this overwrites `mapped_inputs` in place.
for grid_row in mapped_inputs:
    for column, cell_labels in enumerate(grid_row):
        grid_row[column] = ", ".join(np.unique(cell_labels))

pandas.DataFrame(mapped_inputs)

Unnamed: 0,0,1,2,3,4
0,Iris-virginica,,,,"Iris-versicolor, Iris-virginica"
1,,,,,"Iris-setosa, Iris-versicolor, Iris-virginica"
2,,,,Iris-virginica,
3,,Iris-virginica,,,
4,Iris-virginica,Iris-virginica,Iris-virginica,,
