In [312]:
import numpy as np
import math
import random
import itertools
from scipy.spatial import distance

In [26]:
def normalize_vector(vector):
    '''
    Returns the normalized vector of the provided one, a length 1
    vector with the same direction as the original.

    vector - Array-like of numbers. It is converted to floating point
    before being squared, so large integer components cannot overflow
    the integer type and corrupt the computed length.

    Returns a plain Python list of floats. An empty input yields an
    empty list.

    Raises ValueError if the vector has components but its computed
    length is 0, since such a vector has no direction and dividing by
    its length would only produce NaN values.
    '''
    vector = np.asarray(vector, dtype=float)
    norm = math.sqrt(np.sum(pow(vector, 2)))
    #An empty vector also has norm 0, but dividing it produces no values
    #at all, so it is let through unchanged
    if norm == 0 and vector.size > 0:
        raise ValueError('Cannot normalize a vector of length 0')
    return (vector/norm).tolist()

In [27]:
def extended_normalization(vector):
    '''
    When two vectors of different lengths but in the same direction are normalized, 
    the result will be the same for both of them.
    
    In order to avoid collisions of normalized vectors, an extended normalization can
    be applied, where an extra dimension is added to the vector to be normalized. By
    adding a component of length 1 in the new dimension and normalizing the result,
    it is ensured that no two distinct vectors with the same number of components
    will produce the same output.

    vector - Array-like of numbers. It is converted to floating point before
    being squared, so large integer components cannot overflow the integer type.

    Returns a plain Python list of floats with one more component than the input.
    The appended component guarantees a length of at least 1, so the division
    is always well defined.
    '''
    extended = np.append(np.asarray(vector, dtype=float), 1.0)
    return (extended/math.sqrt(np.sum(pow(extended, 2)))).tolist()

In [285]:
def get_random_unit_vector(components):
    '''
    Builds a random vector with the requested number of components and
    scales it to length 1.

    components - How many entries the vector must have.

    Every entry is drawn with random.random() before the whole vector is
    handed to normalize_vector, so no component of the result is negative.
    '''
    raw_vector = [random.random() for _ in range(components)]
    return normalize_vector(raw_vector)

In [320]:
class SOM:
    '''
    Self-organizing map whose output layer is an n-dimensional grid of neurons.

    Each neuron holds its weight vector as a Python list stored inside an
    object-dtype numpy array. Only weight initialization and input
    normalization are implemented so far; the training loop and predict
    are still pending.
    '''
    def __init__(self, dimensions, topology='rectangular'):
        '''
        dimensions - Array-like, containing the size of the output
        layer in each dimension.
        
        topology - The neighborhood relation between output-layer neurons.
        The default topology is rectangular. A future improvement would be
        to implement a hexagonal topology option. The value is stored on the
        instance but no method uses it yet.
        '''
        #Copied into a tuple so that later changes to the caller's sequence
        #cannot desynchronize the map from an already built output layer
        self.dimensions = tuple(dimensions)
        self.topology = topology
        self.output_layer = None
    def __neuron_indexes(self):
        '''
        Returns an iterator over the coordinates of every position of the
        output layer matrix, each one as a tuple of integers.
        '''
        return np.ndindex(*self.dimensions)
    def __weight_initialization(self, num_attr):
        '''
        Initializes the output layer to a matrix with the dimensions specified
        for this map, filled with random unit vectors of dimension num_attr
        
        num_attr - The number of attributes of the instances this network will
        be trained with
        '''
        self.output_layer = np.zeros(self.dimensions, dtype=object)
        for position in self.__neuron_indexes():
            self.output_layer[position] = get_random_unit_vector(num_attr)
        #TODO: Maybe cast the output layer to a regular list here?
    def __get_output(self, instance):
        '''
        Calculates the output of each neuron for the provided instance, which
        is the euclidean distance between the instance and the neuron weights.
        Returns a matrix with the same dimensions as the output layer
        containing the outputs

        Raises RuntimeError if the output layer has not been initialized yet.
        '''
        if self.output_layer is None:
            raise RuntimeError('The output layer is not initialized, call fit first')
        output_matrix = np.zeros(self.dimensions)
        for position in self.__neuron_indexes():
            output_matrix[position] = distance.euclidean(instance, self.output_layer[position])
        return output_matrix
    def fit(self, X):
        '''
        Initializes the output layer neurons to random unit vectors and
        normalizes the input vectors. The learning algorithm itself is not
        implemented yet, so the normalized inputs are currently discarded.
        
        X - The data to train the algorithm on, a non-empty sequence of
        instances that all have the same number of attributes

        Raises ValueError if X contains no instances.
        '''
        #len() is used instead of truthiness so numpy arrays are accepted too
        if len(X) == 0:
            raise ValueError('X must contain at least one instance')
        #Since extended normalization will be used, the inputs will have an extra 
        #dimension, which is why I add 1 here so the output layer vectors will have
        #the number of attributes of an instance plus 1 components
        #Weight initialization
        self.__weight_initialization(len(X[0]) + 1)
        #Input data normalization
        #TODO: feed the normalized instances to the training loop once it exists
        X = [extended_normalization(v) for v in X]
        return
    def predict(self, X):
        '''
        Placeholder, not implemented yet. Currently ignores X and returns None.
        '''
        return

In [321]:
#Build a 10x10 map and fit it on `data`, which must already be defined
#in the kernel before this cell is run
som = SOM(dimensions=[10, 10])
som.fit(X=data)

[[0.53140069 0.51116351 0.72665888 0.73221617 0.7292312  0.86187053
  0.44459233 0.61899012 0.28914073 0.32783975]
 [0.86321128 0.47212694 0.63517854 0.39726114 0.49018556 0.39903435
  0.81732227 0.33392835 0.31594633 0.67033569]
 [0.51503338 0.65596931 0.50915807 0.65641837 0.84149039 0.85581515
  0.44771864 0.6389543  0.23241105 0.8517861 ]
 [0.76746164 0.52308567 0.22763683 0.83707329 0.42804748 0.60934735
  0.57815097 0.51359833 0.68521157 0.32442415]
 [0.3391126  0.64695869 1.02234237 0.41947211 0.53070272 0.6239535
  0.79567247 0.57043079 0.3622835  0.30863071]
 [0.43454482 0.80096401 0.61395274 0.73393951 0.85132763 0.66673545
  0.58908355 0.42468788 0.51606853 0.64086274]
 [0.58397869 0.74652279 0.80623368 0.11474954 0.91032243 0.90334074
  0.63410124 0.81507237 0.64493158 0.65846022]
 [0.58516662 0.60209856 0.81805269 0.70397758 0.21841631 0.16183329
  0.8806946  0.62784617 0.36279974 0.45393279]
 [0.68567305 0.5740729  0.72375571 0.48739988 0.68899788 0.83228789
  0.86068491 

In [298]:
#Toy data set: five instances with three integer attributes each
data = [
    [1, 2, 3],
    [1, 1, 1],
    [2, 3, 4],
    [4, 2, 4],
    [2, 2, 5],
]