# Self-Organizing Maps - SOMs

### Organizing animals

In [157]:
import numpy as np

In [158]:
data = np.genfromtxt('data_lab2/animals.dat',
                     dtype=None,
                     delimiter=',')
data = data.reshape(32,84)
data.shape

(32, 84)

In [159]:
data[0]

array([1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])

### Algorithm: for each sample
1. Calculate the similarity between the input pattern and the weights arriving
at each output node.
2. Find the most similar node; often referred to as the winner.
3. Select a set of output nodes which are located close to the winner in the
output grid. This is called the neighbourhood.
4. Update the weights of all nodes in the neighbourhood such that their
weights are moved closer to the input pattern.

In [160]:
def descending_logarithmic(x):
    A = 49
    k = -np.log(0.03) / 16  # Calcola k in modo che y sia 1 quando x è 16
    B = 0  # Per ottenere y = 1 quando x è 16
    return A * np.exp(-k * x) + B

In [161]:
def custom_sigmoid(x):
    k = 4  # Puoi regolare k per controllare la pendenza
    return 50 - (50 / (1 + np.exp(-k * (x - 15))))

In [162]:
import math

class SOM:
    def __init__(self, output_shape, data_dimensionality, step_size, neighborhood_size):
        self.output_shape = output_shape
        self.weight_matrix = np.random.random((output_shape, data_dimensionality))
        # self.weight_matrix = np.ones((output_shape, data_dimensionality))
        self.step_size = step_size
        self.neighborhood_size = neighborhood_size

    def find_winner(self, sample):
        distances = []
        for row in range(self.weight_matrix.shape[0]):
            d = np.linalg.norm(sample - self.weight_matrix[row,:])
            distances.append(d)
            # print("{}: {}".format(row, d))
        
        winner = np.argmin(np.array(distances))
        # print("winner is: {}".format(winner))
        return winner

    def get_neighborhood(self, winner):
        neighborhood = range(np.clip(winner - self.neighborhood_size, 0, None), np.clip(winner + self.neighborhood_size, None, self.output_shape - 1))
        return neighborhood
    
    def update_weights(self, neighborhood, sample):
        for row in range(self.weight_matrix.shape[0]):
            if row in neighborhood:
                # print("{} is in neighbors".format(row))
                # print("old row{}: {}".format(row, self.weight_matrix[row]))
                new_row = self.weight_matrix[row] + (sample - self.weight_matrix[row]) * self.step_size
                # print("new_row {}: {}".format(row, new_row))

    def fit(self, X, n_epochs):
        for e in range(n_epochs):
            # y = -3*e + 50
            # y = descending_logarithmic(e)
            y = custom_sigmoid(e)

            self.neighborhood_size = math.floor(y)
            # else:
            #     self.neighborhood_size = 1

            print("{}: {}".format(e, self.neighborhood_size))

            for row in range(X.shape[0]):
                winner = self.find_winner(X[row])
                neighborhood = self.get_neighborhood(winner)

                self.update_weights(neighborhood, X[row])

In [163]:
som = SOM(100, 84, 0.2, 50)

In [164]:
som.fit(data, 20)

0: 50
1: 50
2: 50
3: 50
4: 50
5: 50
6: 49
7: 49
8: 49
9: 49
10: 49
11: 49
12: 49
13: 49
14: 49
15: 25
16: 0
17: 0
18: 0
19: 0


In [165]:
som.weight_matrix

array([[0.3349908 , 0.11924137, 0.43018933, ..., 0.23105268, 0.20325419,
        0.31438278],
       [0.06345794, 0.67725415, 0.15500831, ..., 0.17103375, 0.99863344,
        0.1046623 ],
       [0.11434145, 0.89163578, 0.98460569, ..., 0.02383097, 0.76844143,
        0.8166585 ],
       ...,
       [0.11075478, 0.48907421, 0.69731791, ..., 0.330521  , 0.09044452,
        0.968474  ],
       [0.62890204, 0.89251646, 0.9211009 , ..., 0.26656757, 0.68316064,
        0.46358345],
       [0.06638455, 0.24862305, 0.76238581, ..., 0.63175102, 0.6422038 ,
        0.39662713]])

In [166]:
som.find_winner(data[0])

15

In [167]:
winnrz = []

for row in range(data.shape[0]):
    winnrz.append( (row, som.find_winner(data[row])) )

In [168]:
len(winnrz), winnrz

(32,
 [(0, 15),
  (1, 66),
  (2, 8),
  (3, 50),
  (4, 65),
  (5, 65),
  (6, 71),
  (7, 51),
  (8, 65),
  (9, 51),
  (10, 65),
  (11, 65),
  (12, 50),
  (13, 30),
  (14, 45),
  (15, 65),
  (16, 50),
  (17, 65),
  (18, 50),
  (19, 50),
  (20, 15),
  (21, 2),
  (22, 8),
  (23, 65),
  (24, 65),
  (25, 50),
  (26, 50),
  (27, 81),
  (28, 45),
  (29, 30),
  (30, 96),
  (31, 33)])

In [169]:
sorted_winnrz = sorted(winnrz, key=lambda x: x[1])
sorted_winnrz

[(21, 2),
 (2, 8),
 (22, 8),
 (0, 15),
 (20, 15),
 (13, 30),
 (29, 30),
 (31, 33),
 (14, 45),
 (28, 45),
 (3, 50),
 (12, 50),
 (16, 50),
 (18, 50),
 (19, 50),
 (25, 50),
 (26, 50),
 (7, 51),
 (9, 51),
 (4, 65),
 (5, 65),
 (8, 65),
 (10, 65),
 (11, 65),
 (15, 65),
 (17, 65),
 (23, 65),
 (24, 65),
 (1, 66),
 (6, 71),
 (27, 81),
 (30, 96)]

In [170]:
names = np.genfromtxt('data_lab2/animalnames.txt',
                     dtype=str)
names

array(["'antelop'", "'ape'", "'bat'", "'bear'", "'beetle'", "'butterfly'",
       "'camel'", "'cat'", "'crocodile'", "'dog'", "'dragonfly'",
       "'duck'", "'elephant'", "'frog'", "'giraffe'", "'grasshopper'",
       "'horse'", "'housefly'", "'hyena'", "'kangaroo'", "'lion'",
       "'moskito'", "'ostrich'", "'pelican'", "'penguin'", "'pig'",
       "'rabbit'", "'rat'", "'seaturtle'", "'skunk'", "'spider'",
       "'walrus'"], dtype='<U13')

In [171]:
for w in sorted_winnrz:
    print(names[w[0]], w[1])

'moskito' 2
'bat' 8
'ostrich' 8
'antelop' 15
'lion' 15
'frog' 30
'skunk' 30
'walrus' 33
'giraffe' 45
'seaturtle' 45
'bear' 50
'elephant' 50
'horse' 50
'hyena' 50
'kangaroo' 50
'pig' 50
'rabbit' 50
'cat' 51
'dog' 51
'beetle' 65
'butterfly' 65
'crocodile' 65
'dragonfly' 65
'duck' 65
'grasshopper' 65
'housefly' 65
'pelican' 65
'penguin' 65
'ape' 66
'camel' 71
'rat' 81
'spider' 96
