In [None]:
import numpy as np

class SOM:
    """One-dimensional self-organizing map (SOM).

    The nodes are arranged on a line (no wrap-around): the neighbourhood of a
    winning node ``w`` is every node whose index ``i`` satisfies
    ``|i - w| <= neighborhood_size``.

    Parameters
    ----------
    input_size : int
        Number of components in each input vector.
    num_nodes : int, default 100
        Number of nodes (rows of the weight matrix).
    initial_lr : float, default 0.2
        Learning rate. It is stored both as ``initial_lr`` and ``lr``; this
        class never decays ``lr`` during training.
    initial_neighborhood : int, default 50
        Neighbourhood radius used in the first epoch; it shrinks linearly
        towards zero over the epochs passed to :meth:`train`.
    seed : int or None, default None
        When given, the weights are drawn from a dedicated
        ``np.random.default_rng(seed)`` generator so the instance is
        reproducible on its own. When ``None`` the weights come from NumPy's
        global generator (``np.random.rand``), so a prior ``np.random.seed``
        call still controls them.
    """

    def __init__(self, input_size, num_nodes=100, initial_lr=0.2,
                 initial_neighborhood=50, seed=None):
        self.num_nodes = num_nodes
        if seed is None:
            # Global generator: uniform random numbers in [0, 1)
            self.weights = np.random.rand(num_nodes, input_size)
        else:
            # Private generator: same distribution, reproducible per instance
            rng = np.random.default_rng(seed)
            self.weights = rng.random((num_nodes, input_size))
        self.initial_lr = initial_lr
        self.lr = initial_lr
        self.initial_neighborhood = initial_neighborhood
        self.neighborhood_size = None  # Set at the start of every training epoch

    def train(self, X, epochs):
        """Fit the map by presenting every row of ``X`` once per epoch.

        The neighbourhood radius starts at ``initial_neighborhood`` and is
        reduced linearly with the epoch index (truncated to an int, never
        below zero). With ``epochs == 0`` nothing happens.
        """
        for epoch in range(epochs):
            # Linearly shrinking radius, clamped so it never goes negative
            radius = int(self.initial_neighborhood * (1 - epoch / epochs))
            self.neighborhood_size = max(radius, 0)
            for x in X:
                self.update_weights(x)

    def update_weights(self, x):
        """Move the winner for ``x`` and its index-neighbours towards ``x``.

        Every node within ``neighborhood_size`` index steps of the winner is
        updated as ``w += lr * (x - w)``. ``neighborhood_size`` must have been
        set (``train`` does this) before calling this method.
        """
        winner = self.find_winner(x)
        # Boolean mask of the nodes inside the neighbourhood of the winner
        in_neighborhood = (
            np.abs(np.arange(self.num_nodes) - winner) <= self.neighborhood_size
        )
        # Single vectorised update instead of a Python loop over the nodes
        self.weights[in_neighborhood] += self.lr * (x - self.weights[in_neighborhood])

    def find_winner(self, x):
        """Return the index of the node whose weight vector is closest to ``x``.

        Closeness is the Euclidean norm; ties resolve to the lowest index
        (``np.argmin`` semantics).
        """
        distances = np.linalg.norm(self.weights - x, axis=1)
        return np.argmin(distances)

    def get_positions(self, X):
        """Return a list with the winning node index for each row of ``X``."""
        return [self.find_winner(x) for x in X]


In [15]:
import numpy as np

# Read the raw text of 'animals.dat' (one comma-separated stream of integers)
with open('data_lab2/animals.dat', 'r') as f:
    data = f.read()

# Parse each comma-separated token into an int
values = [int(token) for token in data.strip().split(',')]

# Sanity check on the total count (32 animals * 84 attributes = 2688)
assert len(values) == 32 * 84, f"Expected 2688 values, got {len(values)}"

# Lay the flat list out as a 32x84 array (row-major, so each run of 84
# consecutive values becomes one row)
props = np.reshape(np.array(values), (32, 84))


In [19]:
# Read the raw text of 'animalnames.txt'
with open('data_lab2/animalnames.txt', 'r') as f:
    names_line = f.read()

# The file is split on newlines (one entry per line); each entry is then
# stripped of surrounding whitespace and of its enclosing single quotes
name_lines = names_line.strip().split('\n')
animal_names = [entry.strip().strip("'") for entry in name_lines]

# There must be exactly one name per animal (32 in total)
assert len(animal_names) == 32, f"Expected 32 animal names, got {len(animal_names)}"


['antelop', 'ape', 'bat', 'bear', 'beetle', 'butterfly', 'camel', 'cat', 'crocodile', 'dog', 'dragonfly', 'duck', 'elephant', 'frog', 'giraffe', 'grasshopper', 'horse', 'housefly', 'hyena', 'kangaroo', 'lion', 'moskito', 'ostrich', 'pelican', 'penguin', 'pig', 'rabbit', 'rat', 'seaturtle', 'skunk', 'spider', 'walrus']


In [20]:


# Fix the seed of NumPy's global generator so the random initial weights,
# and therefore the printed ordering, are reproducible on Restart & Run All
SEED = 42
np.random.seed(SEED)

# Initialize the SOM with one input component per attribute column
input_size = props.shape[1]  # 84 attributes
som = SOM(input_size=input_size)

# Train the SOM
epochs = 20
som.train(props, epochs)

# Winning node index for every animal (same order as the rows of props)
positions = som.get_positions(props)

# Pair each animal with its position
animal_positions = list(zip(animal_names, positions))

# Sort animals by node index so neighbours on the map print next to each other
sorted_animals = sorted(animal_positions, key=lambda x: x[1])

# Print the sorted animal names
for name, pos in sorted_animals:
    print(f"{name} at position {pos}")




beetle at position 0
dragonfly at position 0
grasshopper at position 0
butterfly at position 5
housefly at position 8
moskito at position 8
spider at position 13
duck at position 23
pelican at position 23
penguin at position 28
ostrich at position 31
frog at position 36
seaturtle at position 41
crocodile at position 42
walrus at position 49
dog at position 54
hyena at position 59
bear at position 60
lion at position 64
ape at position 67
cat at position 70
skunk at position 73
bat at position 78
rat at position 81
elephant at position 85
rabbit at position 88
kangaroo at position 91
antelop at position 93
horse at position 96
camel at position 99
giraffe at position 99
pig at position 99
