### Imports and loading data

In [6]:
# Read data from Java SOMToolbox
from SOMToolBox_Parse import SOMToolBox_Parse
from minisom import MiniSom
from somtoolbox import SOMToolbox
import pickle
import collections
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import os.path
from os import path

# All datasets live under datasets/<name>/<name><extension>. The paths are assembled
# with os.path.join instead of hard-coded "\\" separators so that the notebook also
# runs on Linux/macOS (on Windows the resulting strings are the same as before).
def _read_som_file(dataset, extension):
    """Parse datasets/<dataset>/<dataset><extension> with SOMToolBox_Parse.

    Returns whatever SOMToolBox_Parse(...).read_weight_file() returns for that file.
    """
    file_name = os.path.join("datasets", dataset, dataset + extension)
    return SOMToolBox_Parse(file_name).read_weight_file()

#load iris data
idata = _read_som_file("iris", ".vec")
weights = _read_som_file("iris", ".wgt.gz")
classes = _read_som_file("iris", ".cls")

#load 10clusters data
clusters_data = _read_som_file("10clusters", ".vec")
clusters_classes = _read_som_file("10clusters", ".cls")
clusters_weight = _read_som_file("10clusters", ".wgt")

#load chainlink data
chain_data = _read_som_file("chainlink", ".vec")
chain_classes = _read_som_file("chainlink", ".cls")
chain_weight = _read_som_file("chainlink", ".wgt")

### Functions for the adjacency maps needed for mnemonic SOM

In [7]:
def read_bw_silhouette(filename):
    """Load a silhouette image as a 2-D float array scaled to the range [0, 1].

    The image is converted to 8-bit grayscale ('L') and divided by 255, so pure
    white pixels become 1.0 and pure black pixels become 0.0. Gray pixels (for
    example anti-aliased edges) yield values strictly between 0 and 1.

    Parameters
    ----------
    filename : path of the image file to read.

    Returns
    -------
    numpy.ndarray with one entry per pixel.
    """
    # The context manager releases the underlying file handle as soon as the
    # pixel data has been converted.
    with Image.open(filename) as img:
        return np.array(img.convert('L')) / 255

def bfs(grid, start, wall, goal):
    """Breadth-first search for a shortest 4-connected path inside a 2-D grid.

    Parameters
    ----------
    grid  : 2-D array-like supporting ``grid.shape`` and ``grid[x][y]``.
    start : (x, y) tuple of the cell the search starts from.
    wall  : cell value that must not be entered.
    goal  : cell value that marks the target; the search stops at the first
            cell holding this value.

    Returns
    -------
    list of (x, y) tuples from ``start`` to the goal cell (both included), or
    ``None`` when no goal cell can be reached.
    """
    max_x, max_y = grid.shape[0], grid.shape[1]
    # came_from[cell] is the cell from which `cell` was first reached. It doubles as
    # the visited set and replaces copying the whole path for every enqueued cell.
    came_from = {start: None}
    frontier = collections.deque([start])
    while frontier:
        cell = frontier.popleft()
        x, y = cell
        if grid[x][y] == goal:
            # walk the predecessor chain back to the start, then flip it
            route = []
            while cell is not None:
                route.append(cell)
                cell = came_from[cell]
            route.reverse()
            return route
        # neighbour order (down, up, right, left) decides ties between equally short paths
        for neighbour in ((x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)):
            x2, y2 = neighbour
            if (0 <= x2 < max_x and 0 <= y2 < max_y
                    and grid[x2][y2] != wall and neighbour not in came_from):
                came_from[neighbour] = cell
                frontier.append(neighbour)
    return None

In [8]:
def _path_lengths_from(passable, start):
    """Single-source BFS: number of cells on the shortest 4-connected path from start.

    Returns a float array shaped like ``passable``; the start cell holds 1, a direct
    neighbour 2, and so on. Cells that cannot be reached keep the value 0.
    """
    max_x, max_y = passable.shape
    lengths = np.zeros(passable.shape)
    lengths[start] = 1
    frontier = collections.deque([start])
    while frontier:
        x, y = frontier.popleft()
        for x2, y2 in ((x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)):
            # a length of 0 means "not visited yet"
            if (0 <= x2 < max_x and 0 <= y2 < max_y
                    and passable[x2, y2] and lengths[x2, y2] == 0):
                lengths[x2, y2] = lengths[x, y] + 1
                frontier.append((x2, y2))
    return lengths


def image_array_to_adjacency_mapping(img_np):
    """Map every pixel (i, j) of a silhouette to a matrix of neighbourhood strengths.

    A pixel counts as part of the shape when its value is not 0.0. For a pixel
    (i, j) inside the shape, entry [k, l] of its matrix is 1 / n, where n is the
    number of cells on the shortest 4-connected path from (i, j) to (k, l) that
    stays inside the shape (both end points counted, so the pixel itself gets 1
    and a direct neighbour 0.5). Entries for pixels outside the shape, and for
    pixels that cannot be reached, are 0. For a pixel (i, j) outside the shape
    the whole matrix is 0.

    The input array is only read, never modified. One breadth-first search is
    run per source pixel instead of one per pixel pair.

    Parameters
    ----------
    img_np : 2-D numpy array, 0.0 marks pixels outside the shape.

    Returns
    -------
    dict mapping (i, j) tuples to float arrays with the same shape as ``img_np``.
    """
    n_rows, n_cols = img_np.shape[0], img_np.shape[1]
    inside = np.asarray(img_np) != 0.0  # True where the pixel belongs to the shape

    adjacency_dict = {}
    for i in range(n_rows):
        for j in range(n_cols):
            strengths = np.zeros((n_rows, n_cols))
            if inside[i, j]:
                path_lengths = _path_lengths_from(inside, (i, j))
                reached = path_lengths > 0
                strengths[reached] = 1 / path_lengths[reached]
                # Report a disconnected shape once per source pixel, naming the first
                # unreachable pixel in row-major order; its strength stays 0.
                cut_off = np.argwhere(inside & ~reached)
                if len(cut_off) > 0:
                    k, l = (int(v) for v in cut_off[0])
                    print("Unable to find path between nodes ", (i, j), "and", (k, l))
            adjacency_dict[i, j] = strengths
    return adjacency_dict

### Training a mnemonic SOM in the shape of Austria with the 10clusters and iris data

In [12]:
#load silhouette image and transform to numpy array:
img_arr = read_bw_silhouette('austria.png')
print(img_arr.shape)

#create the adjacency dictionary, or load it if a pickled copy already exists.
# NOTE(review): pickle.load can execute arbitrary code, so only keep a cache file that
# was written by this notebook. The cache is not invalidated when austria.png changes;
# delete the pickle file to force a rebuild.
adjacency_cache = "adjacency_austria.pickle"
if path.exists(adjacency_cache):
    with open(adjacency_cache, 'rb') as handle:
        adjacency_dict = pickle.load(handle)
else:
    adjacency_dict = image_array_to_adjacency_mapping(img_arr)
    with open(adjacency_cache, "wb") as output_file:
        pickle.dump(adjacency_dict, output_file)

#train mnemonic SOM
# The map dimensions are taken from the silhouette (one unit per pixel) instead of being
# hard-coded, so they stay consistent with the per-pixel lookup table.
som_rows, som_cols = img_arr.shape
data = clusters_data
som = MiniSom(som_rows, som_cols, data['vec_dim'], sigma=2, learning_rate=1,
              neighborhood_function='lookup', activation_distance='manhattan_mnemonic',
              lookuptable=adjacency_dict, mnemonic_shape=img_arr)
som.train(data['arr'], 1000)

#visualize
sm = SOMToolbox(weights=som._weights.reshape(-1, data['vec_dim']),
                n=som_cols, m=som_rows, dimension=data['vec_dim'], input_data=data['arr'])
sm._mainview

(20, 40)


In [13]:
#train mnemonic SOM on iris data, result can be compared to the 2005 paper
#(https://publik.tuwien.ac.at/files/pub-inf_2979.pdf), Figure 3
# Reuses img_arr and adjacency_dict from the silhouette cell. The map dimensions are read
# from the silhouette instead of being hard-coded, so they always match the lookup table.
data = idata
som = MiniSom(img_arr.shape[0], img_arr.shape[1], data['vec_dim'], sigma=2, learning_rate=1,
              neighborhood_function='lookup', activation_distance='manhattan_mnemonic',
              lookuptable=adjacency_dict, mnemonic_shape=img_arr)
som.train(data['arr'], 1000)

#visualize
sm = SOMToolbox(weights=som._weights.reshape(-1, data['vec_dim']),
                n=img_arr.shape[1], m=img_arr.shape[0], dimension=data['vec_dim'],
                input_data=data['arr'])
sm._mainview

#### Normal SOM trained in SOMToolbox and imported

In [14]:
# Display the SOM for the 10clusters dataset whose weights were trained in the
# SOMToolbox and loaded above (no training happens in this cell).
# NOTE(review): component_names is filled from the class names - confirm this is intended.
sm = SOMToolbox(
    weights=clusters_weight['arr'],
    n=clusters_weight['xdim'],
    m=clusters_weight['ydim'],
    dimension=clusters_weight['vec_dim'],
    input_data=clusters_data['arr'],
    classes=clusters_classes['arr'],
    component_names=clusters_classes['classes_names'],
)
sm._mainview