In [47]:
import pandas as pd
import numpy as np
import networkx as nx

In [48]:
import holoviews as hv
import panel as pn
import hvplot
import hvplot.pandas
from holoviews import opts
# Render HoloViews objects with the Bokeh backend.
hv.extension('bokeh')

# One shared default size for every graph-related element type.
defaults = {'width': 400, 'height': 400}
hv.opts.defaults(
    *(element(**defaults) for element in (opts.EdgePaths, opts.Graph, opts.Nodes))
)

In [49]:
from sklearn.datasets import load_iris

# Load Iris with pandas containers; `df` is the combined frame used throughout.
iris = load_iris(as_frame=True)
df = iris.frame
# Every column name of the frame except the last one.
cols = list(df.columns[:-1])
# Default axis labels.
x_label, y_label = 'sepal length (cm)', 'petal length (cm)'

In [50]:
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
# Standardise the first four columns of the frame in a single step.
X = StandardScaler().fit_transform(df.iloc[:, :4])
# Class labels, taken from the frame's 'target' column.
labels = df['target']

In [51]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# Fit a two-component LDA on X against the class labels and overwrite X with
# the projection. Because X is overwritten, re-running this cell alone would
# fit on the already projected data.
lda = LinearDiscriminantAnalysis(n_components=2, store_covariance=True)
X = lda.fit(X, df['target']).transform(X)

In [52]:
# Projected samples as a frame with their class labels attached.
X_df = pd.DataFrame(X, columns=['X', 'Y']).assign(labels=labels)
# Colour lists: `colors` for the scatter plots, `colors1` as colormap for the
# clustered graph (gray listed first).
colors = ['blue', 'green', 'orange', 'gray']
colors1 = ['gray', 'blue', 'green', 'orange']

In [53]:
# Scatter of the projected samples, one colour per label.
data_plot = X_df.hvplot.scatter(
    'X', 'Y', by='labels', marker='o', size=100, color=colors,
).opts(
    height=500,
    width=700,
    tools=['hover'],
    toolbar='left',
    title="Пространство признаков",
    legend_position='right',
    show_grid=True,
)
data_plot

In [54]:
# Initial unit weights: a regular square grid over [-d, d] x [-d, d] with at
# least as many units as there are samples in X.
grid_size = int(np.ceil(np.sqrt(X.shape[0])))
d = 1
xx = np.linspace(-d, d, grid_size)
yy = np.linspace(-d, d, grid_size)
# Row order: x varies fastest, y slowest.
W = np.column_stack([np.tile(xx, grid_size), np.repeat(yy, grid_size)])

In [55]:
# Unit topology and neighbourhood links come from a networkx lattice.
# Row i of W belongs to the i-th node in G.nodes() order.
G = nx.grid_2d_graph(m=grid_size, n=grid_size)
position = nx.spring_layout(G, scale=1)

# Lookup tables: row index -> lattice coordinate, and the inverse.
i2c = dict(enumerate(G.nodes()))
c2i = {coord: idx for idx, coord in i2c.items()}

# If the node count and the number of weight rows differ, append that many
# random rows to W.
diff = np.abs(len(G.nodes()) - W.shape[0])
if diff > 0:
    W = np.concatenate([W, np.random.random(size=(diff, W.shape[1]))])

In [56]:
# Put every node at (a copy of) its current weight vector and overlay the
# lattice on the data scatter.
position.update({c: np.array(W[i]) for i, c in enumerate(G.nodes())})
node_plot = hv.Graph.from_networkx(G, position)
node_plot.opts(
    inspection_policy='nodes',
    edge_color='gray', edge_hover_line_color='black', edge_alpha=0.5,
    node_color='gray', node_hover_fill_color='black', node_alpha=0.5,
    node_size=10,
)
data_plot*node_plot

In [57]:
def f1(x, k):
    """Return ``exp(x * k)``; used as the learning-rate curve."""
    exponent = x * k
    return np.exp(exponent)


def f2(x, s):
    """Return ``exp(-(x / s**2))``; used as the neighbourhood damping curve."""
    scaled = x / (s ** 2)
    return np.exp(-scaled)


def learning_rate(s, k):
    """Learning rate at step ``s`` with decay constant ``k`` (delegates to f1)."""
    return f1(x=s, k=k)


def restraint(G, best, n, sigma=1):
    """Damping factor for node ``n`` relative to the winning node ``best``.

    The graph distance (shortest path length in ``G``) between the two nodes
    is passed through ``f2`` with ``s=sigma``.
    """
    return f2(nx.shortest_path_length(G, source=best, target=n), s=sigma)

In [58]:
import time
import progressbar

# Data-point indices won by each unit (neuron), keyed by the unit's row index in W
winners = {unit: [] for unit in range(W.shape[0])}

max_iter = 15000 # 10000

# Decay constant of the learning rate; the same for every iteration, so set once
k = -(1/1000) #1000

# Copy of W before training, followed by one copy per completed iteration
weights = [np.copy(W)]

# NOTE(review): no random seed is set in the visible cells before this loop,
# so the sampled sequence differs between runs -- confirm whether intended.
with progressbar.ProgressBar(max_value=max_iter, enable_colors=False) as bar:
    for s in range(max_iter):
        bar.next()

        # Draw one random sample
        r_idx = np.random.randint(X.shape[0])
        x = X[r_idx, :]

        # Best matching unit (BMU): the row of W closest to x (Euclidean).
        # Broadcasting x against W yields the same differences as stacking
        # one copy of x per unit, without the extra allocation.
        dists = np.linalg.norm(x - W, axis=1)
        best_idx = np.argmin(dists)

        # Learning rate for this step: exp(s * k)
        a = learning_rate(s, k)

        # Pull the BMU towards the sample
        W[best_idx, :] = W[best_idx, :] + a * (x - W[best_idx, :])

        # Remember which sample this unit won
        winners[best_idx].append(r_idx)

        # Pull the BMU's direct lattice neighbours towards the sample,
        # damped by restraint()
        bmu_node = i2c[best_idx]
        for n in G[bmu_node]:
            row = c2i[n]
            W[row, :] = W[row, :] + restraint(G, bmu_node, n) * a * (x - W[row, :])

        # Disabled alternative: update every node instead of only the neighbours
        # for n in G.nodes():
        #      W[c2i[n], :] = W[c2i[n], :] + restraint(G, i2c[best_idx], n) * a * (x - W[c2i[n], :])

        weights.append(np.copy(W))

100% (15000 of 15000) |##################| Elapsed Time: 0:00:02 Time:  0:00:02


In [59]:
# `weights` holds the pre-training state plus one snapshot per iteration, so its
# last valid index is len(weights) - 1; the slider must reach it to show the
# final trained network.
n_iter = pn.widgets.IntSlider(name='Итерация', start=0, end=len(weights)-1, value=0,  width = 250)

def get_net(n_iter):
    """Overlay the network as it was at snapshot ``n_iter`` on the data scatter.

    Side effect: overwrites the entries of the module-level ``position`` dict
    with the node coordinates of the chosen snapshot, so any later code that
    reads ``position`` sees the most recently rendered state.
    """
    snapshot = weights[n_iter]
    position.update(
        (node, np.array(snapshot[row])) for row, node in enumerate(G.nodes())
    )
    net_plot = hv.Graph.from_networkx(G, position)
    net_plot.opts(
        inspection_policy='nodes',
        edge_color='gray', edge_hover_line_color='black', edge_alpha=0.5,
        node_color='gray', node_hover_fill_color='black', node_alpha=0.5,
        node_size=10,
    )
    return data_plot * net_plot

# Interactive view: the network plot follows the iteration slider.
# The sidebar label spells the map's name "Кохонена", matching the plot titles.
pn.Row(
    pn.pane.HoloViews(
        pn.bind(get_net, n_iter)
    ).servable(),
    pn.WidgetBox(
        pn.Column(
            "Карта Кохонена",
            n_iter,
            height = 400,
            ).servable(target='sidebar')
    ),
)
    
    

In [60]:
# Match unit to its most frequently "won" data point, else assign no label
def best_match(idxs):
    """Return the most frequent element of ``idxs``, or ``None`` if it is empty.

    Ties are resolved in favour of the element whose first occurrence comes
    earliest in ``idxs``. Counting is done in a single pass instead of calling
    ``idxs.count`` once per element.
    """
    from collections import Counter

    if not idxs:
        return None
    counts = Counter(idxs)
    # Counter keeps first-occurrence order and max() returns the first maximal
    # key, which gives the tie-breaking rule described above.
    return max(counts, key=counts.get)

# Most frequently won data-point index per unit (None when the unit never won)
unit_match = {i: best_match(idxs) for i, idxs in winners.items()}

# Label of the matched data point for every unit that has a match.
# The test is `is not None` because data-point index 0 is a valid match but falsy.
unit_label = {i: labels[idx] for i, idx in unit_match.items() if idx is not None}

# Lattice coordinates and labels of the matched units
# (plotted as a scatter later; a heat map is another option)
results = np.array([list(i2c[k]) for k in unit_label.keys()])
pred = list(unit_label.values())

# Split the lattice nodes into those with a label and those without
unit_wins= [(i2c[k]) for k in unit_label.keys()]
unit_lose= set(G.nodes) - set(unit_wins)

# Node -> cluster value: -1 for unlabelled nodes, the matched label otherwise
node_lose= dict.fromkeys(unit_lose, -1)
node_wins = dict(zip(unit_wins, pred))

In [61]:
#nx.get_node_attributes(G,'cluster')

In [62]:
nx.set_node_attributes(G, node_lose | node_wins, 'cluster')
# Lay the nodes out at their trained weight vectors taken directly from W.
# The shared `position` dict is rewritten by the slider callback, so reading it
# here would make this figure depend on the last slider state.
trained_position = {c: np.array(W[c2i[c]]) for c in G.nodes()}
node_plot=hv.Graph.from_networkx(G, trained_position)
node_plot.opts(inspection_policy='nodes', edge_color='gray', edge_hover_line_color='black', edge_alpha=0.5,
                node_hover_fill_color='black', node_alpha=0.95,
               node_size=10, node_color='cluster', cmap=colors1)
data_plot*node_plot

In [63]:
print("Missing nodes:")
# A unit is missing only when it has no match at all (None); a match on
# data-point index 0 is falsy but still a real match.
print([i2c[k] for k, v in unit_match.items() if v is None])

Missing nodes:
[(0, 6), (0, 7), (0, 8), (1, 6), (1, 7), (2, 6), (2, 7), (3, 7), (3, 8), (4, 7), (4, 8), (5, 7), (5, 8), (6, 7), (6, 8), (7, 7), (7, 8), (7, 9), (8, 4), (8, 5), (8, 6), (8, 7), (8, 8), (8, 9), (8, 10), (8, 11), (9, 5), (9, 6), (9, 7), (9, 8), (9, 9), (9, 10), (9, 11), (9, 12), (10, 4), (10, 5), (10, 8), (10, 9), (10, 10), (10, 11), (10, 12), (11, 3), (11, 4), (11, 8), (11, 10), (11, 11), (11, 12), (12, 9), (12, 10), (12, 11), (12, 12)]


In [64]:
# Text labels (the unit index) at each unit's lattice coordinate, split by
# whether the unit has a match. Compare against None explicitly: a match on
# data-point index 0 is falsy but valid.
win_labels_df=pd.DataFrame( [(i2c[i]+(i,))  for i, v in unit_match.items() if v is not None], columns=['X','Y','Text'])
lose_labels_df=pd.DataFrame( [(i2c[i]+(i,))  for i, v in unit_match.items() if v is None], columns=['X','Y','Text'])

In [65]:
# Labelled units on the lattice, coloured by their assigned label.
res_df = pd.DataFrame(results, columns=['X', 'Y']).assign(labels=pred)
res_plot = res_df.hvplot.scatter(
    'X', 'Y', by='labels', marker='o', size=200, color=colors,
).opts(
    height=500,
    width=700,
    tools=['hover'],
    toolbar='left',
    title="Карта Кохонена",
    legend_position='right',
    show_grid=True,
)

# Unit indices as text: cyan for matched units, red for unmatched ones.
win_labels_plot = hv.Labels(
    win_labels_df, kdims=['X', 'Y'], vdims=['Text'],
).opts(text_font_size='6pt', text_color='cyan')
lose_plot = hv.Labels(
    lose_labels_df, kdims=['X', 'Y'], vdims=['Text'],
).opts(text_font_size='6pt', text_color='red')

res_plot*win_labels_plot*lose_plot

In [66]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
# Standardised copy of the first four columns of the frame (no LDA projection).
X_s = StandardScaler().fit_transform(df.iloc[:, :4])

In [67]:
#!pip install minisom

In [68]:
# ! pip install minisom
from minisom import MiniSom
# Map dimensions of the MiniSom grid.
m = n = 10
# One input per column of X_s; weights start from a PCA-based initialisation
# and are then trained on randomly picked samples.
som = MiniSom(
    m, n, X_s.shape[1],
    sigma=3,
    learning_rate=0.1,
    neighborhood_function='triangle',
    random_seed=10,
)
som.pca_weights_init(X_s)
som.train_random(X_s, 1000, verbose=0)

  self._weights[i, j] = c1*pc[pc_order[0]] + \


In [69]:
def classify(som, data, train_data=None, train_labels=None):
    """Assign a class to every sample in ``data`` using the map's label counts.

    The label map is built with ``som.labels_map(train_data, train_labels)``.
    Each sample receives the most common label of its winning unit; samples
    whose winning unit has no recorded labels receive the overall most common
    label.

    Parameters
    ----------
    som : object providing ``labels_map(data, labels)`` and ``winner(sample)``.
    data : iterable of samples to classify.
    train_data, train_labels : optional
        Samples and labels used to build the label map. When omitted, the
        module-level ``X_s`` and ``labels`` are used, so existing
        ``classify(som, data)`` calls keep working.

    Returns
    -------
    list
        One class per element of ``data``, in order.

    Raises
    ------
    ValueError
        If the label map contains no labels at all.
    """
    from collections import Counter

    if train_data is None:
        train_data = X_s
    if train_labels is None:
        train_labels = labels

    winmap = som.labels_map(train_data, train_labels)

    # Overall label frequencies across all units, used for the fallback class.
    totals = Counter()
    for unit_counts in winmap.values():
        totals.update(unit_counts)
    if not totals:
        raise ValueError("labels_map returned no labels; cannot pick a default class")
    default_class = totals.most_common(1)[0][0]

    # Majority label per unit, resolved once rather than once per sample.
    majority = {
        unit: unit_counts.most_common(1)[0][0]
        for unit, unit_counts in winmap.items()
        if unit_counts
    }
    return [majority.get(som.winner(sample), default_class) for sample in data]

In [70]:
# Per-class precision/recall of the SOM classifier on the samples it was trained on.
print(classification_report(y_true=labels, y_pred=classify(som, X_s)))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        50
           1       0.98      1.00      0.99        50
           2       1.00      0.98      0.99        50

    accuracy                           0.99       150
   macro avg       0.99      0.99      0.99       150
weighted avg       0.99      0.99      0.99       150



In [71]:
# Pixel size used per map cell when sizing the image.
m_size=25
# Transposed distance map of the trained SOM, shown as a heat image.
d_map=som.distance_map().T
map_plot=hv.Image(d_map, bounds=(0,0,n,m))
map_plot.opts(title="Карта Кохонена", height=n*m_size, width=m*m_size,  cmap='hot', colorbar=True)
# Look up each sample's winning unit exactly once; the +0.5 offset shifts the
# marker from the cell corner to the cell centre.
win_map=[(row+0.5, col+0.5) for row, col in map(som.winner, X_s)]
win_df=pd.DataFrame(win_map,columns= ['X','Y'])
win_df['labels']=labels
win_plot=win_df.hvplot.scatter('X', 'Y', by='labels', marker='o', size=150, color=colors).opts(
                                
                                tools=['hover'],  toolbar= 'left',
                                title="Пространство признаков", legend_position='right', show_grid=True,
                               
    )
(map_plot*win_plot).opts(height=500, width=700 )

In [72]:
def som_fit(X, n_clusters):
    """Train and return a MiniSom with a 1 x ``n_clusters`` grid on ``X``.

    The number of inputs is taken from ``X.shape[1]``; training runs
    ``train_batch`` for 5000 iterations with a fixed random seed.
    """
    n_rows, n_cols = 1, n_clusters
    n_features = X.shape[1]
    som = MiniSom(
        n_rows, n_cols, n_features,
        sigma=.5,
        learning_rate=.5,
        neighborhood_function='gaussian',
        random_seed=12345,
    )
    som.train_batch(X, 5000, verbose=False)
    return som

In [73]:
# Initial axis selections for the clustering explorer.
x_label, y_label = 'sepal length (cm)', 'petal width (cm)'

In [74]:
# Feature selectors for the two plot axes.
x = pn.widgets.Select(
    name='x',
    options=cols,
    value=x_label,
)
y = pn.widgets.Select(
    name='y',
    options=cols,
    value=y_label,
)

# Number of SOM units (clusters) and a toggle for the original-class overlay.
n_clusters = pn.widgets.IntSlider(
    name='n_clusters',
    start=1,
    end=12,
    value=3,
)
check = pn.widgets.Checkbox(
    name='Исходные классы',
    value=False,
)

# Fix NumPy's global random state.
np.random.seed(1234)

def get_clusters(x, y, n_clusters, check):
    """Cluster the samples with a 1 x ``n_clusters`` SOM and plot the result.

    Parameters
    ----------
    x, y : str
        Column names of ``df`` used for the horizontal and vertical axes.
    n_clusters : int
        Number of SOM units; each unit is treated as one cluster.
    check : bool
        When true, the original classes and their centres are overlaid.

    Returns
    -------
    HoloViews overlay with the cluster scatter, the cluster centres and,
    optionally, the class scatter and class centres.

    Notes
    -----
    The SOM is trained on the module-level ``X`` and the resulting cluster ids
    are written into the module-level ``df`` as column ``'cluster'`` on every
    call.
    """
    som = som_fit(X, n_clusters)
    som_shape = (1, n_clusters)
    # Winning grid coordinates per sample, transposed to one row per grid axis
    # so they can be flattened into a single cluster id.
    winner_coordinates = np.array([som.winner(sample) for sample in X]).T

    df['cluster'] = np.ravel_multi_index(winner_coordinates, som_shape)

    # Avoid selecting the same column twice when both axes show one feature.
    feature_cols = [x] if x == y else [x, y]

    cluster_centers = df.groupby('cluster')[feature_cols].mean()
    cluster_plot = df.hvplot.scatter(x, y, by='cluster', marker='o', size=100,
                                     color=['orange', 'blue', 'green', 'cyan', 'magenta', 'yellow', 'red'])
    cluster_centers_plot = hv.Scatter(cluster_centers, label='Cluster centers')
    cluster_centers_plot.opts(color='k', marker='x', size=15, line_width=5)

    if check:
        # The class overlay is only built when it is actually shown.
        class_plot = df.hvplot.scatter(x, y, by='target', marker='o', size=50,
                                       color=['blue', 'orange', 'green'])
        centers = df.groupby('target')[feature_cols].mean()
        centers_plot = hv.Scatter(centers, label='Class centers')
        centers_plot.opts(marker='x', size=15, line_width=5, color='red')
        fig = cluster_plot * class_plot * cluster_centers_plot * centers_plot
    else:
        fig = cluster_plot * cluster_centers_plot

    # The title names the method actually used (SOM).
    fig.opts(xlabel=x, ylabel=y, width=600, height=500, tools=['hover'],
             title="Iris SOM Clustering", legend_position='top_left', show_grid=True)
    return fig
    

# Clustering explorer: the plot on the left re-renders whenever a widget changes;
# the widgets themselves are grouped in a box (and sent to the sidebar when served).
pn.Row(
    pn.pane.HoloViews(pn.bind(get_clusters, x, y, n_clusters, check)).servable(),
    pn.WidgetBox(
        pn.Column(
            "Признаки",
            x,
            y,
            check,
            "Кластеризация SOM",
            n_clusters,
        ).servable(target='sidebar'),
    ),
)