In [2]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from copy import deepcopy
import random
from sklearn.cluster import KMeans, AgglomerativeClustering, AffinityPropagation, DBSCAN
from msi_dimension_reducer import PCA, UMAP
from pyclusterbdmseed.algorithms import H2SOM
import pyclusterbdmseed.core as core
from scipy.spatial.distance import pdist, squareform
import seaborn as sns
import colorsys

In [16]:
def create_empty_img(dframe):
    """Return an all-zero canvas large enough to hold every pixel of ``dframe``.

    Pixel coordinates are read from the index levels ``"grid_x"`` and
    ``"grid_y"`` and cast to ``int``.

    Parameters
    ----------
    dframe : pandas.DataFrame
        Frame whose index carries the levels ``"grid_x"`` and ``"grid_y"``.

    Returns
    -------
    numpy.ndarray
        Zero-filled array of shape ``(max(grid_y) + 1, max(grid_x) + 1)``.
    """
    index = dframe.index
    cols = index.get_level_values("grid_x").astype(int)
    rows = index.get_level_values("grid_y").astype(int)
    height = rows.max() + 1
    width = cols.max() + 1
    return np.zeros((height, width))

def create_binary_img(dframe):
    """Return a 0/1 mask marking every pixel position present in ``dframe``.

    Pixel coordinates are read from the index levels ``"grid_x"`` and
    ``"grid_y"`` and cast to ``int``.

    Parameters
    ----------
    dframe : pandas.DataFrame
        Frame whose index carries the levels ``"grid_x"`` and ``"grid_y"``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(max(grid_y) + 1, max(grid_x) + 1)`` holding ``1`` at
        every ``(grid_y, grid_x)`` found in the index and ``0`` elsewhere.
    """
    index = dframe.index
    cols = index.get_level_values("grid_x").astype(int)
    rows = index.get_level_values("grid_y").astype(int)
    mask = np.zeros((rows.max() + 1, cols.max() + 1))
    # Row index is y, column index is x.
    mask[rows, cols] = 1
    return mask

def cart2polar(x,y):
    """Convert Cartesian coordinates to polar coordinates.

    Works element-wise on scalars or numpy arrays.

    Returns
    -------
    tuple
        ``(theta, r)`` where ``theta = arctan2(y, x)`` (radians) and
        ``r = sqrt(x**2 + y**2)``. Note the angle comes first.
    """
    radius = np.sqrt(x**2 + y**2)
    angle = np.arctan2(y, x)
    return angle, radius

def polar2cart(theta, r):
    """Convert polar coordinates (angle in radians, radius) to Cartesian.

    Works element-wise on scalars or numpy arrays.

    Returns
    -------
    tuple
        ``(x, y)`` with ``x = r * cos(theta)`` and ``y = r * sin(theta)``.
    """
    return r * np.cos(theta), r * np.sin(theta)

def normalize(newmin, newmax, minval, maxval, x):
    """Linearly rescale ``x`` from ``[minval, maxval]`` to ``[newmin, newmax]``.

    Parameters
    ----------
    newmin, newmax : number
        Bounds of the target interval.
    minval, maxval : number
        Bounds of the source interval; they must differ, otherwise the
        division below is a division by zero.
    x : number or numpy.ndarray
        Value(s) to rescale.

    Returns
    -------
    number or numpy.ndarray
        The rescaled value(s).
    """
    # Relative position of x inside the source interval (0 at minval, 1 at maxval).
    fraction = (x - minval) / (maxval - minval)
    return (newmax - newmin) * fraction + newmin

def Sx(x,L,H,k,x0,dist_range):
    """Evaluate a two-sided logistic curve on ``x``.

    ``x`` is first rescaled linearly from ``dist_range`` onto ``[-10, 10]``.
    Rescaled values below zero follow a logistic running from ``L`` to ``0``;
    all other values follow a logistic running from ``0`` to ``H``. Both
    halves use steepness ``2 * k`` and are centred at minus/plus half of the
    largest absolute rescaled value.

    Parameters
    ----------
    x : numpy.ndarray
        Input values (array arithmetic is applied to it).
    L : number
        Lower asymptote, approached on the negative side.
    H : number
        Upper asymptote, approached on the positive side.
    k : number
        Steepness; the curve uses ``2 * k``.
    x0 : number
        Accepted for interface compatibility but not used: the midpoints are
        derived from the data instead.
    dist_range : sequence
        ``(low, high)`` bounds of the input range mapped onto ``[-10, 10]``.

    Returns
    -------
    list
        One curve value per element of ``x``.
    """
    target_lo = -10
    target_hi = 10
    scaled = (target_hi - target_lo) * (x - dist_range[0]) / (dist_range[1] - dist_range[0]) + target_lo

    def _logistic(value, floor, ceiling, steepness, midpoint):
        # Generic logistic between `floor` and `ceiling`, centred at `midpoint`.
        return floor + ((ceiling - floor) / (1 + np.e ** (-steepness * (value - midpoint))))

    extent = np.max(abs(scaled))
    negative_mid = -(extent / 2)
    positive_mid = extent / 2

    curve = []
    for value in scaled:
        if value < 0:
            curve.append(_logistic(value, L, 0, k * 2, negative_mid))
        else:
            curve.append(_logistic(value, 0, H, k * 2, positive_mid))
    return curve
# Quick demo of Sx on the integers -10..9; note that this leaves the
# module-level names `x` and `y` defined for the rest of the notebook.
x = np.arange(-10, 10)
y = Sx(x, L=-1, H=1, k=0.5, x0=0, dist_range=(-10, 10))

In [17]:
def kmeans_clustering(embedding, real_embedding, nr_clusters, allpx, dim_method):
    """Cluster ``embedding`` with k-means and return colour-wheel positions.

    Parameters
    ----------
    embedding : numpy.ndarray
        Samples to cluster, one row per sample.
    real_embedding : bool
        If falsy, the cluster centres are passed through
        ``dimReductionDict[dim_method](centres, 2).perform()`` before being
        transformed (presumably a projection to two components - confirm
        against the reducer classes).
    nr_clusters : int
        Number of k-means clusters.
    allpx : bool
        If truthy, the clustering result is discarded: every row of
        ``embedding`` becomes its own prototype and the labels are
        ``0 .. n_rows - 1``.
    dim_method : str
        Key into the module-level ``dimReductionDict``.

    Returns
    -------
    tuple
        ``(proto_centers, labels)``.
    """
    model = KMeans(n_clusters=nr_clusters, random_state=42)
    model.fit(embedding)
    centroids = model.cluster_centers_
    if not real_embedding:
        reducer_cls = dimReductionDict[dim_method]
        centroids = reducer_cls(centroids, 2).perform()
    # Evaluated even in the allpx case, where its result is then replaced.
    positions = applyTransformation(centroids)
    if not allpx:
        return positions, model.labels_
    per_pixel_positions = newTransformation(embedding)
    per_pixel_labels = np.arange(np.size(embedding, 0))
    return per_pixel_positions, per_pixel_labels

def agglomerative_clustering(embedding, real_embedding, nr_clusters, allpx, dim_method):
    """Cluster ``embedding`` with ward-linkage agglomerative clustering.

    Prototypes are the per-cluster means of ``embedding``, ordered by
    ascending label so that row ``i`` belongs to label ``i``.

    Parameters
    ----------
    embedding : numpy.ndarray
        Samples to cluster, one row per sample.
    real_embedding : bool
        If falsy, the prototypes are passed through
        ``dimReductionDict[dim_method](prototypes, 2).perform()`` before being
        transformed (presumably a projection to two components - confirm
        against the reducer classes).
    nr_clusters : int
        Number of clusters.
    allpx : bool
        If truthy, the clustering result is discarded: every row of
        ``embedding`` becomes its own prototype and the labels are
        ``0 .. n_rows - 1``.
    dim_method : str
        Key into the module-level ``dimReductionDict``.

    Returns
    -------
    tuple
        ``(proto_centers, labels)``.
    """
    # The ``affinity`` keyword was deprecated in scikit-learn 1.2 and removed
    # in 1.4. Ward linkage only supports the Euclidean metric, which is the
    # default, so omitting the keyword keeps the behaviour on every version.
    e_agglomerative = AgglomerativeClustering(n_clusters=nr_clusters, linkage='ward').fit(embedding)
    labels = e_agglomerative.labels_
    # np.unique yields sorted labels, so prototype order does not depend on
    # set iteration order.
    proto = np.array([np.mean(embedding[labels == l], axis=0) for l in np.unique(labels)])
    if not real_embedding:
        proto = dimReductionDict[dim_method](proto, 2).perform()
    proto_centers = applyTransformation(proto)
    if allpx:
        proto_centers = newTransformation(embedding)
        labels = np.arange(np.size(embedding,0))
    return proto_centers, labels

def affinity_propagation_clustering(embedding, real_embedding, nr_clusters, allpx, dim_method):
    """Cluster ``embedding`` with affinity propagation.

    Parameters
    ----------
    embedding : numpy.ndarray
        Samples to cluster, one row per sample.
    real_embedding : bool
        If falsy, the cluster centres are passed through
        ``dimReductionDict[dim_method](centres, 2).perform()`` before being
        transformed (presumably a projection to two components - confirm
        against the reducer classes).
    nr_clusters : int
        Not used by this function; kept so all clustering helpers share one
        signature.
    allpx : bool
        If truthy, the clustering result is discarded: every row of
        ``embedding`` becomes its own prototype and the labels are
        ``0 .. n_rows - 1``.
    dim_method : str
        Key into the module-level ``dimReductionDict``.

    Returns
    -------
    tuple
        ``(proto_centers, labels)``.
    """
    model = AffinityPropagation(random_state=42)
    model.fit(embedding)
    exemplars = model.cluster_centers_
    if not real_embedding:
        reducer_cls = dimReductionDict[dim_method]
        exemplars = reducer_cls(exemplars, 2).perform()
    # Evaluated even in the allpx case, where its result is then replaced.
    positions = applyTransformation(exemplars)
    if not allpx:
        return positions, model.labels_
    per_pixel_positions = newTransformation(embedding)
    per_pixel_labels = np.arange(np.size(embedding, 0))
    return per_pixel_positions, per_pixel_labels

def dbscan_clustering(embedding, real_embedding, nr_clusters, allpx, dim_method, eps=0.3, min_samples=5):
    """Cluster ``embedding`` with DBSCAN and return colour-wheel positions.

    If DBSCAN marks any sample as noise (label ``-1``), all labels are shifted
    up by one so that noise becomes label ``0`` and gets its own prototype.
    Prototypes are the per-label means of ``embedding``, ordered by ascending
    label.

    Parameters
    ----------
    embedding : numpy.ndarray
        Samples to cluster, one row per sample.
    real_embedding : bool
        If falsy, the prototypes are passed through
        ``dimReductionDict[dim_method](prototypes, 2).perform()`` before being
        transformed (presumably a projection to two components - confirm
        against the reducer classes).
    nr_clusters : int
        Not used by this function; kept so all clustering helpers share one
        signature.
    allpx : bool
        If truthy, the clustering result is discarded: every row of
        ``embedding`` becomes its own prototype and the labels are
        ``0 .. n_rows - 1``.
    dim_method : str
        Key into the module-level ``dimReductionDict``.
    eps : float, optional
        DBSCAN neighbourhood radius. Defaults to ``0.3``, the value that was
        previously hard-coded.
    min_samples : int, optional
        DBSCAN core-point threshold. Defaults to ``5``, scikit-learn's default.

    Returns
    -------
    tuple
        ``(proto_centers, labels)``.
    """
    e_dbscan = DBSCAN(eps=eps, min_samples=min_samples).fit(embedding)
    labels = e_dbscan.labels_
    if -1 in labels:
        # Build a new array instead of `+=` so the estimator's labels_ stays intact.
        labels = labels + 1
    # np.unique yields sorted labels, so prototype order does not depend on
    # set iteration order.
    proto = np.array([np.mean(embedding[labels == l], axis=0) for l in np.unique(labels)])
    if not real_embedding:
        proto = dimReductionDict[dim_method](proto, 2).perform()
    proto_centers = applyTransformation(proto)
    if allpx:
        proto_centers = newTransformation(embedding)
        labels = np.arange(np.size(embedding,0))
    return proto_centers, labels

In [18]:
def newTransformation(centers, space="cartesian"):
    """Centre 2-D points on their mean and scale them into a disc of radius 0.9.

    The point farthest from the mean lands at distance ``0.9`` from the
    origin; all other points are scaled by the same factor. The farthest
    point and its distance are printed.

    Parameters
    ----------
    centers : array-like of shape (n, 2)
        Points to transform.
    space : str, optional
        Accepted for interface compatibility; not used.

    Returns
    -------
    numpy.ndarray of shape (n, 2)
        The centred and scaled points. If every point coincides with the mean
        (for example a single point), zeros are returned instead of NaN.

    Raises
    ------
    ValueError
        If ``centers`` is empty or does not have exactly two columns.
    """
    pts = np.asarray(centers)
    if pts.ndim != 2 or pts.shape[1] != 2:
        raise ValueError("centers must have shape (n, 2), got {}".format(pts.shape))
    if pts.shape[0] == 0:
        raise ValueError("centers must contain at least one point")

    # Offsets from the mean and their Euclidean lengths, for all rows at once.
    offsets = pts - np.mean(pts, axis=0)
    distances = np.sqrt((offsets ** 2).sum(axis=1))
    farthest = np.argmax(distances)
    max_dist = distances[farthest]
    point = pts[farthest]
    print('The point with max distance is {} and the distance is {}'.format(point, max_dist))

    if max_dist == 0:
        # Degenerate spread: dividing by zero would turn every coordinate into NaN.
        return np.zeros_like(offsets, dtype=float)

    scale = (1-0.1)/max_dist
    return scale * offsets

def applyTransformation(centers):
    """Thin alias: forward ``centers`` to ``newTransformation`` and return its result."""
    return newTransformation(centers)

In [19]:
def bygp_cmap():
    """Build the colour table of the four-corner cyclic colormap.

    Four linear gradients are chained in the order green -> pink -> yellow ->
    blue -> green, each sampled at integer indices 0..89.

    Returns
    -------
    numpy.ndarray of shape (360, 3)
        RGB rows in ``[0, 1]``; the alpha channel is dropped.
    """
    # Corner colours as 8-bit RGBA.
    green = [51, 255, 102, 255]
    pink = [255, 51, 170, 255]
    blue = [51, 85, 255, 255]
    yellow = [255, 204, 51, 255]

    legs = [(green, pink), (pink, yellow), (yellow, blue), (blue, green)]
    samples_per_leg = 90

    rows = []
    for start, stop in legs:
        anchors = np.array([start, stop]) / 255
        leg_cmap = mpl.colors.LinearSegmentedColormap.from_list(".", anchors, 45)
        # NOTE(review): the colormap has 45 levels but is sampled at 90 integer
        # indices; indices past the last level presumably return the end
        # colour, giving a plateau at every corner - confirm this is intended.
        for i in range(samples_per_leg):
            rows.append(list(leg_cmap(i)))

    rgba = np.array(rows)
    return rgba[:, :-1]

def circular_colormap_chooser(colormap_name):
    """Return the cyclic colormap registered under ``colormap_name``.

    The name is validated first and only the requested colormap is built, so
    asking for a matplotlib built-in such as ``"hsv"`` never touches the
    colormap text files.

    Parameters
    ----------
    colormap_name : str
        One of ``"hsl"``, ``"husl"``, ``"hsv"``, ``"twilight"``,
        ``"twilight_shifted"``, ``"mygbm"``, ``"mrybm"``, ``"phase"``,
        ``"bygp"``, ``"L60"``, ``"icefire"``, ``"edge"``.

    Returns
    -------
    matplotlib.colors.ListedColormap or str
        A ``ListedColormap`` for file-backed and generated maps; for
        ``"hsv"``, ``"twilight"`` and ``"twilight_shifted"`` the name itself
        is returned unchanged.

    Raises
    ------
    ValueError
        If ``colormap_name`` is not one of the supported names.
    """
    # Names returned as plain strings.
    builtin_names = ("hsv", "twilight", "twilight_shifted")

    # Colour tables stored as text files next to the notebook.
    file_backed = {
        "mygbm": "./cyclic_colormaps/cyclic_mygbm.txt",
        "mrybm": "./cyclic_colormaps/cyclic_mrybm.txt",
        "phase": "./cyclic_colormaps/cmocean_phase.txt",
        "L60": "./cyclic_colormaps/hue_L60.txt",
        "icefire": "./cyclic_colormaps/erdc_iceFire.txt",
        "edge": "./cyclic_colormaps/nic_Edge.txt",
    }

    # Colour tables computed on demand; wrapped in lambdas so that nothing is
    # built unless that entry is actually requested.
    generated = {
        "bygp": lambda: bygp_cmap(),
        "husl": lambda: sns.color_palette("husl",100),
        "hsl": lambda: sns.hls_palette(2000, s=1),
    }

    is_known = isinstance(colormap_name, str) and (
        colormap_name in builtin_names
        or colormap_name in file_backed
        or colormap_name in generated
    )
    if not is_known:
        raise ValueError('Wrong colormap name, choose one of the following options: "hsl", "husl", "hsv", "twilight", "twilight_shifted", "mygbm", "mrybm", "phase", "bygp", "L60", "icefire", "edge"')

    if colormap_name in builtin_names:
        return colormap_name
    if colormap_name in file_backed:
        return mpl.colors.ListedColormap(np.loadtxt(file_backed[colormap_name]))
    return mpl.colors.ListedColormap(generated[colormap_name]())
    

def unit_cicle_color_wheel(prototype_positions, colormap_name):
    """Draw a unit-circle colour wheel and derive one colour per prototype.

    A new polar figure is created, filled with the chosen cyclic colormap
    (colour encodes the angle), and the prototypes are plotted on it as black
    dots. Each prototype colour takes the wheel colour at its angle, with the
    HLS lightness replaced by ``0.25 + 0.5 * radius``.

    Parameters
    ----------
    prototype_positions : numpy.ndarray of shape (n, 2)
        Cartesian prototype coordinates; column 0 is x, column 1 is y.
    colormap_name : str
        Name passed to ``circular_colormap_chooser``.

    Returns
    -------
    tuple
        ``(d, r, colors)``: prototype angles in radians, prototype radii, and
        an ``(n, 4)`` RGBA array with alpha set to 1.
    """
    # Polar axes that fill most of a square figure.
    fig = plt.figure(figsize=(6, 6))
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8], projection="polar")
    ax.set_theta_direction(1)

    # Colour mesh: 2000 angular steps over -pi..pi, radius from 0 to 1.
    n_secants = 2000
    theta_axis = np.linspace(-np.pi, np.pi, n_secants)
    radius_axis = np.linspace(0, 1, 2)  # raise the 0 for a ring instead of a full disc
    _, theta_grid = np.meshgrid(radius_axis, theta_axis)
    angle_norm = mpl.colors.Normalize(-np.pi, np.pi)  # angles -pi..pi span the whole colormap

    wheel_cmap = circular_colormap_chooser(colormap_name)

    # The colour value of each mesh cell is its angle.
    mesh = ax.pcolormesh(theta_axis, radius_axis, theta_grid.T, cmap=wheel_cmap, norm=angle_norm)

    ax.set_yticklabels([])  # hide radial tick labels
    ax.tick_params(pad=15, labelsize=24)  # cosmetic: larger, padded angle labels
    ax.spines["polar"].set_visible(False)  # hide the outer axis spine

    angles, radii = cart2polar(prototype_positions[:, 0], prototype_positions[:, 1])
    ax.plot(angles, radii, "ko", markersize=6)

    # Wheel colour at each prototype angle, converted to HLS so the lightness
    # can be overwritten.
    rgba = mesh.to_rgba(angles)
    hls = np.array([colorsys.rgb_to_hls(*rgb) for rgb in rgba[:, :3]])
    light_min = 0.25
    light_max = 0.75
    hls[:, 1] = (light_max - light_min) * radii + light_min
    rgb_out = np.array([colorsys.hls_to_rgb(*triple) for triple in hls])
    opaque = np.ones((rgb_out.shape[0], 1))
    prototype_colors = np.concatenate((rgb_out, opaque), axis=1)
    return angles, radii, prototype_colors

def spectral_cluster(data, labels, prototype_positions, dframe, colormap_name):
    """Show the cluster label image, coloured by prototype position.

    Pixel ``(grid_y, grid_x)`` of row ``k`` in ``dframe`` receives the colour
    of prototype ``labels[k]``. Pixels that are not covered by ``dframe``, or
    whose label is negative, stay at ``-1`` and are drawn black. Two figures
    are created: the colour wheel and the label image.

    Parameters
    ----------
    data : any
        Not used; kept for interface compatibility.
    labels : array-like of int
        One label per row of ``dframe``; label ``i`` refers to row ``i`` of
        ``prototype_positions``.
    prototype_positions : numpy.ndarray of shape (n, 2)
        Prototype coordinates passed to ``unit_cicle_color_wheel``.
    dframe : pandas.DataFrame
        Frame whose index carries the levels ``"grid_x"`` and ``"grid_y"``.
    colormap_name : str
        Name of the cyclic colormap to use.

    Returns
    -------
    None
    """
    grid_x = np.asarray(dframe.index.get_level_values("grid_x")).astype(int)
    grid_y = np.asarray(dframe.index.get_level_values("grid_y")).astype(int)
    labels = np.asarray(labels)

    # -1 marks background; one vectorised assignment replaces the per-label
    # loop, which was quadratic when every pixel is its own label.
    img = np.full((np.amax(grid_y) + 1, np.amax(grid_x) + 1), -1.0)
    assigned = labels >= 0
    img[grid_y[assigned], grid_x[assigned]] = labels[assigned]

    _, _, colors = unit_cicle_color_wheel(prototype_positions, colormap_name)
    # Palette entry 0 is black for the background value -1; entry i + 1 is
    # the colour of prototype i.
    palette = np.vstack([[0, 0, 0, 1], colors])
    my_cmap = mpl.colors.ListedColormap(palette)

    plt.figure()
    # Fix the value range explicitly. With imshow's automatic range an image
    # without any background pixel would start at 0, assigning black to
    # cluster 0 and shifting every other colour.
    plt.imshow(img, cmap=my_cmap, vmin=-1, vmax=len(colors) - 1)

In [None]:
# Location of the HDF5 dataset, relative to the notebook's working directory.
path = "../../backend/datasets/barley_101.h5"

# Lookup from method name to reducer class; the clustering helpers read this
# module-level dict through their `dim_method` argument.
dimReductionDict = {
    "pca": PCA,
    "umap": UMAP
}

# The helper functions read the pixel coordinates from the index levels
# "grid_x" and "grid_y" of this frame.
dframe = pd.read_hdf(path)
data = dframe.values
nr_clusters = 8
embedding = None  # placeholder; each example cell below assigns its own embedding
colorlist_name = "hsl"  # colormap name handed to spectral_cluster

print(f"Data shape: {dframe.shape}")

In [None]:
# Example - embedding into clustering:
# reduce the data with PCA first, then run k-means on the reduced embedding.
dim_method = "pca"
embedding = dimReductionDict[dim_method](data, 2).perform()
prototype_positions, labels = kmeans_clustering(
    embedding,
    real_embedding=True,
    nr_clusters=nr_clusters,
    allpx=False,
    dim_method=dim_method,
)
spectral_cluster(
    data=data,
    labels=labels,
    prototype_positions=prototype_positions,
    dframe=dframe,
    colormap_name=colorlist_name,
)

In [None]:
# Example - clustering into embedding of prototypes:
# run k-means on the full data, then reduce only the cluster centres with UMAP.
dim_method = "umap"
embedding = data
prototype_positions, labels = kmeans_clustering(
    embedding,
    real_embedding=False,
    nr_clusters=nr_clusters,
    allpx=False,
    dim_method=dim_method,
)
spectral_cluster(
    data=data,
    labels=labels,
    prototype_positions=prototype_positions,
    dframe=dframe,
    colormap_name=colorlist_name,
)

In [None]:
# Example - Projection of full embedding:
# reduce the data with UMAP and colour every pixel by its own embedded position
# (allpx=True makes each pixel its own prototype).
dim_method = "umap"
embedding = dimReductionDict[dim_method](data, 2).perform()
prototype_positions, labels = kmeans_clustering(
    embedding,
    real_embedding=True,
    nr_clusters=nr_clusters,
    allpx=True,
    dim_method=dim_method,
)
spectral_cluster(
    data=data,
    labels=labels,
    prototype_positions=prototype_positions,
    dframe=dframe,
    colormap_name=colorlist_name,
)

In [None]:
# Example - embedding into clustering:
# reduce the data with PCA first, then run k-means on the reduced embedding.
# NOTE: this cell repeats the first PCA example cell of the notebook unchanged.
dim_method = "pca"
embedding = dimReductionDict[dim_method](data, 2).perform()
prototype_positions, labels = kmeans_clustering(
    embedding,
    real_embedding=True,
    nr_clusters=nr_clusters,
    allpx=False,
    dim_method=dim_method,
)
spectral_cluster(
    data=data,
    labels=labels,
    prototype_positions=prototype_positions,
    dframe=dframe,
    colormap_name=colorlist_name,
)

In [None]:
# Example - clustering into embedding of prototypes:
# run k-means on the full data, then reduce only the cluster centres with PCA.
dim_method = "pca"
embedding = data
prototype_positions, labels = kmeans_clustering(
    embedding,
    real_embedding=False,
    nr_clusters=nr_clusters,
    allpx=False,
    dim_method=dim_method,
)
spectral_cluster(
    data=data,
    labels=labels,
    prototype_positions=prototype_positions,
    dframe=dframe,
    colormap_name=colorlist_name,
)

In [None]:
# Example - Projection of full embedding:
# reduce the data with PCA and colour every pixel by its own embedded position
# (allpx=True makes each pixel its own prototype).
dim_method = "pca"
embedding = dimReductionDict[dim_method](data, 2).perform()
prototype_positions, labels = kmeans_clustering(
    embedding,
    real_embedding=True,
    nr_clusters=nr_clusters,
    allpx=True,
    dim_method=dim_method,
)
spectral_cluster(
    data=data,
    labels=labels,
    prototype_positions=prototype_positions,
    dframe=dframe,
    colormap_name=colorlist_name,
)

In [None]:
# Example - Pre-reduce dimensions for computational speed:
# PCA (target size 10) first, then UMAP (target size 2) on the PCA output.
pre_embedding = dimReductionDict["pca"](data, 10).perform()
# The cells below read `dim_method`, so it must end up as "umap" here.
dim_method = "umap"
embedding = dimReductionDict[dim_method](pre_embedding, 2).perform()

In [None]:
%matplotlib inline
prototype_positions, labels = kmeans_clustering(embedding, True, nr_clusters, False, dim_method)
spectral_cluster(data, labels, prototype_positions, dframe, colorlist_name)

In [None]:
# k-means on the PCA pre-embedding; only the cluster centres are then reduced
# with the method named by `dim_method` (left at "umap" by the pre-reduction cell).
prototype_positions, labels = kmeans_clustering(
    pre_embedding,
    real_embedding=False,
    nr_clusters=nr_clusters,
    allpx=False,
    dim_method=dim_method,
)
spectral_cluster(
    data=data,
    labels=labels,
    prototype_positions=prototype_positions,
    dframe=dframe,
    colormap_name=colorlist_name,
)

In [None]:
# Full projection of the 2-stage (PCA -> UMAP) embedding: allpx=True colours
# every pixel by its own embedded position.
prototype_positions, labels = kmeans_clustering(
    embedding,
    real_embedding=True,
    nr_clusters=nr_clusters,
    allpx=True,
    dim_method=dim_method,
)
spectral_cluster(
    data=data,
    labels=labels,
    prototype_positions=prototype_positions,
    dframe=dframe,
    colormap_name=colorlist_name,
)