In [None]:
import numpy as np
from numpy.random import default_rng
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from scipy.spatial import distance

from sklearn.preprocessing import StandardScaler
from sklearn import decomposition

from PIL import Image
import networkx as nx

import IBloFunMatch_inter as ibfm

output_dir = "output"

# Read data

First, set number of classes, samples per class and create labels.

In [None]:
num_class = 10
num_samples = 72
# Label vector: each class id repeated num_samples times, classes in ascending order.
y = [label for label in range(num_class) for _ in range(num_samples)]

Read COIL 20 images and store into an array

In [None]:
# Load every image as a flattened pixel vector, one row per image. Rows are
# ordered by object and then by view index, the same order used to build `y`.
frames = []
for c in range(1, num_class+1):  # object ids in the file names start at 1
    for i in range(num_samples):  # view index within the object
        # The context manager releases the file handle as soon as the pixels
        # have been copied into a NumPy array.
        with Image.open(f"coil-20-proc/coil-20-proc/obj{c}__{i}.png") as im_frame:
            frames.append(np.array(im_frame).ravel())
data = np.array(frames)

In [None]:
len(y)

In [None]:
data.shape

In [None]:
all_indices = list(range(data.shape[0]))

In [None]:
def draw_repr_cycle(repr_cycle, figsize, images=None, image_shape=(128, 128)):
    """Draw a representative cycle as a graph whose vertices are shown as images.

    Parameters
    ----------
    repr_cycle : array-like of int
        Flat sequence of vertex indices. It is reshaped to ``(-1, 2)`` and each
        row is read as one edge.
    figsize : tuple
        Figure size passed to ``plt.subplots``; ``figsize[0]`` is also used as
        the edge line width.
    images : array-like, optional
        ``images[v]`` is the flattened image of vertex ``v``. When omitted, the
        notebook-level ``data`` array is used, so both the two-argument and the
        three-argument call styles found in this notebook work.
    image_shape : tuple, optional
        Shape each flattened image is reshaped to before it is displayed.

    Returns
    -------
    matplotlib.axes.Axes
        The axes holding the drawing, so callers can add a title and save it.
    """
    if images is None:
        images = data
    edges = np.array(repr_cycle).reshape((-1, 2)).tolist()

    fig, ax = plt.subplots(figsize=figsize)
    G = nx.Graph()
    G.add_nodes_from(np.unique(repr_cycle))
    # Edge weight = Euclidean distance between the two vertex images. Casting to
    # float guards against wrap-around if the pixels are stored as unsigned ints.
    G.add_weighted_edges_from(
        (
            u,
            v,
            distance.euclidean(
                np.asarray(images[u], dtype=float),
                np.asarray(images[v], dtype=float),
            ),
        )
        for u, v in edges
    )
    pos = nx.spectral_layout(G)
    nx.draw_networkx(G, ax=ax, pos=pos, width=figsize[0])

    # Remember the limits chosen by the graph drawing and restore them after the
    # thumbnails have been added.
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()
    for node, (cx, cy) in pos.items():
        thumbnail = np.asarray(images[node]).reshape(image_shape)
        # Each thumbnail is a 0.2 x 0.2 box (layout units) centred on its node.
        extent = (cx-0.1, cx+0.1, cy-0.1, cy+0.1)
        ax.imshow(thumbnail, cmap="gray", extent=extent, zorder=4)

    ax.set_ylim(ylim)
    ax.set_xlim(xlim)
    return ax

# Compute self matchings
### On Raw Data

In [None]:
# Self matching on the raw pixel vectors: the subset passed is `all_indices`,
# i.e. every row of `data`.
output_data_ibfm = ibfm.get_IBloFunMatch_subset(
    None, data, all_indices, output_dir,
    num_it=4, points=True, max_rad=-1,
)

In [None]:
# One two-panel figure per homology dimension (0 and 1), each with its own size.
figsizes = [(4,2), (8,4)]
for dim, size in enumerate(figsizes):
    fig, ax = plt.subplots(ncols=2, nrows=1, figsize=size)
    ibfm.plot_matching(output_data_ibfm, output_dir, ax, fig, dim=dim, frame_on=True)
    plt.savefig(f"plots/COIL/self_matching_{dim}.png")

In [None]:
output_data_ibfm.keys()

Check that the intervals are matched to themselves.

In [None]:
# Bar length = second column minus first column of `S_barcode_1`;
# keep the indices of the bars longer than 1000.
s_barcode_1 = output_data_ibfm["S_barcode_1"]
bar_lengths = s_barcode_1[:, 1] - s_barcode_1[:, 0]
long_bars = np.nonzero(bar_lengths > 1000)[0]
len(long_bars)

In [None]:
origin_bars = output_data_ibfm["S_barcode_1"][long_bars]

In [None]:
origin_bars

In [None]:
# Look up, for each long bar, the index stored in `block_function_1`
# and fetch the corresponding row of `X_barcode_1`.
block_function_1 = np.array(output_data_ibfm["block_function_1"])
target_bars = output_data_ibfm["X_barcode_1"][block_function_1[long_bars]]

In [None]:
np.max(np.abs(origin_bars - target_bars))<0.0005

Print matching strengths.

In [None]:
output_data_ibfm["matching_strengths_1"][long_bars]

Inspect the representative cycles of the long bars to see what they detect in the data.

In [None]:
long_bars

In [None]:
%%capture
# Draw and save the representative cycle of every long bar.
for i in long_bars:
    repr_cycle = output_data_ibfm["S_reps_1"][i]
    figsize = (len(repr_cycle)*1.5, len(repr_cycle)*1.5)
    # Called with two arguments: the vertex images are taken from the
    # notebook-level `data` array inside draw_repr_cycle.
    ax = draw_repr_cycle(repr_cycle, figsize)
    strength = output_data_ibfm["matching_strengths_1"][i]
    ax.set_title(f"Matching Strength: {strength}", fontsize=figsize[0]*2, color="green")
    plt.savefig(f"plots/COIL/raw/cycle_reps_{i}.png")
    # Output is captured anyway; close each figure so they do not pile up in memory.
    plt.close(ax.figure)

### Special plotting for some homology classes

In [None]:
long_bars[6]

In [None]:
# NOTE(review): the index 26 is hard-coded; presumably it is the value of
# long_bars[6] displayed by the previous cell — confirm, and consider indexing
# through long_bars[6] so this cell survives a change in the computed bars.
repr_cycle = output_data_ibfm["S_reps_1"][26]

In [None]:
# Graph of the selected cycle: vertices are the distinct indices in
# `repr_cycle`, edges are its consecutive index pairs.
G = nx.Graph()
G.add_nodes_from(np.unique(repr_cycle))
edges = np.array(repr_cycle).reshape((-1,2)).tolist()
G.add_edges_from(edges)

In [None]:
# One independent subgraph copy per connected component of G.
S = [G.subgraph(c).copy() for c in nx.connected_components(G)]

In [None]:
np.unique(list(S[0].edges))

In [None]:

# Draw every connected component of the cycle graph with its vertex images.
for i, component in enumerate(S):
    # Figure side follows the 1.5-per-cycle-entry convention used for the other
    # cycle plots (a cycle lists two entries per edge). It is computed from this
    # component rather than from a `figsize` left over from an earlier cell.
    side = 1.5 * 2 * component.number_of_edges()
    fig, ax = plt.subplots(figsize=(side, side))
    pos = nx.spectral_layout(component)
    nx.draw_networkx(component, ax=ax, pos=pos, width=side)
    # Remember the limits chosen by the graph drawing and restore them after
    # the thumbnails have been added.
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()
    for node, (cx, cy) in pos.items():
        im_array = data[node].reshape((128,128))
        extent = (cx-0.1, cx+0.1, cy-0.1, cy+0.1)
        ax.imshow(im_array, cmap="gray", extent=extent, zorder=4)

    ax.set_ylim(ylim)
    ax.set_xlim(xlim)
    plt.savefig(f"plots/COIL/raw/difficult_cycle_rep_{i}.png")

In [None]:
# Save the image of every distinct vertex of the cycle attached to long_bars[6].
i = long_bars[6]
repr_cycle = output_data_ibfm["S_reps_1"][i]
cycle_vertices = np.unique(repr_cycle)
for j in cycle_vertices:
    vertex_image = data[j].reshape((128,128))
    plt.imsave(f"plots/COIL/raw/cycle/cycle_{j}.png", vertex_image, cmap="gray")

### On data using PCA
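We do not scale the data, since scaling makes the resulting matching strengths very small. (Note: the cell below currently applies `StandardScaler` before PCA, which contradicts this statement — TODO: reconcile the text and the code.)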

In [None]:
# Standardise every pixel feature, then keep the first 8 principal components.
# NOTE(review): the markdown above says the data is not scaled, yet
# StandardScaler is applied here — confirm which one is intended.
scaled_data = StandardScaler().fit_transform(data)
# random_state pins the result when scikit-learn picks a randomized SVD solver,
# so the strength threshold used further down stays meaningful across runs.
pca = decomposition.PCA(n_components=8, random_state=0)
data_red_pca = pca.fit_transform(scaled_data)

In [None]:
# Self matching on the PCA-reduced points, again over all indices.
output_data_ibfm_pca = ibfm.get_IBloFunMatch_subset(
    None, data_red_pca, all_indices, output_dir,
    num_it=4, points=True, max_rad=-1,
)

In [None]:
# One two-panel figure per homology dimension (0 and 1), each with its own size.
figsizes = [(4,2), (8,4)]
for dim, size in enumerate(figsizes):
    fig, ax = plt.subplots(ncols=2, nrows=1, figsize=size)
    ibfm.plot_matching(output_data_ibfm_pca, output_dir, ax, fig, dim=dim, frame_on=True)
    plt.savefig(f"plots/COIL/self_matching_pca_{dim}.png")

In [None]:
output_data_ibfm_pca["matching_strengths_1"]

In [None]:
# Indices of the bars whose matching strength exceeds 140.
strengths_pca = output_data_ibfm_pca["matching_strengths_1"]
long_bars_pca = np.nonzero(strengths_pca > 140)[0]

In [None]:
%%capture
# Draw and save the representative cycle of every selected PCA bar.
for i in long_bars_pca:
    repr_cycle = output_data_ibfm_pca["S_reps_1"][i]
    figsize = (len(repr_cycle)*1.5, len(repr_cycle)*1.5)
    ax = draw_repr_cycle(repr_cycle, figsize)
    strength = output_data_ibfm_pca["matching_strengths_1"][i]
    ax.set_title(f"Matching Strength: {strength}", fontsize=figsize[0]*2, color="green")
    plt.savefig(f"plots/COIL/pca/cycle_reps_{i}.png")
    # Output is captured anyway; close each figure so they do not pile up in memory.
    plt.close(ax.figure)

# UMAP reduction (2 components)

In [None]:
import umap

In [None]:
# UMAP is stochastic; a fixed random_state makes the embedding, and therefore
# the bars selected from it below, reproducible across runs.
umap_reducer = umap.UMAP(n_components=2, min_dist=0.1, metric="euclidean", random_state=0)
# Standardise the pixel features before embedding.
scaled_data = StandardScaler().fit_transform(data)
data_umap = umap_reducer.fit_transform(scaled_data)

In [None]:
# Build the palette once, one colour per class, and look colours up by label
# (previously a len(y)-colour palette was rebuilt for every single point).
class_palette = sns.color_palette(n_colors=num_class)
plt.scatter(
    data_umap[:, 0],
    data_umap[:, 1],
    c=[class_palette[label] for label in y])
plt.gca().set_aspect('equal', 'datalim')
plt.title('UMAP projection of the COIL20 dataset', fontsize=24);

In [None]:
# Self matching on the 2-component UMAP embedding, again over all indices.
output_data_ibfm_umap = ibfm.get_IBloFunMatch_subset(
    None, data_umap, all_indices, output_dir,
    num_it=4, points=True, max_rad=-1,
)

In [None]:
# One two-panel figure per homology dimension (0 and 1), each with its own size.
figsizes = [(4,2), (8,4)]
for dim, size in enumerate(figsizes):
    fig, ax = plt.subplots(ncols=2, nrows=1, figsize=size)
    ibfm.plot_matching(output_data_ibfm_umap, output_dir, ax, fig, dim=dim, frame_on=True)
    plt.savefig(f"plots/COIL/self_matching_umap_{dim}.png")

In [None]:
output_data_ibfm_umap["matching_strengths_1"]

In [None]:
# Indices of the bars whose matching strength exceeds 0.1.
strengths_umap = output_data_ibfm_umap["matching_strengths_1"]
long_bars_umap = np.nonzero(strengths_umap > 0.1)[0]

In [None]:
long_bars_umap

In [None]:
%%capture
# Draw and save the representative cycle of every selected UMAP bar.
for i in long_bars_umap:
    repr_cycle = output_data_ibfm_umap["S_reps_1"][i]
    figsize = (len(repr_cycle)*1.5, len(repr_cycle)*1.5)
    # Called with two arguments: the vertex images are taken from the
    # notebook-level `data` array inside draw_repr_cycle.
    ax = draw_repr_cycle(repr_cycle, figsize)
    strength = output_data_ibfm_umap["matching_strengths_1"][i]
    ax.set_title(f"Matching Strength: {strength}", fontsize=figsize[0]*2, color="green")
    plt.savefig(f"plots/COIL/umap/cycle_reps_{i}.png")
    # Output is captured anyway; close each figure so they do not pile up in memory.
    plt.close(ax.figure)

# UMAP on more components (6)

In [None]:
# UMAP is stochastic; a fixed random_state makes the embedding, and therefore
# the bars selected from it below, reproducible across runs.
umap_reducer_6 = umap.UMAP(n_components=6, min_dist=0.1, metric="euclidean", random_state=0)
# Standardise the pixel features before embedding.
scaled_data = StandardScaler().fit_transform(data)
data_umap_6 = umap_reducer_6.fit_transform(scaled_data)

In [None]:
# Self matching on the 6-component UMAP embedding, again over all indices.
output_data_ibfm_umap_6 = ibfm.get_IBloFunMatch_subset(
    None, data_umap_6, all_indices, output_dir,
    num_it=4, points=True, max_rad=-1,
)

In [None]:
# One two-panel figure per homology dimension (0 and 1), each with its own size.
figsizes = [(4,2), (8,4)]
for dim, size in enumerate(figsizes):
    fig, ax = plt.subplots(ncols=2, nrows=1, figsize=size)
    ibfm.plot_matching(output_data_ibfm_umap_6, output_dir, ax, fig, dim=dim, frame_on=True)
    plt.savefig(f"plots/COIL/self_matching_umap_6_{dim}.png")

In [None]:
# Indices of the bars whose matching strength exceeds 0.1, and how many there are.
strengths_umap_6 = output_data_ibfm_umap_6["matching_strengths_1"]
long_bars_umap_6 = np.nonzero(strengths_umap_6 > 0.1)[0]
len(long_bars_umap_6)

In [None]:
%%capture
# Draw and save the representative cycle of every selected 6-component UMAP bar.
for i in long_bars_umap_6:
    repr_cycle = output_data_ibfm_umap_6["S_reps_1"][i]
    figsize = (len(repr_cycle)*1.5, len(repr_cycle)*1.5)
    # Called with two arguments: the vertex images are taken from the
    # notebook-level `data` array inside draw_repr_cycle.
    ax = draw_repr_cycle(repr_cycle, figsize)
    strength = output_data_ibfm_umap_6["matching_strengths_1"][i]
    ax.set_title(f"Matching Strength: {strength}", fontsize=figsize[0]*2, color="green")
    plt.savefig(f"plots/COIL/UMAP_6/cycle_reps_{i}.png")
    # Output is captured anyway; close each figure so they do not pile up in memory.
    plt.close(ax.figure)