This notebook presents basic examples that help in understanding the block function. In particular, the examples focus on the COIL-20 dataset.

In [1]:
from PIL import Image
import numpy as np
from numpy.random import default_rng
import os

import IBloFunMatch_inter as ibfm

import matplotlib.pyplot as plt
import networkx as nx
from scipy.spatial import distance

EXECUTABLE_PATH: /mnt/c/Users/Alvaro/Documents/repositories/IBloFunMatch/build/IBloFunMatch




In [2]:
def draw_repr_cycle(repr_cycle, figsize, data, image_shape=(128, 128)):
    """Draw a representative cycle as a graph with an image on top of every node.

    Parameters
    ----------
    repr_cycle : flat sequence of vertex indices, read pairwise as edges
        (it is reshaped to ``(-1, 2)``).
    figsize : ``(width, height)`` passed to ``plt.subplots``; ``figsize[0]`` is
        also used as the line width of the drawn edges.
    data : array indexable by vertex; ``data[v]`` is the flattened image of ``v``.
    image_shape : shape each flattened image is reshaped to before drawing.
        Defaults to ``(128, 128)``.

    Returns
    -------
    (ax, pos) : the matplotlib axes and the node-position dict produced by
        ``nx.spectral_layout``.
    """
    fig, ax = plt.subplots(figsize=figsize)
    G = nx.Graph()
    G.add_nodes_from(np.unique(repr_cycle))
    weighted_edges = []
    for u, v in np.array(repr_cycle).reshape((-1, 2)).tolist():
        # Measure the distance on float copies: with unsigned integer pixel
        # data (e.g. uint8) the element-wise difference would otherwise wrap
        # around instead of going negative.
        weight = distance.euclidean(
            np.asarray(data[u], dtype=float), np.asarray(data[v], dtype=float)
        )
        weighted_edges.append((u, v, weight))
    G.add_weighted_edges_from(weighted_edges)
    pos = nx.spectral_layout(G)
    nx.draw_networkx(G, ax=ax, pos=pos, width=figsize[0])
    # Remember the limits chosen for the graph; imshow would otherwise reset them.
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()
    half_width = 0.1
    for node, center in pos.items():
        im_array = data[node].reshape(image_shape)
        extent = (
            center[0] - half_width,
            center[0] + half_width,
            center[1] - half_width,
            center[1] + half_width,
        )
        ax.imshow(im_array, cmap="gray", extent=extent, zorder=4)

    ax.set_ylim(ylim)
    ax.set_xlim(xlim)
    return ax, pos

# Example with Class 1 data and cycles

In [None]:
# Classes used in this example (0-based; class c is read from the files obj{c+1}__*.png).
CLASS_LIST = [0]
# Number of images loaded per class.
NUM_SAMPLES = 72
# Number of random subsets drawn for the matching experiments.
NUM_EXP = 1
# Fraction of the class samples kept in each random subset.
DATA_PERCENT = 0.5
# Size of each random subset, truncated to an integer.
SUBSET_SIZE = int(DATA_PERCENT * NUM_SAMPLES)
print(f"SUBSET_SIZE:{SUBSET_SIZE}")

Load data and labels per each class in `CLASS_LIST`

In [None]:
# Labels: one entry per image, grouped by class in CLASS_LIST order.
y = np.array([c for c in CLASS_LIST for _ in range(NUM_SAMPLES)])
# Images: every PNG is read and flattened into one row of `data`.
image_paths = [
    f"data_COIL20/coil-20-proc/obj{c+1}__{i}.png"
    for c in CLASS_LIST
    for i in range(NUM_SAMPLES)
]
data = np.array([np.array(Image.open(path)).ravel() for path in image_paths])
print("All data shape")
print(data.shape)

Take some random subsets and compute their induced block functions.

In [None]:
rng = default_rng(22)
cidx = CLASS_LIST[0]
# Rows of `data` whose label equals the chosen class.
class_indices = np.flatnonzero(np.array(y) == cidx).tolist()
class_data = data[class_indices]
# Draw NUM_EXP random subsets without replacement; each one is stored sorted.
exp_indices = [
    np.sort(rng.choice(class_indices, SUBSET_SIZE, replace=False)).tolist()
    for _ in range(NUM_EXP)
]
# Run the matching once per subset.
output_dir = "output"
exp_ibfm = [
    ibfm.get_IBloFunMatch_subset(None, data, subset, output_dir, num_it=4, points=True, max_rad=-1)
    for subset in exp_indices
]

Plot the barcode of class 0.

In [None]:
# Plot the dimension-1 barcode stored under "X_barcode_1" and save it under the class index.
cidx = CLASS_LIST[0]
os.makedirs("plots/COIL_CYCLES", exist_ok=True)
X_barcode_1 = exp_ibfm[0]["X_barcode_1"]
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 3))
ibfm.plot_barcode(X_barcode_1, "navy", ax)
plt.savefig(f"plots/COIL_CYCLES/class_{cidx}_barcode.png")

We consider the longest bars from the codomain (those of length greater than 500) and print their representatives.

In [None]:
# Indices of the bars in "X_barcode_1" whose length (column 1 minus column 0) exceeds 500.
codomain_bars = exp_ibfm[0]["X_barcode_1"]
codomain_bar_lengths = codomain_bars[:, 1] - codomain_bars[:, 0]
long_X_bars = np.flatnonzero(codomain_bar_lengths > 500)
print(codomain_bars[long_X_bars])

In [None]:
# Draw and save the representative cycle of every long codomain bar.
figsize = (8, 12)
os.makedirs("plots/COIL_CYCLES/codomain", exist_ok=True)
for cycle_idx, repr_cycle in enumerate(exp_ibfm[0]["X_reps_1"]):
    if cycle_idx not in long_X_bars:
        continue
    # np.unique already returns the vertices sorted.
    cycle_vertices = np.unique(repr_cycle)
    print(f"Cycle {cycle_idx}, number of elements: {len(cycle_vertices)}")
    print(cycle_vertices)
    # draw_repr_cycle returns (ax, pos); unpack it instead of binding the tuple to `ax`.
    ax, pos = draw_repr_cycle(repr_cycle, figsize, class_data)
    plt.savefig(f"plots/COIL_CYCLES/codomain/cycle_rep_codomain_{cycle_idx}.png")

In fact, notice that the longest cycle contains all samples from the dataset.

In [None]:

# One two-panel figure per experiment; the matching is drawn only when
# "S_barcode_1" has at least one bar, but the figure is saved either way.
matching_options = dict(dim=1, frame_on=True, strengths=False, block_function=True)
for id_exp, ibfm_out in enumerate(exp_ibfm):
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
    s_barcode_nonempty = ibfm_out["S_barcode_1"].shape[0] > 0
    if s_barcode_nonempty:
        ibfm.plot_matching(ibfm_out, ax, fig, **matching_options)
    plt.savefig(f"plots/COIL_CYCLES/blofun_{id_exp}.png")

In [None]:
# Indices of the bars in "S_barcode_1" whose length (column 1 minus column 0) exceeds 500.
subset_bars = exp_ibfm[0]["S_barcode_1"]
subset_bar_lengths = subset_bars[:, 1] - subset_bars[:, 0]
long_S_bars = np.flatnonzero(subset_bar_lengths > 500)
subset_bars[long_S_bars]

In [None]:
# Draw and save the representative cycle of every long bar of the subset barcode.
figsize = (8, 12)
os.makedirs("plots/COIL_CYCLES/exp_0", exist_ok=True)
for cycle_idx, repr_cycle in enumerate(exp_ibfm[0]["S_reps_1"]):
    if cycle_idx not in long_S_bars:
        continue
    # np.unique already returns the vertices sorted.
    cycle_vertices = np.unique(repr_cycle)
    print(f"Cycle {cycle_idx}")
    print(len(cycle_vertices))
    print(cycle_vertices)
    # draw_repr_cycle returns (ax, pos); unpack it instead of binding the tuple to `ax`.
    ax, pos = draw_repr_cycle(repr_cycle, figsize, class_data)
    plt.savefig(f"plots/COIL_CYCLES/exp_0/cycle_rep_{cycle_idx}.png")

# Example with Class 2 data and cycles

Now the topology will be a bit more involved. Notice that there are two prominent intervals.

In [None]:
# Switch to class index 1 (files obj2__*.png); the following cell reloads `data` for it.
CLASS_LIST = [1]

In [None]:
# Labels: NUM_SAMPLES copies of each class index, in CLASS_LIST order.
y = np.array([c for c in CLASS_LIST for _ in range(NUM_SAMPLES)])
# Images: read each PNG and flatten it into a single row.
flattened_images = [
    np.array(Image.open(f"data_COIL20/coil-20-proc/obj{c+1}__{i}.png")).ravel()
    for c in CLASS_LIST
    for i in range(NUM_SAMPLES)
]
data = np.array(flattened_images)
print("All data shape")
print(data.shape)

In [None]:
# Match the class with itself: the "subset" is every row of `data`.
output_dir = "output"
# Derive the index range from the data instead of hard-coding the 72 samples.
indices_subset = list(range(data.shape[0]))
exp_ibfm = [
    ibfm.get_IBloFunMatch_subset(None, data, indices_subset, output_dir, num_it=4, points=True, max_rad=-1)
]

In [None]:
# Plot the dimension-1 barcode stored under "X_barcode_1" and save it under the class index.
cidx = CLASS_LIST[0]
os.makedirs("plots/COIL_CYCLES", exist_ok=True)
X_barcode_1 = exp_ibfm[0]["X_barcode_1"]
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 3))
ibfm.plot_barcode(X_barcode_1, "navy", ax)
plt.savefig(f"plots/COIL_CYCLES/class_{cidx}_barcode.png")

In [None]:
# Draw every codomain representative cycle and highlight its edges in a distinct color.
colorcycles = ["red", "blue", "green"]
figsize = (8, 12)
os.makedirs("plots/COIL_CYCLES/codomain_2", exist_ok=True)
for cycle_idx, repr_cycle in enumerate(exp_ibfm[0]["X_reps_1"]):
    # np.unique already returns the vertices sorted.
    cycle_vertices = np.unique(repr_cycle)
    print(f"Cycle {cycle_idx}, number of elements: {len(cycle_vertices)}")
    print(cycle_vertices)
    # Use `data` (reloaded for this class) rather than `class_data`: on a
    # top-to-bottom run `class_data` still holds the previous class' images
    # here, because it is only refreshed in a later cell.
    ax, pos = draw_repr_cycle(repr_cycle, figsize, data)
    # Wrap around the palette so more cycles than colors cannot raise IndexError.
    color = colorcycles[cycle_idx % len(colorcycles)]
    for start_vertex, end_vertex in np.array(repr_cycle).reshape((-1, 2)):
        start = pos[start_vertex]
        end = pos[end_vertex]
        ax.plot([start[0], end[0]], [start[1], end[1]], c=color, linewidth=55, zorder=0)
    plt.savefig(f"plots/COIL_CYCLES/codomain_2/cycle_rep_codomain_{cycle_idx}.png")

In [None]:
def plot_cycles_join(data, repr_list, fixed, pos_fixed, figsize, k=0.5, imsize=0.6, image_shape=(128, 128)):
    """Draw the graph spanned by a list of edges, with an image on top of every node.

    Parameters
    ----------
    data : array indexable by vertex; ``data[v]`` is the flattened image of ``v``.
    repr_list : flat sequence of vertex indices, read pairwise as edges
        (it is reshaped to ``(-1, 2)``); repeated edges are added only once.
    fixed : nodes passed as ``fixed`` to ``nx.spring_layout``.
    pos_fixed : dict of initial node positions passed as ``pos`` to
        ``nx.spring_layout``.
    figsize : ``(width, height)`` passed to ``plt.subplots``; ``figsize[0]`` is
        also used as the line width of the drawn edges.
    k : ``k`` argument forwarded to ``nx.spring_layout``.
    imsize : side length, in data coordinates, of the image drawn on each node.
    image_shape : shape each flattened image is reshaped to before drawing.
        Defaults to ``(128, 128)``.

    Returns
    -------
    ax : the matplotlib axes the graph was drawn on.
    """
    fig, ax = plt.subplots(figsize=figsize)
    G = nx.Graph()
    G.add_nodes_from(np.unique(repr_list))

    # Undirected edges are deduplicated through frozensets, so (u, v) and
    # (v, u) count as the same edge.
    seen_edges = set()
    weighted_edges = []
    for u, v in np.array(repr_list).reshape((-1, 2)).tolist():
        edge_key = frozenset((u, v))
        if edge_key in seen_edges:
            continue
        seen_edges.add(edge_key)
        # Measure the distance on float copies: with unsigned integer pixel
        # data (e.g. uint8) the element-wise difference would otherwise wrap
        # around instead of going negative.
        weight = distance.euclidean(
            np.asarray(data[u], dtype=float), np.asarray(data[v], dtype=float)
        )
        weighted_edges.append((u, v, weight))
    G.add_weighted_edges_from(weighted_edges)

    pos = nx.spring_layout(G, k=k, pos=pos_fixed, fixed=fixed)
    nx.draw_networkx(G, ax=ax, pos=pos, width=figsize[0])
    # Remember the limits chosen for the graph; imshow would otherwise reset them.
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()
    half_width = imsize / 2
    for node, center in pos.items():
        im_array = data[node].reshape(image_shape)
        extent = (
            center[0] - half_width,
            center[0] + half_width,
            center[1] - half_width,
            center[1] + half_width,
        )
        ax.imshow(im_array, cmap="gray", extent=extent, zorder=4)

    ax.set_ylim(ylim)
    ax.set_xlim(xlim)
    return ax

In [None]:
# Put together all codomain representatives into a single edge list.
edgelist = []
for repr_cycle in exp_ibfm[0]["X_reps_1"]:
    edgelist += repr_cycle
figsize = (8,8)

# Vertices of the first representative are placed on an outer ellipse.
fixed = np.unique(exp_ibfm[0]["X_reps_1"][0]).tolist()
pos_fixed={}
for i, v in enumerate(fixed):
    angle = (i/len(fixed)) * 2* np.pi + np.pi/2*1.2
    pos_fixed[v] = (9*np.sin( angle), 7*np.cos(angle))

# Every remaining sample goes on an inner ellipse, traversed in the opposite
# direction. The sample count comes from `data` instead of a hard-coded 72.
outer_vertices = set(fixed)
inner_circle = [i for i in range(data.shape[0]) if i not in outer_vertices]
for i, v in enumerate(inner_circle):
    angle = (-i/len(inner_circle)) * 2* np.pi - 1.9*np.pi/4
    pos_fixed[v] = (7*np.sin( angle), 3*np.cos(angle))

# All nodes are pinned, so the spring layout keeps the positions above.
fixed += inner_circle

# Create the output directory here so this cell does not rely on another
# cell's loop having created it.
os.makedirs("plots/COIL_CYCLES/codomain_2", exist_ok=True)
ax = plot_cycles_join(data, edgelist, fixed, pos_fixed, figsize, k=4, imsize=1)
plt.savefig("plots/COIL_CYCLES/codomain_2/cycles_join.png")

In [None]:
# Highlight each codomain representative on top of the joint graph of all cycles.
figsize = (8,8)
colorcycles=["red", "blue", "green"]
os.makedirs("plots/COIL_CYCLES/codomain_2", exist_ok=True)
for rep_idx, repr_cycle in enumerate(exp_ibfm[0]["X_reps_1"]):
    ax = plot_cycles_join(data, edgelist, fixed, pos_fixed, figsize, k=4, imsize=1)
    # Wrap around the palette so more representatives than colors cannot raise IndexError.
    color = colorcycles[rep_idx % len(colorcycles)]
    for start_vertex, end_vertex in np.array(repr_cycle).reshape((-1, 2)):
        start = pos_fixed[start_vertex]
        end = pos_fixed[end_vertex]
        ax.plot([start[0], end[0]], [start[1], end[1]], c=color, linewidth=25, zorder=0)

    plt.savefig(f"plots/COIL_CYCLES/codomain_2/cycles_join_rep_{rep_idx}.png")

In [None]:
# Two-panel figure for the matching of the class with itself; the matching is
# drawn only when "S_barcode_1" has at least one bar.
ibfm_out = exp_ibfm[0]
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
s_barcode_nonempty = ibfm_out["S_barcode_1"].shape[0] > 0
if s_barcode_nonempty:
    matching_options = dict(dim=1, frame_on=True, strengths=False, block_function=True)
    ibfm.plot_matching(ibfm_out, ax, fig, **matching_options)
plt.savefig("plots/COIL_CYCLES/blofun_itself.png")

In [None]:
rng = default_rng(22)
cidx = CLASS_LIST[0]
# Rows of `data` whose label equals the chosen class.
class_indices = np.flatnonzero(np.array(y) == cidx).tolist()
class_data = data[class_indices]
# Draw NUM_EXP random subsets without replacement; each one is stored sorted.
exp_indices = [
    np.sort(rng.choice(class_indices, SUBSET_SIZE, replace=False)).tolist()
    for _ in range(NUM_EXP)
]
# Run the matching once per subset.
output_dir = "output"
exp_ibfm = [
    ibfm.get_IBloFunMatch_subset(None, data, subset, output_dir, num_it=4, points=True, max_rad=-1)
    for subset in exp_indices
]

In [None]:
# One two-panel figure per experiment; the matching is drawn only when
# "S_barcode_1" has at least one bar, but the figure is saved either way.
# NOTE(review): the file name is the same as in the class-1 matching cell, so a
# full run overwrites those figures - confirm whether that is intended.
matching_options = dict(dim=1, frame_on=True, strengths=False, block_function=True)
for id_exp, ibfm_out in enumerate(exp_ibfm):
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
    s_barcode_nonempty = ibfm_out["S_barcode_1"].shape[0] > 0
    if s_barcode_nonempty:
        ibfm.plot_matching(ibfm_out, ax, fig, **matching_options)
    plt.savefig(f"plots/COIL_CYCLES/blofun_{id_exp}.png")

In [None]:
# Draw every representative cycle of the subset and highlight its edges in a distinct color.
# The palette is defined here so the cell does not depend on another cell's variable.
colorcycles = ["red", "blue", "green"]
figsize = (8, 12)
os.makedirs("plots/COIL_CYCLES/exp_2", exist_ok=True)
for cycle_idx, repr_cycle in enumerate(exp_ibfm[0]["S_reps_1"]):
    # np.unique already returns the vertices sorted.
    cycle_vertices = np.unique(repr_cycle)
    print(f"Cycle {cycle_idx}")
    print(len(cycle_vertices))
    print(cycle_vertices)
    ax, pos = draw_repr_cycle(repr_cycle, figsize, class_data)
    # Wrap around the palette so more cycles than colors cannot raise IndexError.
    color = colorcycles[cycle_idx % len(colorcycles)]
    for start_vertex, end_vertex in np.array(repr_cycle).reshape((-1, 2)):
        start = pos[start_vertex]
        end = pos[end_vertex]
        ax.plot([start[0], end[0]], [start[1], end[1]], c=color, linewidth=55, zorder=0)
    plt.savefig(f"plots/COIL_CYCLES/exp_2/cycle_rep_{cycle_idx}.png")

In [None]:
# Highlight each subset representative on top of the joint graph of the codomain cycles.
figsize = (8,8)
colorcycles=["red", "blue", "green"]
os.makedirs("plots/COIL_CYCLES/exp_2", exist_ok=True)
for rep_idx, repr_cycle in enumerate(exp_ibfm[0]["S_reps_1"]):
    ax = plot_cycles_join(data, edgelist, fixed, pos_fixed, figsize, k=4, imsize=1)
    # Wrap around the palette so more representatives than colors cannot raise IndexError.
    color = colorcycles[rep_idx % len(colorcycles)]
    for start_vertex, end_vertex in np.array(repr_cycle).reshape((-1, 2)):
        start = pos_fixed[start_vertex]
        end = pos_fixed[end_vertex]
        ax.plot([start[0], end[0]], [start[1], end[1]], c=color, linewidth=25, zorder=0)

    plt.savefig(f"plots/COIL_CYCLES/exp_2/rep_X_{rep_idx}.png")

# Second class, bad subset example

In [None]:
# Three hand-picked subsets of the 72 samples:
#   1. the first 18 and the last 18 indices,
#   2. every fifth index,
#   3. every fifth index plus every third index within 2..17 and 53..71.
indices_subsets = [
    list(range(18)) + list(range(54, 72)),
    list(range(0, 72, 5)),
    # np.unique both sorts and removes the duplicated indices.
    np.unique(list(range(0, 72, 5)) + list(range(2, 18, 3)) + list(range(53, 72, 3))),
]
for idx_subset in indices_subsets:
    print(len(idx_subset))
# Run the matching once per subset.
output_dir = "output"
exp_ibfm = [
    ibfm.get_IBloFunMatch_subset(None, data, subset, output_dir, num_it=4, points=True, max_rad=-1)
    for subset in indices_subsets
]

In [None]:
# One two-panel figure per hand-picked subset; the matching is drawn only when
# "S_barcode_1" has at least one bar, but the figure is saved either way.
matching_options = dict(dim=1, frame_on=True, strengths=False, block_function=True)
for id_exp, ibfm_out in enumerate(exp_ibfm):
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
    s_barcode_nonempty = ibfm_out["S_barcode_1"].shape[0] > 0
    if s_barcode_nonempty:
        ibfm.plot_matching(ibfm_out, ax, fig, **matching_options)
    plt.savefig(f"plots/COIL_CYCLES/blofun_bad_{id_exp}.png")

In [None]:
# Highlight each representative of the first hand-picked subset on the joint graph.
figsize = (8,8)
colorcycles=["red", "blue", "green"]
os.makedirs("plots/COIL_CYCLES/exp_2", exist_ok=True)
for rep_idx, repr_cycle in enumerate(exp_ibfm[0]["S_reps_1"]):
    ax = plot_cycles_join(data, edgelist, fixed, pos_fixed, figsize, k=4, imsize=1)
    # Wrap around the palette so more representatives than colors cannot raise IndexError.
    color = colorcycles[rep_idx % len(colorcycles)]
    for start_vertex, end_vertex in np.array(repr_cycle).reshape((-1, 2)):
        start = pos_fixed[start_vertex]
        end = pos_fixed[end_vertex]
        ax.plot([start[0], end[0]], [start[1], end[1]], c=color, linewidth=25, zorder=0)

    plt.savefig(f"plots/COIL_CYCLES/exp_2/rep_X_bad_{rep_idx}.png")

In [None]:
# Highlight each representative of the third hand-picked subset on the joint graph.
figsize = (8,8)
colorcycles=["red", "blue", "green", "orange"]
os.makedirs("plots/COIL_CYCLES/exp_2", exist_ok=True)
for rep_idx, repr_cycle in enumerate(exp_ibfm[2]["S_reps_1"]):
    ax = plot_cycles_join(data, edgelist, fixed, pos_fixed, figsize, k=4, imsize=1)
    print(np.unique(repr_cycle))
    # Wrap around the palette so more representatives than colors cannot raise IndexError.
    color = colorcycles[rep_idx % len(colorcycles)]
    for start_vertex, end_vertex in np.array(repr_cycle).reshape((-1, 2)):
        start = pos_fixed[start_vertex]
        end = pos_fixed[end_vertex]
        ax.plot([start[0], end[0]], [start[1], end[1]], c=color, linewidth=25, zorder=0)

    plt.savefig(f"plots/COIL_CYCLES/exp_2/rep_X_bad_1_{rep_idx}.png")

# See how the Minmax sampling performs 
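Minmax (farthest-point) sampling repeatedly adds the point that is farthest from the points already chosen, so the subset spreads over the whole dataset and the matching should be good.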

In [None]:
# Lower-triangular pairwise Euclidean distances: distance_mat[i][j] is the
# distance between points i and j for every j < i (row 0 is empty).
# The distances are taken on a float copy of the data: with unsigned integer
# pixel values (e.g. uint8) the element-wise difference would otherwise wrap
# around instead of going negative, giving wrong distances.
data_float = np.asarray(data, dtype=np.float64)
distance_mat = [
    [distance.euclidean(pt, target) for target in data_float[:i]]
    for i, pt in enumerate(data_float)
]

In [None]:
# Expand the lower-triangular lists into a full symmetric matrix; the diagonal stays at zero.
num_pts = data.shape[0]
dist_M = np.zeros((num_pts, num_pts))
for row in range(num_pts):
    for col in range(row):
        pair_distance = distance_mat[row][col]
        dist_M[row][col] = pair_distance
        dist_M[col][row] = pair_distance

Now that we have the distance matrix, we greedily select 36 samples (the starting point plus 35 more).

In [None]:
# Greedy minmax selection: start from point 0 and, 35 times, add the unselected
# point whose distance to its closest selected point is largest.
indices_subset = [0]
for _ in range(35):
    candidates = [idx for idx in range(num_pts) if idx not in indices_subset]
    # Rows are the selected points, columns the candidates; the column-wise
    # minimum is each candidate's distance to the selected set.
    dist_to_selected = dist_M[np.ix_(indices_subset, candidates)].min(axis=0)
    farthest = candidates[np.argmax(dist_to_selected)]
    indices_subset.append(farthest)

indices_subset = np.sort(indices_subset)

See how the matching performs.

In [None]:
# Run the matching for the minmax-sampled subset and plot it.
indices_subsets = [indices_subset]
exp_ibfm = []
output_dir = "output"
for idx_subset in indices_subsets:
    exp_ibfm.append(ibfm.get_IBloFunMatch_subset(None, data, idx_subset, output_dir, num_it=4, points=True, max_rad=-1))

for id_exp, ibfm_out in enumerate(exp_ibfm):
    fig, ax = plt.subplots(ncols=2, nrows=1, figsize=(8,4))
    print(ibfm_out["S_barcode_1"].shape)
    # The matching is drawn only when "S_barcode_1" has at least one bar.
    if ibfm_out["S_barcode_1"].shape[0] > 0:
        ibfm.plot_matching(ibfm_out, ax, fig, dim=1, frame_on=True, strengths=False, block_function=True)
    # Saved under a minmax-specific name: reusing "blofun_bad_{id_exp}.png"
    # overwrote the figure of the first hand-picked subset.
    plt.savefig(f"plots/COIL_CYCLES/blofun_minmax_{id_exp}.png")

In [None]:
# Highlight each representative of the minmax-sampled subset on the joint graph.
figsize = (8,8)
colorcycles=["red", "blue", "green", "orange"]
os.makedirs("plots/COIL_CYCLES/exp_2", exist_ok=True)
for rep_idx, repr_cycle in enumerate(exp_ibfm[0]["S_reps_1"]):
    ax = plot_cycles_join(data, edgelist, fixed, pos_fixed, figsize, k=4, imsize=1)
    # Wrap around the palette so more representatives than colors cannot raise IndexError.
    color = colorcycles[rep_idx % len(colorcycles)]
    for start_vertex, end_vertex in np.array(repr_cycle).reshape((-1, 2)):
        start = pos_fixed[start_vertex]
        end = pos_fixed[end_vertex]
        ax.plot([start[0], end[0]], [start[1], end[1]], c=color, linewidth=25, zorder=0)

    plt.savefig(f"plots/COIL_CYCLES/exp_2/rep_minmax_1_{rep_idx}.png")