# Data-to-graph demo

_Ismael Mendoza (UMich) & Alex Malz (CMU)_

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pickle 

In [None]:
import networkx as nx

## Load data

This notebook shows how to load the mock data from BTK provided in `data/mock.pkl` (read below through the notebook-relative path `../data/mock.pkl`).

In [None]:
# Load the mock BTK data set. The context manager closes the file handle as soon
# as the pickle has been read instead of leaving it open for the kernel's lifetime.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open('../data/mock.pkl', 'rb') as mock_file:
    images, true, pred = pickle.load(mock_file)
images.shape

`images` is an array of 100 postage stamps, each with 6 photometric bands of 120x120 pixels.

`true` and `pred` are lists of astropy tables with columns for the `x` and `y` coordinates of the centroids and a `flux` value for each source, one table per postage stamp

In [None]:
# draw the index of one postage stamp uniformly from [0, len(images))
idx = np.random.randint(len(images))
idx

In [None]:
band_idx = 2
fig, ax = plt.subplots(1, 1, figsize=(8, 8))

# keep the image handle so the colorbar can be attached to it directly
stamp_image = ax.imshow(images[idx, band_idx], cmap='gray')

# overlay both catalogs; they share marker size and stroke width
centroid_style = dict(s=150, linewidth=3)
for catalog, color, marker, label in (
    (true[idx], "blue", "x", "True centroids"),
    (pred[idx], "red", "+", "Pred centroids"),
):
    ax.scatter(
        catalog["x"],
        catalog["y"],
        color=color,
        marker=marker,
        label=label,
        **centroid_style,
    )

# colorbar for the displayed band
cbar = fig.colorbar(stamp_image, ax=ax)
cbar.ax.set_ylabel("Flux", rotation=-90, va="bottom", fontsize=20)

# legend for the two centroid catalogs
ax.legend(loc="upper right")

## Interpret one postage stamp as a graph

In [None]:
# Label every source with a side prefix ('T' = true, 'P' = predicted) followed by
# its log10 flux rounded to two decimals; these strings are used as graph node names.
# Item assignment (rather than `add_column`) replaces an existing 'mag' column, so
# this cell can be re-run without failing on a duplicate column name.
# NOTE(review): two sources in the same table whose rounded log10 flux coincide get
# identical labels and would collapse into a single graph node -- confirm this is OK.
true[idx]['mag'] = ['T' + str(round(np.log10(t['flux']), 2)) for t in true[idx]]
pred[idx]['mag'] = ['P' + str(round(np.log10(p['flux']), 2)) for p in pred[idx]]

Let's associate a weight with each edge, defined as the inverse of the distance between the true and the predicted centroid, so that closer pairs get heavier edges.

In [None]:
def get_dist(a, b):
    """Return the Euclidean distance between `a` and `b`.

    Both arguments are indexed with the keys 'x' and 'y'.
    """
    return np.sqrt((a['x'] - b['x'])**2 + (a['y'] - b['y'])**2)

In [None]:
# one (true label, pred label, weight) triple for every true/pred pair,
# where the weight is the inverse centroid distance rounded to 3 decimals
# NOTE(review): a pair with identical centroids has distance 0 -- confirm how
# such an edge should be weighted.
edgelist = [
    (true_src['mag'], pred_src['mag'], round(1./get_dist(true_src, pred_src), 3))
    for true_src in true[idx]
    for pred_src in pred[idx]
]

In [None]:
# build the graph: true sources are tagged bipartite=0, predicted ones bipartite=1
G = nx.Graph()
for side, catalog in enumerate((true[idx], pred[idx])):
    G.add_nodes_from(catalog['mag'], bipartite=side)
G.add_weighted_edges_from(edgelist)

In [None]:
# show every edge as a (u, v, weight) triple
G.edges(data='weight')

In [None]:
# display the first of the two node sets returned by networkx
node_sets = nx.bipartite.sets(G)
node_sets[0]

In [None]:
# Split the nodes using the `bipartite` attribute stored when they were added,
# rather than re-deriving the two sides with `nx.bipartite.sets(G)` on every call:
# that helper raises AmbiguousSolution when the graph is disconnected.
true_nodes = [node for node, side in G.nodes(data='bipartite') if side == 0]
pred_nodes = [node for node, side in G.nodes(data='bipartite') if side == 1]

# Draw each edge with a width proportional to its weight; the heaviest edge gets
# MAX_EDGE_WIDTH. (Taking `len()` of an edge's attribute dict only counts its
# attributes, which made every width identical regardless of the weight.)
MAX_EDGE_WIDTH = 5.0
drawn_edges = list(G.edges())
edge_weights = np.array([G[u][v]['weight'] for u, v in drawn_edges], dtype=float)
finite_weights = edge_weights[np.isfinite(edge_weights)]
heaviest = finite_weights.max() if finite_weights.size else 0.0
if heaviest > 0:
    # clip so that a non-finite weight is drawn at the maximum width
    edge_widths = MAX_EDGE_WIDTH * np.clip(edge_weights / heaviest, 0.0, 1.0)
else:
    # no usable weights: fall back to uniform widths
    edge_widths = np.ones(len(drawn_edges))

# place the true sources on one side and the predicted sources on the other
pos = nx.bipartite_layout(G, true_nodes)
nx.draw_networkx_nodes(G, pos, nodelist=true_nodes, node_color="blue", label='True')
nx.draw_networkx_nodes(G, pos, nodelist=pred_nodes, node_color="red", label='Pred')
nx.draw_networkx_edges(G, pos, edgelist=drawn_edges, width=edge_widths)

## Make all postage stamps into graphs

## Explore diagnostics on "real" data

For example: `nx.bipartite.spectral_bipartivity(G)`

