In [None]:
#hide
%load_ext autoreload
%autoreload 2

In [None]:
# default_exp metrics

# Metrics

> Metrics and norms

In [None]:
#export 
from nbdev.showdoc import *
from grapht.graphtools import laplacian
from functools import lru_cache
from pathlib import Path
import networkx as nx
import numpy as np
import scipy.sparse as sp

## Norms

SciPy doesn't have implementations for these norms.

In [None]:
#export 
def sparse_norm(A, ord=2):
    """Like `scipy.sparse.linalg.norm` but with 2 and max norm implemented

    `ord == 2` is delegated to `sparse_2norm` and `ord == 'max'` to `sparse_maxnorm`;
    any other value of `ord` is passed unchanged to `scipy.sparse.linalg.norm`.
    """
    if ord == 2:
        return sparse_2norm(A)
    elif ord == 'max':
        return sparse_maxnorm(A)
    else:
        # Import the submodule explicitly: `import scipy.sparse as sp` on its own
        # does not guarantee that the `sp.linalg` attribute has been loaded.
        from scipy.sparse.linalg import norm
        return norm(A, ord=ord)

def sparse_2norm(A):
    """Returns the matrix 2-norm of a sparse matrix `A`

    The value is the largest absolute eigenvalue reported by
    `scipy.sparse.linalg.eigsh`, so `A` is expected to be real symmetric (or
    complex Hermitian); for such matrices this equals the spectral norm.
    A sparse matrix with no nonzero entries returns 0 without calling the solver.
    """
    # Import the submodule explicitly: `import scipy.sparse as sp` on its own
    # does not guarantee that the `sp.linalg` attribute has been loaded.
    from scipy.sparse.linalg import eigsh

    # The norm of an all-zero matrix is 0 by definition, and the iterative
    # eigensolver can fail on such an input, so answer directly.
    if sp.issparse(A) and A.count_nonzero() == 0:
        return np.float64(0.0)

    # 'LM' asks for the eigenvalue of largest magnitude, which may be negative.
    dominant_eigenvalue = eigsh(A, k=1, which='LM', return_eigenvectors=False)
    return np.abs(dominant_eigenvalue)[0]

def sparse_maxnorm(A):
    """Returns the largest absolute entry, max |A_ij|, of a sparse matrix `A`

    The most negative entry is negated and compared against the most positive one.
    """
    smallest_entry = A.min()
    largest_entry = A.max()
    return max(-smallest_entry, largest_entry)

## Metrics

The Laplacian distance isn't a real metric, but 🤷‍♂️

In [None]:
#export
def laplacian_distance(G, Gp, setdiag=False):
    r"""Calculates $|| \mathcal{L} -  \mathcal{L}_p ||$ using the matrix 2-norm

    `G` and `Gp` are each passed to `laplacian` together with `setdiag`, and the
    norm of the difference of the two results is evaluated with `sparse_2norm`.
    """
    # The docstring is a raw string so the LaTeX backslashes are not treated as
    # (invalid) escape sequences.
    L = laplacian(G, setdiag)
    Lp = laplacian(Gp, setdiag)
    # The sign of the difference does not matter: E and -E have the same norm.
    E = Lp - L
    return sparse_2norm(E)

## Line distances

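This is used to calculate the distance between edges (the shortest-path length between them in the line graph). For `LineDistancesDataset` the distances are cached: they are loaded from a precomputed file.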

In [None]:
#export
class LineDistances:
    "Distances between edges of `G`, measured as shortest-path lengths in the line graph of `G`"

    def __init__(self, G):
        self.G = G
        self.line_graph = nx.line_graph(G)

    def __call__(self, edge1, edge2):
        "Calculating the linegraph distance between `edge1` and `edge2`"
        return nx.shortest_path_length(self.line_graph, source=edge1, target=edge2)

    def average_distance(self, edges):
        "Calculates the average linegraph distance between all pairs of edges in `edges`"
        return np.mean(self.pairwise_distances(edges))

    def pairwise_distances(self, edges):
        "Returns the list of distances for every unordered pair of distinct positions in `edges`"
        n_edges = len(edges)
        # Only pairs with first < second are visited, so each pair is measured once.
        return [
            self(edges[first], edges[second])
            for first in range(n_edges)
            for second in range(first + 1, n_edges)
        ]
    

class LineDistancesDataset(LineDistances):
    "`LineDistances` that looks distances up in a precomputed matrix stored on disk for `dataset`"

    def __init__(self, G, dataset):
        super().__init__(G)
        self.line_graph_nodes = list(self.line_graph.nodes())
        # Position of every line graph node, built once so that `edge_index` is a
        # dictionary lookup rather than a scan of `line_graph_nodes`.
        self._edge_positions = {edge: position for position, edge in enumerate(self.line_graph_nodes)}
        self.dataset = dataset
        self.load_dataset()

    def load_dataset(self):
        "Loads `data/{dataset}_linegraph_distances.npy` into `self.all_path_lengths`"
        # The data directory is resolved relative to the parent of the directory
        # that contains this module.
        fname = Path(__file__).parents[1].joinpath(f'data/{self.dataset}_linegraph_distances.npy')
        # Use a context manager so the file handle is closed once the array is read.
        with open(fname, 'rb') as distances_file:
            self.all_path_lengths = np.load(distances_file)

    def __call__(self, edge1, edge2):
        "Returns the stored linegraph distance between `edge1` and `edge2`"
        # NOTE(review): assumes the stored matrix is indexed in the same order as
        # `self.line_graph_nodes` -- confirm against whatever generated the file.
        i, j = self.edge_index(edge1), self.edge_index(edge2)
        return self.all_path_lengths[i, j]

    def edge_index(self, edge):
        "Returns the position of `edge` in `self.line_graph_nodes`; raises `ValueError` if it is absent"
        try:
            return self._edge_positions[edge]
        except KeyError:
            # Keep the exception type that `list.index` raised for unknown edges.
            raise ValueError(f'{edge!r} is not in list') from None

In [None]:
#hide
#from grapht.sampling import sample_edges
#from grapht.data import get_benchmark
#A, X, y = get_benchmark('cora')
#G = nx.from_scipy_sparse_matrix(A)
#ld = LineDistancesDataset(G, dataset='cora')
#linegraph = nx.line_graph(G)
#for _ in range(100):
#    edge1, edge2 = sample_edges(G, 2)
#    assert nx.shortest_path_length(linegraph, source=edge1, target=edge2) == int(ld(edge1, edge2))

## Degree

In [None]:
#export
def average_gmdegree(G, edges):
    """The mean, over all edges in `edges`, of the geometric mean of the edge's endpoint degrees

    Each edge is scored with `edge_degree_gm(G, edge)` and the scores are averaged.
    """
    per_edge_gm = [edge_degree_gm(G, current_edge) for current_edge in edges]
    return np.mean(per_edge_gm)

def edge_degree_gm(G, edge):
    """Geometric mean of the degrees of the two endpoints of `edge`

    For an edge (u, v) with degrees du and dv in `G`, this returns sqrt(du * dv).
    """
    degree_u = G.degree(edge[0])
    degree_v = G.degree(edge[1])
    return np.sqrt(degree_u * degree_v)

In [None]:
#hide
# Run nbdev's notebook-to-script export; the output below lists each notebook it converted.
from nbdev.export import notebook2script
notebook2script()

Converted 00_graphtools.ipynb.
Converted 01_sampling.ipynb.
Converted 02_metrics.ipynb.
Converted 03_perturb.ipynb.
Converted 04_plotting.ipynb.
Converted 05_data.ipynb.
Converted index.ipynb.
