In [23]:
from scipy.spatial.distance import squareform, cdist
from scipy.spatial.distance import pdist
from itertools import combinations
from functools import partial

import networkx as nx
import pandas as pd
import numpy as np
import random
import graco
import os

In [14]:
? cdist

In [13]:
cdist(u,v)

ValueError: XA must be a 2-dimensional array.

In [136]:
GDV = np.random.randint(100, size=[2**12,15])

In [167]:
distance, = random.sample(['euclidean', 'cityblock', 'seuclidean', 'sqeuclidean', 
                 'cosine', 'correlation', 'chebyshev', 'canberra', 
                 'braycurtis', 'mahalanobis'], 1)

In [168]:
distance

'braycurtis'

In [169]:
D1 = cdist(GDV.astype(float),GDV.astype(float), distance)

In [170]:
D2 = squareform(pdist(GDV, distance))

In [176]:
float(pdist([u,v], distance))

0.3383647798742138

In [172]:
d1 = D1[0,1]
d2 = D2[0,1]

In [173]:
d1, d2

(0.3383647798742138, 0.3383647798742138)

In [166]:
u = GDV[0]
v = GDV[1]

In [151]:
np.testing.assert_almost_equal(D1,D2)

AssertionError: 
Arrays are not almost equal to 7 decimals

Mismatch: 100%
Max absolute difference: 0.00059807
Max relative difference: 6.10481976e-05
 x: array([[0.       , 5.9492918, 4.6737453, ..., 5.3742529, 4.9430062,
        5.1458339],
       [5.9492918, 0.       , 5.4020045, ..., 3.9525219, 4.445893 ,...
 y: array([[0.       , 5.9489287, 4.67346  , ..., 5.3739248, 4.9427044,
        5.1455198],
       [5.9489287, 0.       , 5.4016747, ..., 3.9522807, 4.4456216,...

In [105]:
var = np.var(GDV, axis=0, ddof=1)

In [107]:
(np.sum((u-v)**2/np.var(GDV, axis=0, ddof=1)) + np.sum((u-v)**2/np.var(GDV, axis=0, ddof=0)))/2 

24.753621434742364

In [104]:
var

array([ 911.8836,  799.4275, 1065.5171,  724.8644,  846.3075,  873.2256,
        849.4379,  805.4331,  610.6675,  882.2291,  761.5584, 1037.9659,
        881.3444,  797.5684,  823.8604])

In [65]:
np.testing.assert_almost_equal(D1, D2, decimal=4)

AssertionError: 
Arrays are not almost equal to 4 decimals

Mismatch: 99%
Max absolute difference: 0.02046299
Max relative difference: 0.00252207
 x: array([[0.    , 4.9753, 4.8311, ..., 5.6798, 5.5659, 5.9256],
       [4.9753, 0.    , 6.4412, ..., 6.005 , 4.7027, 5.2528],
       [4.8311, 6.4412, 0.    , ..., 4.1455, 6.039 , 7.1764],...
 y: array([[0.    , 4.9628, 4.8189, ..., 5.6656, 5.5519, 5.9107],
       [4.9628, 0.    , 6.425 , ..., 5.9898, 4.6909, 5.2396],
       [4.8189, 6.425 , 0.    , ..., 4.1351, 6.0238, 7.1584],...

In [66]:
D1

array([[0.        , 4.97530114, 4.83110061, ..., 5.67983905, 5.56589755,
        5.92562222],
       [4.97530114, 0.        , 6.44124966, ..., 6.00495613, 4.70269151,
        5.25282916],
       [4.83110061, 6.44124966, 0.        , ..., 4.14554063, 6.03904217,
        7.17641413],
       ...,
       [5.67983905, 6.00495613, 4.14554063, ..., 0.        , 6.27902009,
        6.90941899],
       [5.56589755, 4.70269151, 6.03904217, ..., 6.27902009, 0.        ,
        5.40386731],
       [5.92562222, 5.25282916, 7.17641413, ..., 6.90941899, 5.40386731,
        0.        ]])

In [8]:
graco.distance.normalized1_lp(u,v,np.inf)

0.8620689655172413

In [9]:
graco.distance(u,v,'normalized1_linf')

0.8620689655172413

In [9]:
u,v = np.random.uniform(size=[2,100])

In [15]:
int('normalized1_l2'.split('normalized1_l')[1])

2

In [11]:
np.testing.assert_almost_equal(D1, D2, decimal=4)

In [3]:
for u,v in combinations(GDV,2):
    break

In [5]:
graco.distance.GDV_similarity(u,v)

0.2889752125926776

In [2]:
G = nx.erdos_renyi_graph(100,0.1)
GDV = graco.orbits(G)
GCV = graco.coefficients(GDV)

In [6]:
def GCV_distance(GCV, distance, nan='include'):
    
    gcv = GCV.copy()
    
    levels = len(GCV.columns.levels)
    gcv.columns = GCV.columns.droplevel(list(range(levels-2)))
    
    if nan == 'include':
        pass
    else:
        raise Exception
    return gcv

In [9]:
GCV_distance(GCV, 3)

Source,0,0,1,1,1,1,2,2,2,3,...,1-2,1-2,2-1,2-1,2-1,2-1,3-3,3-3,3-3,3-3
Target,2,3,5,8,10,12,7,11,13,11,...,10,12,5,8,11,13,10,12,13,14
0,0.942857,0.057143,0.841335,0.098361,0.056792,0.003513,0.883450,0.114996,0.001554,0.948718,...,0.088423,0.005469,0.817871,0.047809,0.042117,0.001138,0.906542,0.056075,0.037383,0.000000
1,0.863636,0.136364,0.786642,0.077922,0.120594,0.014842,0.742105,0.242105,0.015789,0.766667,...,0.140389,0.017279,0.779412,0.038603,0.063419,0.008272,0.764706,0.094118,0.105882,0.035294
2,0.800000,0.200000,0.716475,0.061303,0.186462,0.035760,0.645833,0.305556,0.048611,0.611111,...,0.161863,0.031042,0.773793,0.033103,0.060690,0.019310,0.722772,0.138614,0.138614,0.000000
3,0.892857,0.107143,0.805019,0.084942,0.098456,0.011583,0.800000,0.186667,0.013333,0.777778,...,0.067729,0.007968,0.845842,0.044625,0.028398,0.004057,0.836066,0.098361,0.065574,0.000000
4,0.928571,0.071429,0.873950,0.061625,0.064426,0.000000,0.846154,0.153846,0.000000,1.000000,...,0.063361,0.000000,0.871508,0.030726,0.033520,0.000000,1.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.904762,0.095238,0.848485,0.053030,0.098485,0.000000,0.789474,0.210526,0.000000,1.000000,...,0.092199,0.000000,0.868217,0.027132,0.038760,0.000000,1.000000,0.000000,0.000000,0.000000
96,0.933333,0.066667,0.863636,0.081818,0.054545,0.000000,0.857143,0.142857,0.000000,1.000000,...,0.040000,0.000000,0.879630,0.041667,0.018519,0.000000,1.000000,0.000000,0.000000,0.000000
97,0.944444,0.055556,0.840054,0.102151,0.057796,0.000000,0.882353,0.117647,0.000000,1.000000,...,0.044560,0.000000,0.857339,0.052126,0.019204,0.000000,1.000000,0.000000,0.000000,0.000000
98,0.911111,0.088889,0.817073,0.089431,0.079946,0.013550,0.823171,0.170732,0.006098,0.875000,...,0.076129,0.012903,0.827160,0.045267,0.038409,0.002743,0.808219,0.136986,0.054795,0.000000


In [5]:
GCV

Order,D,D,D,D,D,D,D,D,D,D,...,G,G,G,G,G,G,G,G,G,G
Source,0,0,1,1,1,1,2,2,2,3,...,1-2,1-2,2-1,2-1,2-1,2-1,3-3,3-3,3-3,3-3
Target,2,3,5,8,10,12,7,11,13,11,...,10,12,5,8,11,13,10,12,13,14
0,0.942857,0.057143,0.841335,0.098361,0.056792,0.003513,0.883450,0.114996,0.001554,0.948718,...,0.088423,0.005469,0.817871,0.047809,0.042117,0.001138,0.906542,0.056075,0.037383,0.000000
1,0.863636,0.136364,0.786642,0.077922,0.120594,0.014842,0.742105,0.242105,0.015789,0.766667,...,0.140389,0.017279,0.779412,0.038603,0.063419,0.008272,0.764706,0.094118,0.105882,0.035294
2,0.800000,0.200000,0.716475,0.061303,0.186462,0.035760,0.645833,0.305556,0.048611,0.611111,...,0.161863,0.031042,0.773793,0.033103,0.060690,0.019310,0.722772,0.138614,0.138614,0.000000
3,0.892857,0.107143,0.805019,0.084942,0.098456,0.011583,0.800000,0.186667,0.013333,0.777778,...,0.067729,0.007968,0.845842,0.044625,0.028398,0.004057,0.836066,0.098361,0.065574,0.000000
4,0.928571,0.071429,0.873950,0.061625,0.064426,0.000000,0.846154,0.153846,0.000000,1.000000,...,0.063361,0.000000,0.871508,0.030726,0.033520,0.000000,1.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.904762,0.095238,0.848485,0.053030,0.098485,0.000000,0.789474,0.210526,0.000000,1.000000,...,0.092199,0.000000,0.868217,0.027132,0.038760,0.000000,1.000000,0.000000,0.000000,0.000000
96,0.933333,0.066667,0.863636,0.081818,0.054545,0.000000,0.857143,0.142857,0.000000,1.000000,...,0.040000,0.000000,0.879630,0.041667,0.018519,0.000000,1.000000,0.000000,0.000000,0.000000
97,0.944444,0.055556,0.840054,0.102151,0.057796,0.000000,0.882353,0.117647,0.000000,1.000000,...,0.044560,0.000000,0.857339,0.052126,0.019204,0.000000,1.000000,0.000000,0.000000,0.000000
98,0.911111,0.088889,0.817073,0.089431,0.079946,0.013550,0.823171,0.170732,0.006098,0.875000,...,0.076129,0.012903,0.827160,0.045267,0.038409,0.002743,0.808219,0.136986,0.054795,0.000000


In [21]:
GCV.columns.droplevel([0,2])

Index(['0', '0', '1', '1', '1', '1', '2', '2', '2', '3', '3', '3', '0-0',
       '0-0', '2-1', '2-1', '2-1', '2-1', '1-2', '1-2', '1-2', '3-3', '3-3',
       '3-3', '0-0', '0-0', '1-1', '1-1', '1-1', '1-1', '1-2', '1-2', '1-2',
       '1-2', '2-1', '2-1', '2-1', '2-1', '3-3', '3-3', '3-3', '3-3'],
      dtype='object', name='Source')

In [22]:
range(-3)

range(0, -3)