In [1]:
from scipy.spatial.distance import squareform, cdist
from functools import partial

import os
import graco
import numpy as np
import pandas as pd
import networkx as nx

In [2]:
pd.set_option("display.max_columns", 50)

DATA_DIRECTORY = "/media/clusterduck123/joe/data"
HUMAN_DIRECTORY = f"{DATA_DIRECTORY}/processed-data/human"
NETWORK_DIRECTORY = f"{HUMAN_DIRECTORY}/networks"
MATRIX_DIRECTORY  = f"{HUMAN_DIRECTORY}/distance-matrices"

# Distance matrices

In [3]:
PPI_nx = nx.read_edgelist(f"{NETWORK_DIRECTORY}/PPI_BioGRID.txt")
GDV = graco.orbits(PPI_nx)
GCV = graco.coefficients(GDV)

## GDV

## GCV-D

In [4]:
feature = 'GCV-D'
feature_matrix = GCV['D']

if not os.path.exists(f"{MATRIX_DIRECTORY}/{feature}"):
    os.makedirs(f"{MATRIX_DIRECTORY}/{feature}/")

In [5]:
for distance in {'canberra', 'cityblock', 'hellinger'}:
    D = graco.GCV_distance(feature_matrix, distance)
    np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt", D, 
           fmt='%.7f', header=' '.join(PPI_nx), comments='')

## GCV-A

In [6]:
feature = 'GCV-A'
feature_matrix = GCV['A']

if not os.path.exists(f"{MATRIX_DIRECTORY}/{feature}"):
    os.makedirs(f"{MATRIX_DIRECTORY}/{feature}/")

In [None]:
for distance in {'canberra', 'cityblock', 'hellinger'}:
    D = graco.GCV_distance(feature_matrix, distance)
    np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt", D, 
           fmt='%.7f', header=' '.join(PPI_nx), comments='')

## GCV-G

In [None]:
feature = 'GCV-G'
feature_matrix = GCV['G']

if not os.path.exists(f"{MATRIX_DIRECTORY}/{feature}"):
    os.makedirs(f"{MATRIX_DIRECTORY}/{feature}/")

In [None]:
for distance in {'canberra', 'cityblock', 'hellinger'}:
    D = graco.GCV_distance(feature_matrix, distance)
    np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt", D, 
           fmt='%.7f', header=' '.join(PPI_nx), comments='')

## GCV-DA

In [None]:
feature = 'GCV-DA'
feature_matrix = GCV[['D','A']]

if not os.path.exists(f"{MATRIX_DIRECTORY}/{feature}"):
    os.makedirs(f"{MATRIX_DIRECTORY}/{feature}/")

In [None]:
for distance in {'canberra', 'cityblock', 'hellinger'}:
    D = graco.GCV_distance(feature_matrix, distance)
    np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt", D, 
           fmt='%.7f', header=' '.join(PPI_nx), comments='')

## GCV-DG

In [5]:
GCV_DG = GCV[['D', 'G']]

if not os.path.exists(f"{MATRIX_DIRECTORY}/GCV-DG"):
    os.makedirs(f"{MATRIX_DIRECTORY}/GCV-DG/")

In [6]:
order_source_DG = sorted({(order,source) for order,source,target in GCV_DG.columns})

### graco

In [8]:
# Normalized1 L_1
D_dict = {}
for order,source in order_source_DG:
    length = len(GCV_DG[order][source].T)
    D = graco.distance_matrix.normalized1_lp(GCV_DG[order][source], 1) / normalizer('normalized1_l1',length)
    D_dict[(order,source)] = D 
    print(length, np.nanmax(D))
    
D = np.nanmean([D_i for D_i in D_dict.values()], axis=0)
np.savetxt(f"{MATRIX_DIRECTORY}/GCV-DG/normalized1-l1_BioGRID.txt", D, 
           fmt='%.7f', header=' '.join(PPI_nx), comments='')

2 1.0
4 1.0
3 1.0
3 1.0
2 1.0
4 1.0
4 1.0


MemoryError: Unable to allocate array with shape (17125, 1, 17125) and data type float64

In [None]:
# Normalized1 L_2
D_dict = {}
for order,source in order_source_DG:
    length = len(GCV_DG[order][source].T)
    D = graco.distance_matrix.normalized1_lp(GCV_DG[order][source], 2) / normalizer('normalized1_l2',length)
    D_dict[(order,source)] = D 
    print(length, np.nanmax(D))
    
D = np.nanmean([D_i for D_i in D_dict.values()], axis=0)
np.savetxt(f"{MATRIX_DIRECTORY}/GCV-DG/normalized1-l2_BioGRID.txt", D, 
           fmt='%.7f', header=' '.join(PPI_nx), comments='')

In [None]:
# Normalized1 L_inf
D_dict = {}
for order,source in order_source_DG:
    length = len(GCV_DG[order][source].T)
    D = graco.distance_matrix.normalized1_lp(GCV_DG[order][source], np.inf) / normalizer('normalized1_linf',length)
    D_dict[(order,source)] = D 
    print(length, np.nanmax(D))
    
D = np.nanmean([D_i for D_i in D_dict.values()], axis=0)
np.savetxt(f"{MATRIX_DIRECTORY}/GCV-DG/normalized1-linf_BioGRID.txt", D, 
           fmt='%.7f', header=' '.join(PPI_nx), comments='')