In [1]:
from scipy.spatial.distance import squareform, cdist
from functools import partial

import os
import graco
import numpy as np
import pandas as pd
import networkx as nx

In [2]:
# Let pandas render up to 50 columns before truncating wide frames.
pd.set_option("display.max_columns", 50)

# Root of the data tree. The default is the original author's local mount;
# set the GRACO_DATA_DIRECTORY environment variable to run this notebook on
# another machine without editing the cell.
DATA_DIRECTORY = os.environ.get("GRACO_DATA_DIRECTORY",
                                "/media/clusterduck123/joe/data")
HUMAN_DIRECTORY = f"{DATA_DIRECTORY}/processed-data/human"
# The PPI edge list is read from here.
NETWORK_DIRECTORY = f"{HUMAN_DIRECTORY}/networks"
# The distance matrices are written below this folder, one sub-folder per feature.
MATRIX_DIRECTORY  = f"{HUMAN_DIRECTORY}/distance-matrices"

# Distance matrices

In [3]:
# Load the BioGRID PPI edge list as a networkx graph.
ppi_edgelist_path = f"{NETWORK_DIRECTORY}/PPI_BioGRID.txt"
PPI_nx = nx.read_edgelist(ppi_edgelist_path)

# Derive the node features with graco: orbits() on the graph first, then
# coefficients() on that result. GCV is what the cells below index into.
GDV = graco.orbits(PPI_nx)
GCV = graco.coefficients(GDV)

## GDV

## GCV-D

In [4]:
# Feature label (used as the output sub-folder name) and the 'D' selection
# of GCV that the distance computation runs on.
feature = 'GCV-D'
feature_matrix = GCV['D']

# Make sure the output folder exists. exist_ok=True replaces the separate
# os.path.exists() check, so nothing can go wrong between "check" and "create".
os.makedirs(f"{MATRIX_DIRECTORY}/{feature}", exist_ok=True)

In [5]:
# Compute one distance matrix per metric and write it to
# <MATRIX_DIRECTORY>/<feature>/<metric>_BioGRID.txt.
# A tuple (not a set literal) keeps the processing order the same on every run.
for distance in ('canberra', 'cityblock', 'hellinger'):
    # NOTE(review): the GCV-DA and GCV-DG cells of this notebook call
    # graco.GCV_distance_matrix instead -- confirm which name the installed
    # graco version actually provides.
    D = graco.GCV_distance(feature_matrix, distance)
    # The header line is the space-separated node names of PPI_nx; comments=''
    # writes it without numpy's default '# ' prefix.
    # NOTE(review): assumes the rows/columns of D follow PPI_nx's node
    # iteration order -- confirm.
    np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt", D,
               fmt='%.7f', header=' '.join(PPI_nx), comments='')
    # Drop the matrix before the next one is built (as the GCV-DA / GCV-DG
    # cells do), so two full matrices are never alive at the same time.
    del D

## GCV-A

In [6]:
# Feature label (used as the output sub-folder name) and the 'A' selection
# of GCV that the distance computation runs on.
feature = 'GCV-A'
feature_matrix = GCV['A']

# Make sure the output folder exists. exist_ok=True replaces the separate
# os.path.exists() check, so nothing can go wrong between "check" and "create".
os.makedirs(f"{MATRIX_DIRECTORY}/{feature}", exist_ok=True)

In [7]:
# Compute one distance matrix per metric and write it to
# <MATRIX_DIRECTORY>/<feature>/<metric>_BioGRID.txt.
# A tuple (not a set literal) keeps the processing order the same on every run.
for distance in ('canberra', 'cityblock', 'hellinger'):
    # NOTE(review): the GCV-DA and GCV-DG cells of this notebook call
    # graco.GCV_distance_matrix instead -- confirm which name the installed
    # graco version actually provides.
    D = graco.GCV_distance(feature_matrix, distance)
    # The header line is the space-separated node names of PPI_nx; comments=''
    # writes it without numpy's default '# ' prefix.
    # NOTE(review): assumes the rows/columns of D follow PPI_nx's node
    # iteration order -- confirm.
    np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt", D,
               fmt='%.7f', header=' '.join(PPI_nx), comments='')
    # Drop the matrix before the next one is built (as the GCV-DA / GCV-DG
    # cells do), so two full matrices are never alive at the same time.
    del D

## GCV-G

In [8]:
# Feature label (used as the output sub-folder name) and the 'G' selection
# of GCV that the distance computation runs on.
feature = 'GCV-G'
feature_matrix = GCV['G']

# Make sure the output folder exists. exist_ok=True replaces the separate
# os.path.exists() check, so nothing can go wrong between "check" and "create".
os.makedirs(f"{MATRIX_DIRECTORY}/{feature}", exist_ok=True)

In [9]:
# Compute one distance matrix per metric and write it to
# <MATRIX_DIRECTORY>/<feature>/<metric>_BioGRID.txt.
# A tuple (not a set literal) keeps the processing order the same on every run.
for distance in ('canberra', 'cityblock', 'hellinger'):
    # NOTE(review): the GCV-DA and GCV-DG cells of this notebook call
    # graco.GCV_distance_matrix instead -- confirm which name the installed
    # graco version actually provides.
    D = graco.GCV_distance(feature_matrix, distance)
    # The header line is the space-separated node names of PPI_nx; comments=''
    # writes it without numpy's default '# ' prefix.
    # NOTE(review): assumes the rows/columns of D follow PPI_nx's node
    # iteration order -- confirm.
    np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt", D,
               fmt='%.7f', header=' '.join(PPI_nx), comments='')
    # Drop the matrix before the next one is built (as the GCV-DA / GCV-DG
    # cells do), so two full matrices are never alive at the same time.
    del D

## GCV-DA

In [4]:
# Feature label (used as the output sub-folder name) and the combined
# 'D' + 'A' selection of GCV that the distance computation runs on.
feature = 'GCV-DA'
feature_matrix = GCV[['D','A']]

# Make sure the output folder exists. exist_ok=True replaces the separate
# os.path.exists() check, so nothing can go wrong between "check" and "create".
os.makedirs(f"{MATRIX_DIRECTORY}/{feature}", exist_ok=True)

In [5]:
# Only the hellinger metric is produced for this feature; the loop shape is
# kept so the cell mirrors the other sections.
for distance in ('hellinger',):
    D = graco.GCV_distance_matrix(feature_matrix, distance)
    # Header row: space-separated node names of PPI_nx, written without a
    # comment prefix (comments='').
    np.savetxt(
        fname=f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt",
        X=D,
        fmt='%.7f',
        header=' '.join(PPI_nx),
        comments='',
    )
    # Remove the name D once the matrix has been written.
    del D

## GCV-DG

In [4]:
# Feature label (used as the output sub-folder name) and the combined
# 'D' + 'G' selection of GCV that the distance computation runs on.
feature = 'GCV-DG'
feature_matrix = GCV[['D','G']]

# Make sure the output folder exists. exist_ok=True replaces the separate
# os.path.exists() check, so nothing can go wrong between "check" and "create".
os.makedirs(f"{MATRIX_DIRECTORY}/{feature}", exist_ok=True)

In [5]:
# Compute one distance matrix per metric and write it to
# <MATRIX_DIRECTORY>/<feature>/<metric>_BioGRID.txt.
# A tuple (not a set literal) keeps the processing order the same on every run.
for distance in ('canberra', 'cityblock', 'hellinger'):
    # NOTE(review): the GCV-D / GCV-A / GCV-G cells of this notebook call
    # graco.GCV_distance instead -- confirm which name the installed graco
    # version actually provides.
    D = graco.GCV_distance_matrix(feature_matrix, distance)
    # The header line is the space-separated node names of PPI_nx; comments=''
    # writes it without numpy's default '# ' prefix.
    # NOTE(review): assumes the rows/columns of D follow PPI_nx's node
    # iteration order -- confirm.
    np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt", D,
               fmt='%.7f', header=' '.join(PPI_nx), comments='')
    # Drop the matrix before the next one is built, so two full matrices are
    # never alive at the same time.
    del D