In [4]:
from scipy.spatial.distance import squareform, cdist, pdist
from itertools import combinations
from functools import partial

import os
import graco
import numpy as np
import pandas as pd
import networkx as nx

In [5]:
# Show up to 50 columns when a DataFrame is displayed.
pd.set_option("display.max_columns", 50)

# Root of the data tree. The default is the original workstation path; set the
# GRACO_DATA_DIRECTORY environment variable to run the notebook on another
# machine without editing this cell.
DATA_DIRECTORY = os.environ.get("GRACO_DATA_DIRECTORY", "/media/clusterduck123/joe/data")
YEAST_DIRECTORY = f"{DATA_DIRECTORY}/processed-data/yeast"
# Input: edge lists are read from here.
NETWORK_DIRECTORY = f"{YEAST_DIRECTORY}/networks"
# Output: one sub-directory per feature set is created below this path.
MATRIX_DIRECTORY  = f"{YEAST_DIRECTORY}/distance-matrices"

# Distance matrices

In [6]:
# Names of every distance that the loops below request from graco.
# The order is kept as-is because the loop variable `distance` keeps the last
# entry after a loop has finished.
all_distances = [
    # plain metric names as used by scipy.spatial.distance
    'cityblock',
    'euclidean',
    'chebyshev',
    'sqeuclidean',
    'canberra',
    # normalized variants (presumably graco-specific -- TODO confirm)
    'normalized1_l1',
    'normalized1_l2',
    'normalized1_linf',
    'normalized2_l1',
    'normalized2_l2',
    'normalized2_linf',
    # remaining scipy-style metric names
    'mahalanobis',
    'seuclidean',
    'cosine',
    'correlation',
    'braycurtis',
]

In [7]:
# Read the BioGRID PPI edge list into a NetworkX graph and derive the two
# feature tables every later cell works on.
ppi_edgelist_path = f"{NETWORK_DIRECTORY}/PPI_BioGRID.txt"
PPI_nx = nx.read_edgelist(ppi_edgelist_path)

# GDV: output of graco.orbits on the graph (presumably one graphlet degree
# vector per node -- TODO confirm against graco).
GDV = graco.orbits(PPI_nx)
# GCV: coefficients derived from GDV; later cells slice it by the column
# groups 'D', 'A', 'G' and 'O'.
GCV = graco.coefficients(GDV)

## GDV

In [5]:
# Name of the feature set; also the sub-directory the GDV matrices go into.
feature = 'GDV'

# exist_ok=True replaces the separate exists() check: it does nothing when the
# directory is already present and avoids the check-then-create race. If the
# path exists but is a regular file this now fails here instead of later.
os.makedirs(f"{MATRIX_DIRECTORY}/{feature}", exist_ok=True)

In [8]:
# One GDV distance matrix per entry of `all_distances`, each written as text
# with 7 decimals. The header row is the space-joined node labels of PPI_nx;
# comments='' keeps NumPy from prefixing that row with '# '.
for distance in all_distances:
    D = graco.distance_matrix(GDV, distance)
    target_file = f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt"
    node_labels = ' '.join(PPI_nx)
    np.savetxt(target_file, D, header=node_labels, comments='', fmt='%.7f')

In [None]:
# Write the matrix stored as GDV_similarity_BioGRID.txt.
# The cell used to pass `distance`, i.e. whatever value the preceding loop left
# behind (the last entry of `all_distances`), so the file content did not match
# its name and the cell failed on a fresh kernel. The metric is now named
# explicitly.
# NOTE(review): assumes graco.distance_matrix accepts 'GDV_similarity' --
# confirm against the installed graco version.
D = graco.distance_matrix(GDV, 'GDV_similarity')
np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/GDV_similarity_BioGRID.txt", D,
           fmt='%.7f', header=' '.join(PPI_nx), comments='')

## GCV-D

In [5]:
# Name of the feature set; also the sub-directory the GCV-D matrices go into.
feature = 'GCV-D'

# Create the output directory; exist_ok=True makes this a no-op when it is
# already there and removes the check-then-create race of a separate exists().
os.makedirs(f"{MATRIX_DIRECTORY}/{feature}", exist_ok=True)

In [7]:
# One distance matrix of the 'D' coefficient group per entry of
# `all_distances`, written with the node labels of PPI_nx as header row.
# NOTE(review): the output saved right after this cell reads
# "Exception: Datatype not understood." -- presumably graco.GCV_distance
# rejects at least one of these names for this input; confirm which.
for distance in all_distances:
    D = graco.GCV_distance(GCV['D'], distance)
    target_file = f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt"
    node_labels = ' '.join(PPI_nx)
    np.savetxt(target_file, D, header=node_labels, comments='', fmt='%.7f')

Exception: Datatype not understood.

In [6]:
# 'hellinger' distance matrix of the 'D' coefficient group, stored in the
# current feature directory with the node labels of PPI_nx as header row.
D = graco.GCV_distance(GCV['D'], 'hellinger')
hellinger_file = f"{MATRIX_DIRECTORY}/{feature}/hellinger_BioGRID.txt"
node_labels = ' '.join(PPI_nx)
np.savetxt(hellinger_file, D, header=node_labels, comments='', fmt='%.7f')

## GCV-A

In [5]:
# Name of the feature set; also the sub-directory the GCV-A matrices go into.
feature = 'GCV-A'

# Create the output directory; exist_ok=True makes this a no-op when it is
# already there and removes the check-then-create race of a separate exists().
os.makedirs(f"{MATRIX_DIRECTORY}/{feature}", exist_ok=True)

In [6]:
# 'hellinger' distance matrix of the 'A' coefficient group.
# The input is selected explicitly as GCV['A'] (same as the loop cell of this
# section). The cell used to read `feature_matrix`, which is not assigned
# anywhere above this point, so it only worked with leftover kernel state.
D = graco.GCV_distance(GCV['A'], 'hellinger')
np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/hellinger_BioGRID.txt", D,
           fmt='%.7f', header=' '.join(PPI_nx), comments='')

In [7]:
# One distance matrix of the 'A' coefficient group per entry of
# `all_distances`, written with the node labels of PPI_nx as header row.
for distance in all_distances:
    D = graco.GCV_distance(GCV['A'], distance)
    target_file = f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt"
    node_labels = ' '.join(PPI_nx)
    np.savetxt(target_file, D, header=node_labels, comments='', fmt='%.7f')

## GCV-G

In [8]:
# Name of the feature set; also the sub-directory the GCV-G matrices go into.
feature = 'GCV-G'
# Only the 'G' column group of the coefficient table; the list selector keeps
# the group level in the column index.
feature_matrix = GCV.loc[:, ['G']]

# Create the output directory; exist_ok=True makes this a no-op when it is
# already there and removes the check-then-create race of a separate exists().
os.makedirs(f"{MATRIX_DIRECTORY}/{feature}", exist_ok=True)

In [None]:
# Only the 'canberra' and 'cityblock' matrices are produced for this feature
# set. The names sit in a set, so the processing order is not fixed.
for distance in {'canberra', 'cityblock'}:
    D = graco.GCV_distance(feature_matrix, distance)
    target_file = f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt"
    node_labels = ' '.join(PPI_nx)
    np.savetxt(target_file, D, header=node_labels, comments='', fmt='%.7f')

In [9]:
# 'hellinger' distance matrix of the current `feature_matrix`, stored in the
# current feature directory with the node labels of PPI_nx as header row.
D = graco.GCV_distance(feature_matrix, 'hellinger')
hellinger_file = f"{MATRIX_DIRECTORY}/{feature}/hellinger_BioGRID.txt"
node_labels = ' '.join(PPI_nx)
np.savetxt(hellinger_file, D, header=node_labels, comments='', fmt='%.7f')

## GCV-Gsym

In [6]:
# Name of the feature set; also the sub-directory the GCV-Gsym matrices go into.
feature = 'GCV-Gsym'
# Start from the 'G' column group (a fresh selection, so GCV itself is not
# modified by the deletions below) ...
feature_matrix = GCV.loc[:, ['G']]
# ... and remove the entries keyed '1-2' and '2-1'.
for dropped_key in (('G', '1-2'), ('G', '2-1')):
    del feature_matrix[dropped_key]

# Create the output directory; exist_ok=True makes this a no-op when it is
# already there and removes the check-then-create race of a separate exists().
os.makedirs(f"{MATRIX_DIRECTORY}/{feature}", exist_ok=True)

In [7]:
# Only the 'canberra' and 'cityblock' matrices are produced for this feature
# set. The names sit in a set, so the processing order is not fixed.
for distance in {'canberra', 'cityblock'}:
    D = graco.GCV_distance(feature_matrix, distance)
    target_file = f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt"
    node_labels = ' '.join(PPI_nx)
    np.savetxt(target_file, D, header=node_labels, comments='', fmt='%.7f')

In [8]:
# 'hellinger' distance matrix of the current `feature_matrix`, stored in the
# current feature directory with the node labels of PPI_nx as header row.
D = graco.GCV_distance(feature_matrix, 'hellinger')
hellinger_file = f"{MATRIX_DIRECTORY}/{feature}/hellinger_BioGRID.txt"
node_labels = ' '.join(PPI_nx)
np.savetxt(hellinger_file, D, header=node_labels, comments='', fmt='%.7f')

## GCV-DA

In [13]:
# Name of the feature set; also the sub-directory the GCV-DA matrices go into.
feature = 'GCV-DA'

# Create the output directory; exist_ok=True makes this a no-op when it is
# already there and removes the check-then-create race of a separate exists().
os.makedirs(f"{MATRIX_DIRECTORY}/{feature}", exist_ok=True)

In [14]:
# One distance matrix of the combined 'D' and 'A' coefficient groups per entry
# of `all_distances`, written with the node labels of PPI_nx as header row.
for distance in all_distances:
    D = graco.GCV_distance(GCV[['D','A']], distance)
    target_file = f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt"
    node_labels = ' '.join(PPI_nx)
    np.savetxt(target_file, D, header=node_labels, comments='', fmt='%.7f')

  return runner(coro)


In [15]:
# 'hellinger' distance matrix of the combined 'D' and 'A' coefficient groups,
# stored in the current feature directory.
D = graco.GCV_distance(GCV[['D','A']], 'hellinger')
hellinger_file = f"{MATRIX_DIRECTORY}/{feature}/hellinger_BioGRID.txt"
node_labels = ' '.join(PPI_nx)
np.savetxt(hellinger_file, D, header=node_labels, comments='', fmt='%.7f')

## GCV-DG

In [16]:
# Name of the feature set; also the sub-directory the GCV-DG matrices go into.
feature = 'GCV-DG'

# Create the output directory; exist_ok=True makes this a no-op when it is
# already there and removes the check-then-create race of a separate exists().
os.makedirs(f"{MATRIX_DIRECTORY}/{feature}", exist_ok=True)

In [17]:
for distance in all_distances:
    D = graco.GCV_distance(feature_matrix, distance)
    np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt", D, 
           fmt='%.7f', header=' '.join(PPI_nx), comments='')

In [18]:
# 'hellinger' distance matrix of the combined 'D' and 'G' coefficient groups.
# The groups are selected explicitly (as the GCV-DA cells do). The cell used
# to read `feature_matrix`, which this section never assigns, so the result
# depended on whichever earlier section had run last.
D = graco.GCV_distance(GCV[['D','G']], 'hellinger')
np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/hellinger_BioGRID.txt", D,
           fmt='%.7f', header=' '.join(PPI_nx), comments='')

## GCV-DG-short

In [14]:
# Name of the feature set and of its output sub-directory. It used to read
# 'GCV-DC-short', which matched neither the section heading nor the D+G
# selection below.
# NOTE(review): matrices written earlier live under the old directory name;
# check any consumer that still reads 'GCV-DC-short'.
feature = 'GCV-DG-short'
# Start from the 'D' and 'G' column groups (a fresh selection, so GCV itself is
# not modified by the deletions below) ...
feature_matrix = GCV.loc[:, ['D','G']]
# ... and remove the 'G' entries keyed '1-2' and '2-1'.
for dropped_key in (('G', '1-2'), ('G', '2-1')):
    del feature_matrix[dropped_key]

# Create the output directory; exist_ok=True makes this a no-op when it is
# already there and removes the check-then-create race of a separate exists().
os.makedirs(f"{MATRIX_DIRECTORY}/{feature}", exist_ok=True)

In [15]:
# One distance matrix of the current `feature_matrix` per entry of
# `all_distances`, written with the node labels of PPI_nx as header row.
for distance in all_distances:
    D = graco.GCV_distance(feature_matrix, distance)
    target_file = f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt"
    node_labels = ' '.join(PPI_nx)
    np.savetxt(target_file, D, header=node_labels, comments='', fmt='%.7f')

In [16]:
# 'hellinger' distance matrix of the current `feature_matrix`, stored in the
# current feature directory with the node labels of PPI_nx as header row.
D = graco.GCV_distance(feature_matrix, 'hellinger')
hellinger_file = f"{MATRIX_DIRECTORY}/{feature}/hellinger_BioGRID.txt"
node_labels = ' '.join(PPI_nx)
np.savetxt(hellinger_file, D, header=node_labels, comments='', fmt='%.7f')

## GCV-all

In [19]:
# Name of the feature set; also the sub-directory the GCV-all matrices go into.
feature = 'GCV-all'
# All of the 'D', 'A' and 'G' column groups (a fresh selection, so GCV itself
# is not modified by the deletion below) ...
feature_matrix = GCV.loc[:, ['D','A','G']]
# ... except the 'G' entry keyed '0-0'.
del feature_matrix[('G', '0-0')]

# Create the output directory; exist_ok=True makes this a no-op when it is
# already there and removes the check-then-create race of a separate exists().
os.makedirs(f"{MATRIX_DIRECTORY}/{feature}", exist_ok=True)

In [20]:
# One distance matrix of the current `feature_matrix` per entry of
# `all_distances`, written with the node labels of PPI_nx as header row.
for distance in all_distances:
    D = graco.GCV_distance(feature_matrix, distance)
    target_file = f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt"
    node_labels = ' '.join(PPI_nx)
    np.savetxt(target_file, D, header=node_labels, comments='', fmt='%.7f')

In [21]:
# 'hellinger' distance matrix of the current `feature_matrix`, stored in the
# current feature directory with the node labels of PPI_nx as header row.
D = graco.GCV_distance(feature_matrix, 'hellinger')
hellinger_file = f"{MATRIX_DIRECTORY}/{feature}/hellinger_BioGRID.txt"
node_labels = ' '.join(PPI_nx)
np.savetxt(hellinger_file, D, header=node_labels, comments='', fmt='%.7f')

## GCV-nonredundant

In [5]:
# Name of the feature set; also the sub-directory its matrices go into.
feature = 'GCV-nonredundant'
# All of the 'D', 'A' and 'G' column groups (a fresh selection, so GCV itself
# is not modified by the deletions below) ...
feature_matrix = GCV.loc[:, ['D','A','G']]
# ... minus the four 'G' entries listed here (presumably the redundant ones,
# going by the feature name -- TODO confirm).
for dropped_key in (('G', '0-0'), ('G', '1-1'), ('G', '1-2'), ('G', '2-1')):
    del feature_matrix[dropped_key]

# Create the output directory; exist_ok=True makes this a no-op when it is
# already there and removes the check-then-create race of a separate exists().
os.makedirs(f"{MATRIX_DIRECTORY}/{feature}", exist_ok=True)

In [23]:
# One distance matrix of the current `feature_matrix` per entry of
# `all_distances`, written with the node labels of PPI_nx as header row.
for distance in all_distances:
    D = graco.GCV_distance(feature_matrix, distance)
    target_file = f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt"
    node_labels = ' '.join(PPI_nx)
    np.savetxt(target_file, D, header=node_labels, comments='', fmt='%.7f')

In [24]:
# 'hellinger' distance matrix of the current `feature_matrix`, stored in the
# current feature directory with the node labels of PPI_nx as header row.
D = graco.GCV_distance(feature_matrix, 'hellinger')
hellinger_file = f"{MATRIX_DIRECTORY}/{feature}/hellinger_BioGRID.txt"
node_labels = ' '.join(PPI_nx)
np.savetxt(hellinger_file, D, header=node_labels, comments='', fmt='%.7f')

## GCV-orca

In [7]:
# Name of the feature set; also the sub-directory the GCV-orca matrices go into.
feature = 'GCV-orca'
# The 'O' column group of the coefficient table.
feature_matrix = GCV['O']

# Create the output directory; exist_ok=True makes this a no-op when it is
# already there and removes the check-then-create race of a separate exists().
os.makedirs(f"{MATRIX_DIRECTORY}/{feature}", exist_ok=True)

In [8]:
# Only the 'hellinger' matrix is produced for this feature set. The
# one-element set keeps the loop shape of the other sections and leaves
# `distance` bound to 'hellinger' afterwards.
for distance in {'hellinger'}:
    D = graco.GCV_distance(feature_matrix, distance)
    target_file = f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt"
    node_labels = ' '.join(PPI_nx)
    np.savetxt(target_file, D, header=node_labels, comments='', fmt='%.7f')

## GCV-orca+

In [9]:
# Name of the feature set; also the sub-directory the GCV-orca+ matrices go into.
feature = 'GCV-orca+'
# The 'O', 'A' and 'D' column groups (a fresh selection, so GCV itself is not
# modified by the deletions below).
feature_matrix = GCV.loc[:, ['O','A','D']]

# From 'A' and then from 'D', remove the entries keyed '1', '2' and '3'
# (same keys and same order as the former six `del` statements).
for group in ('A', 'D'):
    for key in ('1', '2', '3'):
        del feature_matrix[(group, key)]

# Create the output directory; exist_ok=True makes this a no-op when it is
# already there and removes the check-then-create race of a separate exists().
os.makedirs(f"{MATRIX_DIRECTORY}/{feature}", exist_ok=True)

In [12]:
# Only the 'hellinger' matrix is produced for this feature set. The
# one-element set keeps the loop shape of the other sections and leaves
# `distance` bound to 'hellinger' afterwards.
for distance in {'hellinger'}:
    D = graco.GCV_distance(feature_matrix, distance)
    target_file = f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt"
    node_labels = ' '.join(PPI_nx)
    np.savetxt(target_file, D, header=node_labels, comments='', fmt='%.7f')