In [1]:
from scipy.spatial.distance import squareform, cdist, pdist
from itertools import combinations
from functools import partial

import os
import graco
import numpy as np
import pandas as pd
import networkx as nx

In [2]:
pd.set_option("display.max_columns", 50)

DATA_DIRECTORY = "/media/clusterduck123/joe/data"
YEAST_DIRECTORY = f"{DATA_DIRECTORY}/processed-data/yeast"
NETWORK_DIRECTORY = f"{YEAST_DIRECTORY}/networks"
MATRIX_DIRECTORY  = f"{YEAST_DIRECTORY}/distance-matrices"

# Distance matrices

In [3]:
all_distances = ['cityblock', 'euclidean', 'chebyshev', 
                 'sqeuclidean', 'canberra', 
                 'normalized1_l1', 'normalized1_l2', 'normalized1_linf', 
                 'normalized2_l1', 'normalized2_l2', 'normalized2_linf', 'mahalanobis', 'seuclidean',
                 'cosine', 'correlation', 'braycurtis']

In [4]:
PPI_nx = nx.read_edgelist(f"{NETWORK_DIRECTORY}/PPI_BioGRID.txt")
GDV = graco.orbits(PPI_nx)
GCV = graco.coefficients(GDV)

## GDV

In [5]:
feature = 'GDV'

if not os.path.exists(f"{MATRIX_DIRECTORY}/{feature}"):
    os.makedirs(f"{MATRIX_DIRECTORY}/{feature}/")

In [8]:
for distance in all_distances:
    D = graco.distance_matrix(GDV, distance)
    np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt", D, 
           fmt='%.7f', header=' '.join(PPI_nx), comments='')

In [None]:
D = graco.distance_matrix(GDV, distance)
np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/GDV_similarity_BioGRID.txt", D, 
       fmt='%.7f', header=' '.join(PPI_nx), comments='')

## GCV-D

In [5]:
feature = 'GCV-D'

if not os.path.exists(f"{MATRIX_DIRECTORY}/{feature}"):
    os.makedirs(f"{MATRIX_DIRECTORY}/{feature}/")

In [7]:
for distance in all_distances:
    D = graco.GCV_distance(GCV['D'], distance)
    np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt", D, 
           fmt='%.7f', header=' '.join(PPI_nx), comments='')

Exception: Datatype not understood.

In [6]:
D = graco.GCV_distance(GCV['D'], 'hellinger')
np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/hellinger_BioGRID.txt", D, 
       fmt='%.7f', header=' '.join(PPI_nx), comments='')

## GCV-A

In [5]:
feature = 'GCV-A'

if not os.path.exists(f"{MATRIX_DIRECTORY}/{feature}"):
    os.makedirs(f"{MATRIX_DIRECTORY}/{feature}/")

In [6]:
D = graco.GCV_distance(GCV['A'], 'hellinger')
np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/hellinger_BioGRID.txt", D, 
       fmt='%.7f', header=' '.join(PPI_nx), comments='')

In [6]:
for distance in all_distances:
    D = graco.GCV_distance(GCV['A'], distance)
    np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt", D, 
           fmt='%.7f', header=' '.join(PPI_nx), comments='')

## GCV-G

In [7]:
feature = 'GCV-G'

if not os.path.exists(f"{MATRIX_DIRECTORY}/{feature}"):
    os.makedirs(f"{MATRIX_DIRECTORY}/{feature}/")

In [8]:
for distance in all_distances:
    D = graco.GCV_distance(GCV['G'], distance)
    np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt", D, 
           fmt='%.7f', header=' '.join(PPI_nx), comments='')

In [8]:
D = graco.GCV_distance(GCV['G'], 'hellinger')
np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/hellinger_BioGRID.txt", D, 
       fmt='%.7f', header=' '.join(PPI_nx), comments='')

## GCV-DA

In [9]:
feature = 'GCV-DA'

if not os.path.exists(f"{MATRIX_DIRECTORY}/{feature}"):
    os.makedirs(f"{MATRIX_DIRECTORY}/{feature}/")

In [10]:
for distance in all_distances:
    D = graco.GCV_distance(GCV[['D','A']], distance)
    np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt", D, 
           fmt='%.7f', header=' '.join(PPI_nx), comments='')

  return runner(coro)


In [10]:
D = graco.GCV_distance(GCV[['D','A']], 'hellinger')
np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/hellinger_BioGRID.txt", D, 
       fmt='%.7f', header=' '.join(PPI_nx), comments='')

  return runner(coro)


## GCV-DG

In [11]:
feature = 'GCV-DG'

if not os.path.exists(f"{MATRIX_DIRECTORY}/{feature}"):
    os.makedirs(f"{MATRIX_DIRECTORY}/{feature}/")

In [6]:
for distance in all_distances:
    D = graco.GCV_distance(GCV[['D','G']], distance)
    np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt", D, 
           fmt='%.7f', header=' '.join(PPI_nx), comments='')

In [12]:
D = graco.GCV_distance(GCV[['D','G']], 'hellinger')
np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/hellinger_BioGRID.txt", D, 
       fmt='%.7f', header=' '.join(PPI_nx), comments='')

## GCV-all1

In [22]:
feature = 'GCV-all1'
feature_matrix = GCV.loc[:,['D','A','G']]
del feature_matrix[('G','0-0')]

if not os.path.exists(f"{MATRIX_DIRECTORY}/{feature}"):
    os.makedirs(f"{MATRIX_DIRECTORY}/{feature}/")

In [23]:
for distance in all_distances:
    D = graco.GCV_distance(feature_matrix, distance)
    np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt", D, 
           fmt='%.7f', header=' '.join(PPI_nx), comments='')

In [24]:
D = graco.GCV_distance(feature_matrix, 'hellinger')
np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/hellinger_BioGRID.txt", D, 
       fmt='%.7f', header=' '.join(PPI_nx), comments='')

## GCV-all2

In [25]:
feature = 'GCV-all2'
feature_matrix = GCV.loc[:,['D','A','G']]
del feature_matrix[('G','0-0')]
del feature_matrix[('G','1-1')]
del feature_matrix[('G','1-2')]
del feature_matrix[('G','2-1')]

if not os.path.exists(f"{MATRIX_DIRECTORY}/{feature}"):
    os.makedirs(f"{MATRIX_DIRECTORY}/{feature}/")

In [26]:
for distance in all_distances:
    D = graco.GCV_distance(feature_matrix, distance)
    np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt", D, 
           fmt='%.7f', header=' '.join(PPI_nx), comments='')

In [27]:
D = graco.GCV_distance(feature_matrix, 'hellinger')
np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/hellinger_BioGRID.txt", D, 
       fmt='%.7f', header=' '.join(PPI_nx), comments='')

## GCV-orca

In [5]:
feature = 'GCV-orca'
feature_matrix = GCV['O']

if not os.path.exists(f"{MATRIX_DIRECTORY}/{feature}"):
    os.makedirs(f"{MATRIX_DIRECTORY}/{feature}/")

In [6]:
for distance in all_distances:
    D = graco.GCV_distance(feature_matrix, distance)
    np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt", D, 
           fmt='%.7f', header=' '.join(PPI_nx), comments='')

LinAlgError: Singular matrix

## GCV-orca+

In [18]:
feature = 'GCV-orca+'
feature_matrix = GCV.loc[:,['O','A','D']]

del feature_matrix[('A','1')]
del feature_matrix[('A','2')]
del feature_matrix[('A','3')]

del feature_matrix[('D','1')]
del feature_matrix[('D','2')]
del feature_matrix[('D','3')]

if not os.path.exists(f"{MATRIX_DIRECTORY}/{feature}"):
    os.makedirs(f"{MATRIX_DIRECTORY}/{feature}/")

In [20]:
for distance in {'canberra'}:
    D = graco.GCV_distance(feature_matrix, distance)
    np.savetxt(f"{MATRIX_DIRECTORY}/{feature}/{distance}_BioGRID.txt", D, 
           fmt='%.7f', header=' '.join(PPI_nx), comments='')