In [1]:
from metapaths.starterpack import load_pickle, cypher_triple_to_list, metapath_featset_gen
from metapaths.inf import INFToolbox, query_templates_234, Graph

In [5]:

# Neo4j connection settings, defined once so that the Bolt URI handed to Graph
# and the host/port passed to check_graph_connection cannot drift apart.
NEO4J_HOST = 'localhost'
NEO4J_PORT = 7687

# Toolbox wrapping the graph handle together with the imported query templates.
toolbox = INFToolbox(Graph('bolt://{}:{}'.format(NEO4J_HOST, NEO4J_PORT)),
                     query_templates_234)

# Report the connection status up front; every later cell queries the graph.
if toolbox.check_graph_connection(NEO4J_HOST, NEO4J_PORT):
    print('Connection to Neo4j server successful.')
else:
    print('Connection to Neo4j server failed.')

Connection to Neo4j server successful.


In [6]:

# An empty INF parameter dict makes the pipeline extract raw metapath counts.
raw_count_params = {}

# Sample populated INF parameter dict.
sample_inf_params = {
    'path_deflator_exp': 0.5,   # deflator exponent
    'inf_inflator': 'product',  # pooled INF aggregation
    'inf_pooling': 'min',       # within-relation INF pooling
}

# NOTE(review): if add_param_combos appends to existing state, re-running this
# cell would register the combinations twice — confirm.
toolbox.add_param_combos([raw_count_params, sample_inf_params])

# Only the second returned value (the reindexed counts) is used downstream.
_, reltype_counts_reindexed = toolbox.get_reltype_counts()

In [5]:
# Preview the first five relation-type columns of the count table.
reltype_counts_reindexed.iloc[:, :5]

Relation_type,Gene_Gene,Gene_MolecularFunction,Gene_BiologicalProcess,Gene_Disease,Gene_CellularComponent
Count,2088217,96940,558677,86787,73485


In [6]:
# Relation types, written as Cypher patterns, that metapaths are built from.
reltypes_cypher = [
    '(:Gene)-[:Gene_Gene]->(:Gene)',
    '(:Gene)-[:Gene_Pathway]->(:Pathway)',
    '(:Gene)-[:Gene_Compound]->(:Compound)',
    '(:Gene)-[:Gene_Disease]->(:Disease)',
]

# Preview the [node, relation, node] triples the patterns expand to.
cypher_triple_to_list(reltypes_cypher)

[['(:Gene)', '-[:Gene_Gene]->', '(:Gene)'],
 ['(:Gene)', '-[:Gene_Pathway]->', '(:Pathway)'],
 ['(:Pathway)', '<-[:Gene_Pathway]-', '(:Gene)'],
 ['(:Gene)', '-[:Gene_Compound]->', '(:Compound)'],
 ['(:Compound)', '<-[:Gene_Compound]-', '(:Gene)'],
 ['(:Gene)', '-[:Gene_Disease]->', '(:Disease)'],
 ['(:Disease)', '<-[:Gene_Disease]-', '(:Gene)']]

In [7]:
# Expand the Cypher relation patterns into [node, relation, node] triples.
triple_types = cypher_triple_to_list(reltypes_cypher)

# Build the Gene -> Disease metapath feature set once and keep it bound, so the
# result shown below is the very object used later rather than a discarded copy.
# The displayed patterns contain two or three relations each, matching [2, 3].
metapaths = metapath_featset_gen('(:Gene)', '(:Disease)', triple_types, [2, 3])

metapaths

['(n_source:Gene)-[r1:Gene_Gene]->(n_1:Gene)-[r2:Gene_Disease]->(n_target:Disease)',
 '(n_source:Gene)-[r1:Gene_Gene]->(n_1:Gene)-[r2:Gene_Gene]->(n_2:Gene)-[r3:Gene_Disease]->(n_target:Disease)',
 '(n_source:Gene)-[r1:Gene_Pathway]->(n_1:Pathway)<-[r2:Gene_Pathway]-(n_2:Gene)-[r3:Gene_Disease]->(n_target:Disease)',
 '(n_source:Gene)-[r1:Gene_Compound]->(n_1:Compound)<-[r2:Gene_Compound]-(n_2:Gene)-[r3:Gene_Disease]->(n_target:Disease)',
 '(n_source:Gene)-[r1:Gene_Disease]->(n_1:Disease)<-[r2:Gene_Disease]-(n_2:Gene)-[r3:Gene_Disease]->(n_target:Disease)']

In [8]:
# Gene -> Disease metapath patterns, kept for the pipeline run.
metapaths = metapath_featset_gen(
    '(:Gene)',
    '(:Disease)',
    triple_types,
    [2, 3],
)

In [9]:
# Gene/Disease pairs for which metapath features will be computed.
# NOTE(review): load_pickle presumably unpickles this file — only load exports
# that come from a trusted source.
pairs_pickle_path = 'gene_disease_neo4j_export.pkl'
head_tail_pairs = load_pickle(pairs_pickle_path)

head_tail_pairs.head()

Unnamed: 0,Gene,Disease
0,Gene::79727,Disease::MESH:D009373
1,Gene::7704,Disease::MESH:D009373
2,Gene::7422,Disease::MESH:D009373
3,Gene::6513,Disease::MESH:D009373
4,Gene::4254,Disease::MESH:D009373


In [10]:

# Run the toolbox pipeline over every Gene/Disease pair and every metapath.
# The result is indexed per registered parameter combination (see the cells
# that display entries [0] and [1]).
# NOTE(review): 'name' is presumably the node property used to match the pair
# identifiers — confirm against run_pipeline.
transformed_metapath_feats = toolbox.run_pipeline(
    head_tail_pairs,
    'Gene',
    'Disease',
    metapaths,
    'name',
    reltype_counts_reindexed,
    toolbox.param_combos,
)

Processing (n_source:Gene)-[r1:Gene_Gene]->(n_1:Gene)-[r2:Gene_Disease]->(n_target:Disease)...


100%|██████████| 100/100 [00:23<00:00,  4.31it/s]


Processing (n_source:Gene)-[r1:Gene_Gene]->(n_1:Gene)-[r2:Gene_Gene]->(n_2:Gene)-[r3:Gene_Disease]->(n_target:Disease)...


100%|██████████| 100/100 [01:54<00:00,  1.15s/it]


Processing (n_source:Gene)-[r1:Gene_Pathway]->(n_1:Pathway)<-[r2:Gene_Pathway]-(n_2:Gene)-[r3:Gene_Disease]->(n_target:Disease)...


100%|██████████| 100/100 [00:23<00:00,  4.25it/s]


Processing (n_source:Gene)-[r1:Gene_Compound]->(n_1:Compound)<-[r2:Gene_Compound]-(n_2:Gene)-[r3:Gene_Disease]->(n_target:Disease)...


100%|██████████| 100/100 [00:29<00:00,  3.41it/s]


Processing (n_source:Gene)-[r1:Gene_Disease]->(n_1:Disease)<-[r2:Gene_Disease]-(n_2:Gene)-[r3:Gene_Disease]->(n_target:Disease)...


100%|██████████| 100/100 [00:08<00:00, 11.40it/s]
100%|██████████| 5/5 [00:00<00:00, 14084.30it/s]
100%|██████████| 5/5 [00:00<00:00, 2093.80it/s]


In [11]:
# Raw metapath counts: the result for the empty INF parameter dict.
transformed_metapath_feats[0].head()

Unnamed: 0,(n_source:Gene)-[r1:Gene_Gene]->(n_1:Gene)-[r2:Gene_Disease]->(n_target:Disease),(n_source:Gene)-[r1:Gene_Gene]->(n_1:Gene)-[r2:Gene_Gene]->(n_2:Gene)-[r3:Gene_Disease]->(n_target:Disease),(n_source:Gene)-[r1:Gene_Pathway]->(n_1:Pathway)<-[r2:Gene_Pathway]-(n_2:Gene)-[r3:Gene_Disease]->(n_target:Disease),(n_source:Gene)-[r1:Gene_Compound]->(n_1:Compound)<-[r2:Gene_Compound]-(n_2:Gene)-[r3:Gene_Disease]->(n_target:Disease),(n_source:Gene)-[r1:Gene_Disease]->(n_1:Disease)<-[r2:Gene_Disease]-(n_2:Gene)-[r3:Gene_Disease]->(n_target:Disease)
Gene::79727_Disease::MESH:D009373,11.0,760.0,21.0,7.0,182.0
Gene::7704_Disease::MESH:D009373,0.0,905.0,16.0,25.0,192.0
Gene::7422_Disease::MESH:D009373,12.0,2172.0,82.0,620.0,1196.0
Gene::6513_Disease::MESH:D009373,0.0,402.0,41.0,177.0,655.0
Gene::4254_Disease::MESH:D009373,8.0,900.0,163.0,48.0,458.0


In [12]:
# INF-transformed metapath counts: the result for the sample populated
# parameter dict.
transformed_metapath_feats[1].head()

Unnamed: 0,(n_source:Gene)-[r1:Gene_Gene]->(n_1:Gene)-[r2:Gene_Disease]->(n_target:Disease),(n_source:Gene)-[r1:Gene_Gene]->(n_1:Gene)-[r2:Gene_Gene]->(n_2:Gene)-[r3:Gene_Disease]->(n_target:Disease),(n_source:Gene)-[r1:Gene_Pathway]->(n_1:Pathway)<-[r2:Gene_Pathway]-(n_2:Gene)-[r3:Gene_Disease]->(n_target:Disease),(n_source:Gene)-[r1:Gene_Compound]->(n_1:Compound)<-[r2:Gene_Compound]-(n_2:Gene)-[r3:Gene_Disease]->(n_target:Disease),(n_source:Gene)-[r1:Gene_Disease]->(n_1:Disease)<-[r2:Gene_Disease]-(n_2:Gene)-[r3:Gene_Disease]->(n_target:Disease)
Gene::79727_Disease::MESH:D009373,22.682465,486.721496,55.819267,40.378323,85.948477
Gene::7704_Disease::MESH:D009373,0.0,502.315165,37.821901,64.27481,94.895852
Gene::7422_Disease::MESH:D009373,23.691062,765.089877,59.018997,312.150345,162.687706
Gene::6513_Disease::MESH:D009373,0.0,334.784257,46.82272,166.784085,120.395373
Gene::4254_Disease::MESH:D009373,19.944429,591.209358,83.210608,94.00503,100.675057
