# Test Notebook Graph

In [3]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
from py2neo import Graph

# Connection settings for the Neo4j instance queried by this notebook.

# host + port
host = 'http://localhost:7474'

# select database name
db_name = 'modelparallel'

# credentials for API
# Read from the environment so real credentials never have to be committed
# with the notebook. The fallbacks are the values this notebook previously
# hard-coded, so an unconfigured local run behaves exactly as before.
user = os.environ.get('NEO4J_USER', 'neo4j')
passwd = os.environ.get('NEO4J_PASSWORD', '1234')

# connect to database
call_graph = Graph(host, auth=(user, passwd), name=db_name)

## Test functions here

### Get molecules with at least 10 outgoing transformations and sort by weight

In [47]:
# Minimum number of outgoing HAS_TRANSFORMED_INTO relationships a molecule
# must have to be included in the result (inclusive bound).
MIN_OUTGOING_TRANSFORMATIONS = 10

# Query outline:
#   1. Count the outgoing HAS_TRANSFORMED_INTO relationships of every molecule
#      and keep only molecules that reach the threshold. Filtering directly
#      after the aggregation avoids running the second MATCH for molecules
#      that would be discarded anyway.
#   2. Re-match molecules with the same formula_string and sample_id and
#      collect the details of each of their outgoing transformations.
# Each result row carries formula_string, sample_id, the outgoing count and
# `transformation_details`, a list of maps with the keys transformation_unit,
# weight_combined and weight_connected.
# The threshold is passed as a Cypher parameter instead of being baked into
# the query text.
df_transformations = call_graph.run("""
MATCH (m:Molecule)-[t:HAS_TRANSFORMED_INTO]->(:Molecule)
WITH m, count(t) as count_HTI
WHERE count_HTI >= $min_outgoing
MATCH (m2:Molecule)-[t:HAS_TRANSFORMED_INTO]->(:Molecule)
WHERE m.formula_string = m2.formula_string
AND m.sample_id = m2.sample_id
RETURN m2.formula_string as formula_string, m2.sample_id as sample_id, count_HTI as outgoing_transformations, collect({transformation_unit: t.transformation_unit, weight_combined: t.combined_weight, weight_connected: t.connected_weight}) as transformation_details
""", parameters={'min_outgoing': MIN_OUTGOING_TRANSFORMATIONS}).to_data_frame()

In [48]:
# Order molecules from most to fewest outgoing transformations and renumber
# the rows from 0 so the index reflects the new ranking.
df_transformations = (
    df_transformations
    .sort_values(by=['outgoing_transformations'], ascending=False)
    .reset_index(drop=True)
)

In [50]:
def _connected_weight_key(detail):
    """Return the sort key for one transformation-detail dict.

    The key is a ``(has_weight, weight)`` tuple. Entries whose
    ``weight_connected`` is ``None`` get ``(False, 0)``, so they can be
    ordered against numeric weights instead of raising ``TypeError``; under
    a descending sort they end up after every entry that has a weight.
    Entries with a weight are still ordered by that weight alone.
    """
    weight = detail['weight_connected']
    return (weight is not None, 0 if weight is None else weight)


# For every molecule row, list its transformation units ordered by descending
# connected weight, and store that list in a new column.
full_sorted_list = []
for row in df_transformations.itertuples():
    ranked_details = sorted(row.transformation_details, key=_connected_weight_key, reverse=True)
    full_sorted_list.append([detail['transformation_unit'] for detail in ranked_details])
df_transformations['sorted_transformations_connected_weight'] = full_sorted_list

In [55]:
# Persist the result as a comma-separated UTF-8 file without the row index.
# NOTE(review): the list-valued columns are written as their string
# representation — confirm that downstream consumers expect this.
df_transformations.to_csv(
    'molecules_outgoing_transformations.csv',
    sep=',',
    encoding='utf-8',
    index=False,
)