In [3]:
# Show the kernel's working directory; the project-relative imports and data paths
# used further down only resolve once the notebook runs from the project root.
%pwd

'c:\\Users\\PrinciaFernandes\\Mresult\\Phenomix\\notebooks'

In [4]:
import os

# Run from the project root so that `src` is importable and the relative `data`
# paths resolve. The kernel starts inside the `notebooks` folder, so step up once.
# The guard keeps the cell idempotent: re-running it no longer climbs one
# directory higher on every execution.
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir("../")

In [5]:
from src.config import neo4j_uri,neo4j_username,neo4j_password
from neo4j import GraphDatabase
import json

In [6]:
def get_detail_files(dir:str)->list:
    """Collect the records of every ``*_detail.json`` file of the known websites.

    Args:
        dir: Folder that contains one sub-folder per website
            (``CPRD``, ``HDRUK``, ``OHDSI``, ``PHEKB``, ``SENTINEL``).

    Returns:
        One flat list with the items of all matching files, concatenated in
        sorted folder / file-name order so the result is reproducible.
        Assumes every file holds a JSON array -- TODO confirm.

    Raises:
        FileNotFoundError: If ``dir`` does not exist.
        json.JSONDecodeError: If a matching file is not valid JSON.
    """
    websites = ['CPRD','HDRUK','OHDSI','PHEKB','SENTINEL']
    detail_files = []
    # os.listdir() returns entries in arbitrary order; sort for deterministic output.
    for website in sorted(os.listdir(dir)):
        website_dir = os.path.join(dir, website)
        # Skip unknown folders and plain files that merely share a website's name.
        if website not in websites or not os.path.isdir(website_dir):
            continue
        for file_name in sorted(os.listdir(website_dir)):
            # Match on the suffix so backups such as "x_detail.json.bak" are ignored.
            if not file_name.endswith('_detail.json'):
                continue
            # JSON is UTF-8; do not depend on the platform's default encoding.
            with open(os.path.join(website_dir, file_name), 'r', encoding='utf-8') as f:
                detail_files.extend(json.load(f))
    return detail_files
# Build the path with os.path.join so it also resolves on non-Windows systems.
detail_file = get_detail_files(os.path.join('data', 'processed'))

In [7]:
def get_concept_files(dir:str)->list:
    """Collect the records of every ``*_concept.json`` file of the known websites.

    Args:
        dir: Folder that contains one sub-folder per website
            (``CPRD``, ``HDRUK``, ``OHDSI``, ``PHEKB``, ``SENTINEL``).

    Returns:
        One flat list with the items of all matching files, concatenated in
        sorted folder / file-name order so the result is reproducible.
        Assumes every file holds a JSON array -- TODO confirm.

    Raises:
        FileNotFoundError: If ``dir`` does not exist.
        json.JSONDecodeError: If a matching file is not valid JSON.
    """
    websites = ['CPRD','HDRUK','OHDSI','PHEKB','SENTINEL']
    concept_files = []
    # os.listdir() returns entries in arbitrary order; sort for deterministic output.
    for website in sorted(os.listdir(dir)):
        website_dir = os.path.join(dir, website)
        # Skip unknown folders and plain files that merely share a website's name.
        if website not in websites or not os.path.isdir(website_dir):
            continue
        for file_name in sorted(os.listdir(website_dir)):
            # Match on the suffix so backups such as "x_concept.json.bak" are ignored.
            if not file_name.endswith('_concept.json'):
                continue
            # JSON is UTF-8; do not depend on the platform's default encoding.
            with open(os.path.join(website_dir, file_name), 'r', encoding='utf-8') as f:
                concept_files.extend(json.load(f))
    return concept_files
# Build the path with os.path.join so it also resolves on non-Windows systems.
concept_file = get_concept_files(os.path.join('data', 'processed'))

In [8]:
import json

# Load the master phenotype list; `json_data` is the list later handed to push_to_neo4j.
# The path is assembled with os.path.join (portable across operating systems) and the
# file is read as UTF-8 rather than with the platform's default encoding.
with open(os.path.join('data', 'masterlist', 'Masterlist.json'), 'r', encoding='utf-8') as file:
    json_data = json.load(file)


In [1]:
def _detail_properties(detail_row):
    """Return a copy of one detail record with values flattened for storage as node properties."""
    properties = {}
    for key, value in detail_row.items():
        if isinstance(value, dict):
            # Nested objects are stored as JSON text.
            properties[key] = json.dumps(value)
        elif isinstance(value, list):
            if all(isinstance(entry, dict) for entry in value):
                # Lists of objects (and empty lists) become a JSON array of JSON strings.
                properties[key] = json.dumps([json.dumps(entry) for entry in value])
            else:
                properties[key] = value
        elif value is None or value == 'NA' or value == 'NO_VALUE':
            # Normalise the different "missing" markers to a single placeholder.
            properties[key] = 'Unknown value'
        else:
            properties[key] = value
    return properties


def _concept_properties(concept):
    """Return a copy of one concept record with dict / list values stored as JSON text."""
    return {
        key: json.dumps(value) if isinstance(value, (dict, list)) else value
        for key, value in concept.items()
    }


def push_to_neo4j(json_data,detail_file,concept_file,limit=500):
    """Write phenotypes, their website instances, details and concepts to Neo4j.

    Graph written per masterlist entry::

        (:Phenotype)-[:HAS_INSTANCE]->(:Website)-[:HAS_DETAIL]->(:Detail)-[:HAS_CONCEPT]->(:Concept)

    Args:
        json_data: List of masterlist dicts. Each is stored as a ``Phenotype`` node
            (``Phenotypes`` is renamed to ``name``); the ``hdruk_PID``, ``phekb_PID``,
            ``cprd_PID``, ``Sentinel_PID`` and ``ohdsi_PID`` values are read as
            comma-separated PID strings.
        detail_file: List of detail dicts, looked up by their ``PID`` key.
        concept_file: List of concept dicts; a concept is attached to a detail when
            the detail's PID is contained in the concept's ``PIDs`` value.
        limit: Maximum number of masterlist entries to load (default 500, the
            previously hard-coded cap). Pass ``None`` to load everything.

    Returns:
        The confirmation string ``"Phenomix data added to Neo4J"``.

    Notes:
        * The input dicts are no longer modified: property maps are built as copies.
          Previously a concept's ``PIDs`` list was replaced by its JSON text after the
          first push, turning later membership checks into substring matches.
        * A PID without a detail record is skipped with a warning instead of
          aborting the load with ``StopIteration``.
        * Nodes are written with ``CREATE``; calling this twice duplicates them.
    """
    import warnings

    driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_username, neo4j_password))
    try:
        with driver.session() as session:
            for item in json_data[:limit]:
                session.run("""
                    CREATE (p:Phenotype {id:$row.id})
                    SET p += $row
                    SET p.name = p.Phenotypes
                    REMOVE p.Phenotypes
                """, row=item)

                pid_sources = {
                    'HDRUK': item.get("hdruk_PID"),
                    'PHEKB': item.get("phekb_PID"),
                    'CPRD': item.get("cprd_PID"),
                    'Sentinel': item.get("Sentinel_PID"),
                    'OHDSI': item.get("ohdsi_PID"),
                }

                for source, pid_string in pid_sources.items():
                    if not pid_string:
                        continue
                    website_pids = [pid.strip() for pid in pid_string.split(",") if pid.strip()]
                    session.run("""
                        CREATE (w:Website {name: $name, pid: $pids})
                        WITH w
                        MATCH (p:Phenotype {id:$row.id})
                        MERGE (p)-[:HAS_INSTANCE]->(w)
                    """, name=source, pids=website_pids, row=item)

                    for pid in website_pids:
                        detail_row = next(
                            (candidate for candidate in detail_file if pid == candidate['PID']),
                            None,
                        )
                        if detail_row is None:
                            # Without a Detail node the concept query could not attach
                            # anything either, so skip the whole PID.
                            warnings.warn(
                                "No detail record found for PID {!r} ({}); skipping it.".format(pid, source)
                            )
                            continue

                        session.run("""
                            CREATE (d:Detail {PID:$detail_row.PID})
                            SET d += $detail_row
                            WITH d
                            MATCH (p:Phenotype {id:$row.id})
                            MATCH (w:Website {name:$name})
                            MATCH (p)-[:HAS_INSTANCE]->(w)
                            MERGE (w)-[:HAS_DETAIL]->(d)
                        """, detail_row=_detail_properties(detail_row), row=item, name=source)

                        matching_concepts = [
                            candidate for candidate in concept_file if pid in candidate['PIDs']
                        ]
                        for concept in matching_concepts:
                            session.run("""
                                CREATE (c:Concept {CID:$concept.CID})
                                SET c += $concept
                                WITH c
                                MATCH (p:Phenotype {id:$row.id})
                                MATCH (w:Website {name:$name})
                                MATCH (d:Detail {PID:$pid})
                                MATCH (p)-[:HAS_INSTANCE]->(w)-[:HAS_DETAIL]->(d)
                                MERGE (d)-[:HAS_CONCEPT]->(c)
                            """, concept=_concept_properties(concept), row=item, name=source, pid=pid)
    finally:
        # Release the connection pool even when a query fails part-way through.
        driver.close()
    return "Phenomix data added to Neo4J"



In [9]:
# Load the masterlist into Neo4j; push_to_neo4j only processes the first 500 entries.
# NOTE: Phenotype, Website, Detail and Concept nodes are written with CREATE, so
# running this cell again adds a second copy of every node.
push_to_neo4j(json_data,detail_file,concept_file)

'Phenomix data added to Neo4J'