In [28]:
import itertools
import os

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from neo4j import GraphDatabase

In [29]:
# Database connection setup.
# Each Neo4j connection setting can be overridden through an environment
# variable so that credentials do not have to live in the notebook itself.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
user = os.environ.get("NEO4J_USER", "neo4j")
# NOTE(review): the fallback keeps the previously hard-coded password so the
# notebook still runs unchanged on the original machine. Set NEO4J_PASSWORD
# and remove this literal before sharing or committing the notebook.
password = os.environ.get("NEO4J_PASSWORD", "abcd7890")

# Create the driver used by every later cell.
driver = GraphDatabase.driver(uri, auth=(user, password))

In [30]:
# Load the source tables. `header=0` discards each file's own header row and
# `names` supplies the column labels used throughout the notebook.
molecules = pd.read_csv(
    '../datas/molecules.csv',
    header=0,
    names=['id', 'none', 'pubchem_id', 'common_name', 'flavor_profile'],
)
flavor_db = pd.read_csv(
    '../datas/flavordb.csv',
    header=0,
    names=['id', 'entry_id', 'alias', 'synonyms', 'scientific_name', 'category', 'molecules'],
).drop(columns=['id'])  # the file's own id column is not used; entry_id is the key

In [31]:
# Show the last 20 rows of the molecules table as a quick sanity check of the load.
molecules.tail(20)

Unnamed: 0,id,none,pubchem_id,common_name,flavor_profile
1771,1771,1771.0,23676745,Potassium Sorbate,{'odorless'}
1772,1772,1772.0,24832101,"Santalol, alpha- and beta-","{'sandalwood', 'sweet', 'woody', 'deep'}"
1773,1773,1773.0,25021769,"2-Propen-1-one, 3-(4-hydroxyphenyl)-1-phenyl-",{'bitter'}
1774,1774,1774.0,44229138,(RS)-norcoclaurinium,"{'milky', 'sweet', 'fruity'}"
1775,1775,1775.0,46779070,S-Methyl 4-methylpentanethioate,{''}
1776,1776,1776.0,53425122,1-(Ethyltrisulfanyl)propane,"{'onion', 'alliaceous', 'green', 'garlic'}"
1777,1777,1777.0,53472027,D-Isoleucine Methyl Ester Hydrochloride,{''}
1778,1778,1778.0,54670067,l-ascorbic acid,{''}
1779,1779,1779.0,57346909,"4H-Pyrrolo[2,1-d]-1,3,5-dithiazine,tetrahydro-...",{''}
1780,1780,1780.0,57357963,33368-82-0,"{'sulfurous', 'alliaceous'}"


In [32]:
# Show the last 2 rows of the entries table as a quick sanity check of the load.
flavor_db.tail(2)

{}

Unnamed: 0,entry_id,alias,synonyms,scientific_name,category,molecules
933,977,guinea hen,"{'pet speckled hen', 'original fowl', 'guinea ...",galliformes,meat,"{644104, 1130}"
934,978,cucurbita,{'cucurbita'},cucurbita,gourd,"{644104, 527, 8723, 31260, 15394, 6184, 65064,..."


In [39]:
# Create a Molecule node for every row of the molecules table

## Remove all existing Molecule nodes (and their relationships), if there are any.
def initialize_molecules(tx):
    """Clear previously imported Molecule nodes.

    Args:
        tx: Transaction-like object exposing ``run(query)``; the object it
            returns must expose ``single()``.
    """
    probe = tx.run("MATCH (m:Molecule) RETURN m LIMIT 1")
    # No Molecule node was found, so there is nothing to delete.
    if not probe.single():
        return
    tx.run("MATCH (m:Molecule) DETACH DELETE m")
    
## Insert one Molecule node.
def insert_molecule(tx, molecule_data):
    """Create a Molecule node from one row of the molecules table.

    ``flavor_profile`` normally arrives as the string form of a Python set,
    e.g. ``"{'sweet', 'woody'}"``, and is split into a list of flavor names.
    Blank tokens (as produced by ``"{''}"``) are dropped, and a missing value
    (e.g. NaN) yields an empty list instead of raising. A value that is
    already a list/tuple/set is used directly.

    Args:
        tx: Transaction-like object exposing ``run(query, **params)``.
        molecule_data: Mapping with keys ``id``, ``pubchem_id``,
            ``common_name`` and ``flavor_profile``.
    """
    raw_profile = molecule_data['flavor_profile']
    if isinstance(raw_profile, str):
        tokens = raw_profile.replace("'", "").strip('{}').split(', ')
    elif isinstance(raw_profile, (list, tuple, set, frozenset)):
        tokens = [str(item) for item in raw_profile]
    else:
        # Missing cell (NaN / None): there is no flavor information to store.
        tokens = []
    # Drop blank tokens so "{''}" becomes [] rather than [''].
    flavors = [token.strip() for token in tokens if token.strip()]

    tx.run("""
    CREATE (m:Molecule {
        id: $id,
        pubchem_id: $pubchem_id,
        common_name: $common_name,
        flavor_profile: $flavor_profile
    })
    """,
    id=molecule_data['id'],
    pubchem_id=molecule_data['pubchem_id'],
    common_name=molecule_data['common_name'],
    flavor_profile=flavors)

def append_molecule_index(tx):
    """Create the Molecule schema objects, skipping any that already exist.

    Args:
        tx: Transaction-like object exposing ``run(query)``.
    """
    schema_statements = (
        # Uniqueness constraint on id.
        "CREATE CONSTRAINT IF NOT EXISTS FOR (m:Molecule) REQUIRE m.id IS UNIQUE",
        # Index on pubchem_id.
        "CREATE INDEX IF NOT EXISTS FOR (m:Molecule) ON (m.pubchem_id)",
        # Index on flavor_profile.
        "CREATE INDEX IF NOT EXISTS FOR (m:Molecule) ON (m.flavor_profile)",
    )
    for statement in schema_statements:
        tx.run(statement)

# Rebuild the Molecule nodes: clear old ones, insert one node per table row,
# then create the constraint and indexes.
with driver.session() as db_session:
    db_session.execute_write(initialize_molecules)
    for _, molecule_row in molecules.iterrows():
        # Each row is written in its own write transaction.
        db_session.execute_write(insert_molecule, molecule_row)
    db_session.execute_write(append_molecule_index)


In [50]:
# Create an Entry node for every row of the flavor_db table

## Remove all existing Entry nodes (and their relationships), if there are any.
def initialize_entry(tx):
    """Clear previously imported Entry nodes.

    Args:
        tx: Transaction-like object exposing ``run(query)``; the object it
            returns must expose ``single()``.
    """
    probe = tx.run("MATCH (m:Entry) RETURN m LIMIT 1")
    # No Entry node was found, so there is nothing to delete.
    if not probe.single():
        return
    tx.run("MATCH (m:Entry) DETACH DELETE m")
    
def _parse_set_literal(raw):
    """Split the string form of a set (``"{'a', 'b'}"``) into non-blank tokens.

    Non-string input (e.g. NaN for a missing cell) yields an empty list.
    """
    if not isinstance(raw, str):
        return []
    parts = raw.replace("'", "").strip('{}').split(', ')
    return [part.strip() for part in parts if part.strip()]


## Insert one Entry node and link it to the molecules it contains.
def insert_entry(tx, entry_data):
    """Create an Entry node plus its ``CONTAINS`` relationships.

    The ``molecules`` cell lists PubChem IDs (the sample rows show values such
    as 644104, far beyond the row ids of the molecules table), so molecules
    are matched on the integer ``pubchem_id`` property rather than on ``id``.
    NOTE(review): confirm against the full flavordb.csv.

    The node and all of its relationships are written with a single query
    instead of one round trip per molecule. A molecule that has no node yet
    is still created (carrying only ``pubchem_id``), as in the original.

    Args:
        tx: Transaction-like object exposing ``run(query, **params)``.
        entry_data: Mapping with keys ``entry_id``, ``alias``, ``synonyms``,
            ``scientific_name``, ``category`` and ``molecules``.
    """
    # Keep numeric tokens only; this skips junk such as "set()" for an empty set.
    pubchem_ids = [
        int(token)
        for token in _parse_set_literal(entry_data['molecules'])
        if token.isdigit()
    ]

    # Full-text search string: synonyms + scientific name + category.
    # Missing values are skipped so the literal text "nan" is never indexed.
    search_terms = _parse_set_literal(entry_data['synonyms'])
    for key in ('scientific_name', 'category'):
        value = entry_data[key]
        if not pd.isna(value):
            search_terms.append(str(value))
    search_query = ' '.join(search_terms)

    # NOTE(review): `synonyms` is stored as the raw set-literal string, exactly
    # as before; Molecule.flavor_profile is stored as a list, so confirm which
    # form downstream queries expect.
    tx.run("""
    CREATE (e:Entry {
        id: $entry_id,
        alias: $alias,
        synonyms: $synonyms,
        scientific_name: $scientific_name,
        category: $category,
        search_query: $search_query
    })
    WITH e
    UNWIND $pubchem_ids AS pubchem_id
    MERGE (m:Molecule {pubchem_id: pubchem_id})  // created if not present yet
    MERGE (e)-[:CONTAINS]->(m)
    """,
    entry_id=entry_data['entry_id'],
    alias=entry_data['alias'],
    synonyms=entry_data['synonyms'],
    scientific_name=entry_data['scientific_name'],
    category=entry_data['category'],
    search_query=search_query,
    pubchem_ids=pubchem_ids)

def append_entry_index(tx):
    """Create the Entry schema objects, skipping any that already exist.

    Args:
        tx: Transaction-like object exposing ``run(query)``.
    """
    # Uniqueness constraint on id.
    tx.run("CREATE CONSTRAINT IF NOT EXISTS FOR (m:Entry) REQUIRE m.id IS UNIQUE")

    # Index on molecules.
    # NOTE(review): insert_entry in this notebook does not set a `molecules`
    # property on Entry nodes; kept so the resulting schema is unchanged.
    tx.run("CREATE INDEX IF NOT EXISTS FOR (m:Entry) ON (m.molecules)")

    # Full-text index over search_query. That property is written on Entry
    # nodes, so the index must target :Entry (it previously targeted
    # :Molecule and therefore indexed nothing). IF NOT EXISTS makes the cell
    # safe to re-run.
    # NOTE: if `my_text_index` already exists from an earlier run with the
    # :Molecule label, drop it once (DROP INDEX my_text_index) so that this
    # definition takes effect.
    tx.run("CREATE FULLTEXT INDEX my_text_index IF NOT EXISTS FOR (n:Entry) ON EACH [n.search_query]")
    
# Rebuild the Entry nodes: clear old ones, insert one node per flavor_db row
# (together with its CONTAINS relationships), then create the schema objects.
with driver.session() as db_session:
    db_session.execute_write(initialize_entry)
    for _, entry_row in flavor_db.iterrows():
        # Each row is written in its own write transaction.
        db_session.execute_write(insert_entry, entry_row)
    db_session.execute_write(append_entry_index)


KeyboardInterrupt: 

In [25]:
# 

In [26]:
# Close the driver now that all imports are done.
driver.close()