In [31]:
from neo4j import GraphDatabase
import pandas as pd
from openai import OpenAI
import numpy as np
from gensim.models import Word2Vec
from sklearn.decomposition import PCA
import json
from collections import Counter
import mysql.connector
import os
import re
from dotenv import load_dotenv
load_dotenv()

True

In [32]:
# DB接続処理
# Neo4jに接続
uri = "bolt://localhost:7687"
user = "neo4j"
password = "abcd7890"

# ドライバを作成
driver = GraphDatabase.driver(uri, auth=(user, password))

In [33]:
# DB接続処理
# MySQLに接続
connection = mysql.connector.connect(
    host="localhost",
    user="root",
    password="password",
    database="foodb"
)

In [34]:
# ChatGPTを使うのでAPIキーを設定
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
gpt_client = OpenAI(
    api_key=OPENAI_API_KEY
)

In [52]:
# ChatGPTを使って意味のある文章を生成する
def get_gpt_result(prompt):
    response = gpt_client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content.strip()


In [53]:
# Word2Vecモデルを学習
#model = Word2Vec(sentences=all_flavors, vector_size=100, window=5, min_count=1, sg=1)
model = Word2Vec.load("../datas/fine_tuned_word2vec.model")

# フレーバーのベクトルを確認
green_vector = model.wv['green']
print(f'{model.vector_size}, {len(green_vector)}')

# similar to の結果をjsonで返す
def similar_to_json(word):
    try:    
        similar_words = model.wv.most_similar(positive=[word])
        similar_words_dict = {word: score for word, score in similar_words}
        json_data = json.dumps(similar_words_dict, indent=4)
        return json_data
    except KeyError:
        return json.dumps({})

# vectorを返す
def get_word_vector(word):
    try:    
        return model.wv[word]
    except KeyError:
        print("error key: ", word)
        return [0.0] * model.vector_size 
        


100, 100


In [54]:
# テキストフォーマットの指定
def format_text(text):
    # 連続する空白を削除
    text = re.sub(r'\s+', ' ', text)
    text = text.replace('-', ' ')
    text = text.replace(' ', '_')
    # 改行を削除
    text = text.replace('\n', '')
    text = text.lower()
    return text

In [59]:
#
# GroupとSubGroupを初期化
#
# 古いデータを削除してリセット
def initialize_groups(tx):
    # Molecule ノードの存在を確認
    tx.run("MATCH (f:FoodGroup) DETACH DELETE f;")
    tx.run("MATCH (f:FoodSubGroup) DETACH DELETE f;")

# 新規レコードを作成
def create_groups(tx, node, group_name):
    if group_name is None or group_name == "":
        return
    
    vec_key = format_text(group_name)
    most_similar = similar_to_json(vec_key)
    word_vector = get_word_vector(vec_key)
            
    query = f"""
        MERGE (m:{node} {{
            id: $id,
            name: $name,
            most_similar_json: $most_similar_json,
            word_vector: $word_vector
        }})
        """
    
    tx.run(query,
        id=vec_key,
        name=group_name,
        most_similar_json=most_similar,
        word_vector=word_vector)
    
 # グループとサブグループの関連付け
def create_group_connections(tx, group_name, subgroup_name):
    tx.run("""
        MATCH (f:FoodGroup {id: $group_name})
        MATCH (s:FoodSubGroup {id: $subgroup_name})
        MERGE (f)-[:CONTAINS]->(s)
        """,
        group_name=format_text(group_name),
        subgroup_name=format_text(subgroup_name))

# インデックス追加
def append_group_index(tx):
    tx.run("CREATE CONSTRAINT IF NOT EXISTS FOR (f:FoodGroup) REQUIRE f.id IS UNIQUE")
    tx.run("CREATE CONSTRAINT IF NOT EXISTS FOR (f:FoodSubGroup) REQUIRE f.id IS UNIQUE")

# １行つづ検索してデータを投入
with driver.session() as session:
    session.execute_write(initialize_groups)
    
    query = "select food_group, food_subgroup from foods where food_group is not null group by food_group, food_subgroup"
    cursor1 = connection.cursor(dictionary=True)
    cursor1.execute(query)
    for row in cursor1.fetchall():
        session.execute_write(create_groups, "FoodGroup", row["food_group"])
        session.execute_write(create_groups, "FoodSubGroup", row["food_subgroup"])
        session.execute_write(create_group_connections, row["food_group"], row["food_subgroup"])
    session.execute_write(append_group_index)

error key:  tropical_fruits
error key:  root_vegetables
error key:  leaf_vegetables
error key:  oilseed_crops
error key:  other_pulses
error key:  stalk_vegetables
error key:  fermented_beverages
error key:  other_breads
error key:  cereal_products
error key:  soy_products
error key:  distilled_beverages
error key:  fortified_wines
error key:  alcoholic_beverages
error key:  other_vegetables
error key:  other_aquatic_foods
error key:  fermented_milk_products
error key:  unfermented_milks
error key:  frozen_desserts
error key:  other_confectioneries
error key:  other_dishes
error key:  flat_breads
error key:  other_milk_products
error key:  ground_meat
error key:  fruit_products
error key:  fish_products
error key:  other_beverages
error key:  fermented_milks
error key:  leavened_breads
error key:  nutritional_beverages
error key:  tex_mex_cuisine
error key:  milk_desserts
error key:  asian_cuisine
error key:  herbal_teas
error key:  pasta_dishes
error key:  berber_cuisine
error key:  c

In [56]:
#
# Foodを初期化
#
# 古いデータを削除してリセット
def initialize_foods(tx):
    # Molecule ノードの存在を確認
    tx.run("MATCH (f:Food) DETACH DELETE f;")
    tx.run("MATCH (f:FoodSubType) DETACH DELETE f;")

# 新規レコードを作成
def create_foods(tx, node, group_name):
    if group_name is None or group_name == "":
        return
    
    vec_key = format_text(group_name)
    most_similar = similar_to_json(vec_key)
    word_vector = get_word_vector(vec_key)
            
    query = f"""
        MERGE (m:{node} {{
            id: $id,
            name: $name,
            most_similar_json: $most_similar_json,
            word_vector: $word_vector
        }})
        """
    
    tx.run(query,
        id=vec_key,
        name=group_name,
        most_similar_json=most_similar,
        word_vector=word_vector)
    
# グループとサブグループの関連付け
def create_food_connections(tx, group_name, subgroup_name):
    tx.run("""
        MATCH (f:Food {id: $group_name})
        MATCH (s:FoodSubType {id: $subgroup_name})
        MERGE (f)-[:CONTAINS]->(s)
        """,
        group_name=format_text(group_name),
        subgroup_name=format_text(subgroup_name))

# グループとフードの関連付け
def create_food_group_connections(tx, food_name, group_name, subgroup_name):
    tx.run("""
        MATCH (f:FoodGroup {id: $group_name})
        MATCH (s:Food {id: $food_name})
        MERGE (f)-[:CONTAINS]->(s)
        """,
        group_name=format_text(group_name),
        food_name=format_text(food_name))

    tx.run("""
        MATCH (f:FoodSubGroup {id: $subgroup_name})
        MATCH (s:Food {id: $subgroup_name})
        MERGE (f)-[:CONTAINS]->(s)
        """,
        subgroup_name=format_text(subgroup_name),
        food_name=format_text(food_name))
    
# インデックス追加
def append_foods_index(tx):
    tx.run("CREATE CONSTRAINT IF NOT EXISTS FOR (f:Food) REQUIRE m.id IS UNIQUE")
    tx.run("CREATE CONSTRAINT IF NOT EXISTS FOR (f:FoodSubType) REQUIRE m.id IS UNIQUE")

# １行つづ検索してデータを投入
with driver.session() as session:
    session.execute_write(initialize_groups)
    
    query = "select foods.name, foods.name_scientific, foods.picture_file_name, foods.food_group, foods.food_subgroup, foods.food_type, orig_food_common_name from foods left join contents on foods.id = contents.food_id group by foods.name,contents.orig_food_common_name"
    cursor1 = connection.cursor(dictionary=True)
    cursor1.execute(query)
    for row in cursor1.fetchall():
        
        session.execute_write(create_groups, "Food", row["food_group"])
        session.execute_write(create_groups, "FoodSubType", row["food_subgroup"])
        session.execute_write(create_food_connections, row["food_group"], row["food_subgroup"])
        session.execute_write(create_foods, row["name"])

ClientError: {code: Neo.ClientError.Statement.ParameterMissing} {message: Expected parameter(s): vec_key}