In [53]:
from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import Tuple, List, Optional
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
import os
from langchain_community.graphs import Neo4jGraph
from langchain.document_loaders import WikipediaLoader
from langchain.text_splitter import TokenTextSplitter
from langchain_openai import ChatOpenAI
from langchain_experimental.graph_transformers import LLMGraphTransformer
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_core.runnables import ConfigurableField, RunnableParallel, RunnablePassthrough
# LangChain supports many other chat models. Here, we're using Ollama
from langchain_community.chat_models import ChatOllama
from langchain_core.documents import Document
from dotenv import load_dotenv

In [54]:
load_dotenv()

graph = Neo4jGraph()

In [13]:
import pandas as pd
recipe_df = pd.read_csv("hf://datasets/Hieu-Pham/kaggle_food_recipes/Food Ingredients and Recipe Dataset with Image Name Mapping.csv")

In [14]:
recipe_df = recipe_df[['Title', 'Instructions', 'Cleaned_Ingredients']]
recipe_df.head()

Unnamed: 0,Title,Instructions,Cleaned_Ingredients
0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"Pat chicken dry with paper towels, season all ...","['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher..."
1,Crispy Salt and Pepper Potatoes,Preheat oven to 400°F and line a rimmed baking...,"['2 large egg whites', '1 pound new potatoes (..."
2,Thanksgiving Mac and Cheese,Place a rack in middle of oven; preheat to 400...,"['1 cup evaporated milk', '1 cup whole milk', ..."
3,Italian Sausage and Bread Stuffing,Preheat oven to 350°F with rack in middle. Gen...,"['1 (¾- to 1-pound) round Italian loaf, cut in..."
4,Newton's Law,Stir together brown sugar and hot water in a c...,"['1 teaspoon dark brown sugar', '1 teaspoon ho..."


In [25]:
recipe_df

Unnamed: 0,Title,Instructions,Cleaned_Ingredients
0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"Pat chicken dry with paper towels, season all ...","['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher..."
1,Crispy Salt and Pepper Potatoes,Preheat oven to 400°F and line a rimmed baking...,"['2 large egg whites', '1 pound new potatoes (..."
2,Thanksgiving Mac and Cheese,Place a rack in middle of oven; preheat to 400...,"['1 cup evaporated milk', '1 cup whole milk', ..."
3,Italian Sausage and Bread Stuffing,Preheat oven to 350°F with rack in middle. Gen...,"['1 (¾- to 1-pound) round Italian loaf, cut in..."
4,Newton's Law,Stir together brown sugar and hot water in a c...,"['1 teaspoon dark brown sugar', '1 teaspoon ho..."
...,...,...,...
13496,Brownie Pudding Cake,Preheat the oven to 350°F. Into a bowl sift to...,"['1 cup all-purpose flour', '2/3 cup unsweeten..."
13497,Israeli Couscous with Roasted Butternut Squash...,Preheat oven to 475°F.\nHalve lemons and scoop...,"['1 preserved lemon', '1 1/2 pound butternut s..."
13498,Rice with Soy-Glazed Bonito Flakes and Sesame ...,"If using katsuo bushi flakes from package, moi...",['Leftover katsuo bushi (dried bonito flakes) ...
13499,Spanakopita,Melt 1 tablespoon butter in a 12-inch heavy sk...,['1 stick (1/2 cup) plus 1 tablespoon unsalted...


In [15]:
recipe_dict = recipe_df.to_dict(orient='records')

In [16]:
import json
# Convert to string
recipe_ls=[]
for i in recipe_dict:
    str_obj = json.dumps(i)
    recipe_ls.append(str_obj)

#### text2graph

In [29]:
def text2graphdoc(text):
    document = Document(page_content=text)
    return document

documents = []
for i in recipe_ls[:1]:
    document = text2graphdoc(i)
    documents.append(document)

# llm = ChatOllama(model="llama3")
# llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-0125")
llm=ChatOpenAI(temperature=0, model_name="gpt-4o") 
llm_transformer = LLMGraphTransformer(llm=llm)

graph_documents = llm_transformer.convert_to_graph_documents(documents)
# graph.add_graph_documents(
#     graph_documents,
#     baseEntityLabel=True,
#     include_source=True
# )

In [42]:
graph_documents[0].__doc__

'Represents a graph document consisting of nodes and relationships.\n\n    Attributes:\n        nodes (List[Node]): A list of nodes in the graph.\n        relationships (List[Relationship]): A list of relationships in the graph.\n        source (Document): The document from which the graph information is derived.\n    '

In [39]:
graph_documents[0].relationships

[Relationship(source=Node(id='Miso-Butter Roast Chicken With Acorn Squash Panzanella', type='Recipe'), target=Node(id='Chicken', type='Ingredient'), type='CONTAINS'),
 Relationship(source=Node(id='Miso-Butter Roast Chicken With Acorn Squash Panzanella', type='Recipe'), target=Node(id='Salt', type='Ingredient'), type='CONTAINS'),
 Relationship(source=Node(id='Miso-Butter Roast Chicken With Acorn Squash Panzanella', type='Recipe'), target=Node(id='Acorn Squash', type='Ingredient'), type='CONTAINS'),
 Relationship(source=Node(id='Miso-Butter Roast Chicken With Acorn Squash Panzanella', type='Recipe'), target=Node(id='Sage', type='Ingredient'), type='CONTAINS'),
 Relationship(source=Node(id='Miso-Butter Roast Chicken With Acorn Squash Panzanella', type='Recipe'), target=Node(id='Rosemary', type='Ingredient'), type='CONTAINS'),
 Relationship(source=Node(id='Miso-Butter Roast Chicken With Acorn Squash Panzanella', type='Recipe'), target=Node(id='Butter', type='Ingredient'), type='CONTAINS'),

In [40]:
graph_documents[0].nodes

[Node(id='Miso-Butter Roast Chicken With Acorn Squash Panzanella', type='Recipe'),
 Node(id='Chicken', type='Ingredient'),
 Node(id='Salt', type='Ingredient'),
 Node(id='Acorn Squash', type='Ingredient'),
 Node(id='Sage', type='Ingredient'),
 Node(id='Rosemary', type='Ingredient'),
 Node(id='Butter', type='Ingredient'),
 Node(id='Allspice', type='Ingredient'),
 Node(id='Red Pepper Flakes', type='Ingredient'),
 Node(id='Black Pepper', type='Ingredient'),
 Node(id='Bread', type='Ingredient'),
 Node(id='Apples', type='Ingredient'),
 Node(id='Olive Oil', type='Ingredient'),
 Node(id='Red Onion', type='Ingredient'),
 Node(id='Apple Cider Vinegar', type='Ingredient'),
 Node(id='White Miso', type='Ingredient'),
 Node(id='All-Purpose Flour', type='Ingredient'),
 Node(id='White Wine', type='Ingredient'),
 Node(id='Chicken Broth', type='Ingredient'),
 Node(id='Instructions', type='Instructions')]

In [41]:
graph.add_graph_documents(
    graph_documents,
    baseEntityLabel=True,
    include_source=True
)

#### df2graph

In [9]:
test_recipe = recipe_df.head()

In [28]:
import re
import pandas as pd
import networkx as nx
from py2neo import Graph, Node, Relationship
import ast

# 初始化圖譜
G = nx.Graph()

# 提取成分名稱和份量的輔助函數
def parse_ingredients(ingredient_string):
    ingredients = ast.literal_eval(ingredient_string)
    parsed_ingredients = []
    for ingredient in ingredients:
        # 使用正則表達式來拆解成分名稱和份量
        match = re.match(r"([\d\/\s\.\½]+)?\s*(oz\.|teaspoons?|tablespoons?|cups?|lb\.|g|kg|ml|l)?\s*(.*)", ingredient.strip())
        if match:
            amount, unit, name = match.groups()
            amount = amount.strip() if amount else '1'
            unit = unit.strip() if unit else ''
            name = name.strip()
            # 處理數量的簡單轉換
            if '½' in amount:
                amount = amount.replace('½', '0.5')
            try:
                amount = float(eval(amount))
            except:
                amount = 1
            parsed_ingredients.append((name, f"{amount} {unit}".strip()))
        else:
            parsed_ingredients.append((ingredient.strip(), ''))
    return parsed_ingredients

In [32]:
# 連接到 Neo4j Aura 並創建圖譜
NEO4J_URI = "neo4j+s://f0372b25.databases.neo4j.io"
NEO4J_USER = os.getenv("NEO4J_USERNAME")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")

# 添加食譜和成分到圖譜中
for index, row in test_recipe.iterrows():
    recipe_title = row['Title']
    recipe_instructions = row['Instructions']
    recipe_node = {"title": recipe_title, "instructions": recipe_instructions}  # 將屬性存儲在字典中
    G.add_node(recipe_title, **recipe_node)  # 使用屬性字典添加節點

    ingredients = parse_ingredients(row['Cleaned_Ingredients'])
    for name, amount in ingredients:
        ingredient_node = {"name": name.lower()}
        if not G.has_node(name.lower()):
            G.add_node(name.lower(), **ingredient_node)  # 使用屬性字典添加節點
        G.add_edge(recipe_title, name.lower(), relationship="CONTAINS", amount=amount)

driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

def clear_database(tx):
    tx.run("MATCH (n) DETACH DELETE n")

def create_node(tx, label, properties):
    tx.run(f"CREATE (n:{label} $properties)", properties=properties)

def create_relationship(tx, start_node, end_node, relationship, properties):
    tx.run(f"""
    MATCH (a:{start_node[0]} {{title: $start_title}})
    MATCH (b:{end_node[0]} {{name: $end_name}})
    CREATE (a)-[r:{relationship} $properties]->(b)
    """, start_title=start_node[1]['title'], end_name=end_node[1]['name'], properties=properties)

# 清空現有數據（選擇性）
with driver.session() as session:
    session.write_transaction(clear_database)

# 添加節點和邊到 Neo4j
with driver.session() as session:
    for node_key in G.nodes:
        node = G.nodes[node_key]  # 直接檢索節點屬性字典
        if 'instructions' in node:
            session.write_transaction(create_node, "Recipe", {"title": node_key, "instructions": node['instructions']})
        else:
            session.write_transaction(create_node, "Ingredient", {"name": node_key})

    for edge in G.edges(data=True):
        recipe_node = ("Recipe", {"title": edge[0]})
        ingredient_node = ("Ingredient", {"name": edge[1]})
        session.write_transaction(create_relationship, recipe_node, ingredient_node, edge[2]['relationship'], {"amount": edge[2]['amount']})

print("知識圖譜建立完成！")

  session.write_transaction(clear_database)
  session.write_transaction(create_node, "Recipe", {"title": node_key, "instructions": node['instructions']})
  session.write_transaction(create_node, "Ingredient", {"name": node_key})
  session.write_transaction(create_relationship, recipe_node, ingredient_node, edge[2]['relationship'], {"amount": edge[2]['amount']})


知識圖譜建立完成！


#### search

In [43]:
vector_index = Neo4jVector.from_existing_graph(
    OpenAIEmbeddings(),
    search_type="hybrid",
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding"
)



In [44]:
graph = Neo4jGraph()
# Retriever

graph.query(
    "CREATE FULLTEXT INDEX entity IF NOT EXISTS FOR (e:__Entity__) ON EACH [e.id]")

# Extract entities from text
class Entities(BaseModel):
    """Identifying information about entities."""

    names: List[str] = Field(
        ...,
        description="All the recipe, food, ingredient, nutrition that "
        "appear in the text",
    )

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are extracting organization and person entities from the text.",
        ),
        (
            "human",
            "Use the given format to extract information from the following "
            "input: {question}",
        ),
    ]
)
# llm=ChatOllama(model="llama3")
llm=ChatOpenAI(temperature=0, model_name="gpt-4o") 
entity_chain = prompt | llm.with_structured_output(Entities)

In [45]:
entity_chain.invoke({"question": "烤馬鈴薯怎麼做？"}).names

['烤馬鈴薯']

In [46]:
def generate_full_text_query(input: str) -> str:
    """
    Generate a full-text search query for a given input string.

    This function constructs a query string suitable for a full-text search.
    It processes the input string by splitting it into words and appending a
    similarity threshold (~2 changed characters) to each word, then combines
    them using the AND operator. Useful for mapping entities from user questions
    to database values, and allows for some misspelings.
    """
    full_text_query = ""
    words = [el for el in remove_lucene_chars(input).split() if el]
    for word in words[:-1]:
        full_text_query += f" {word}~2 AND"
    full_text_query += f" {words[-1]}~2"
    return full_text_query.strip()

# Fulltext index query
def structured_retriever(question: str) -> str:
    """
    Collects the neighborhood of entities mentioned
    in the question
    """
    result = ""
    entities = entity_chain.invoke({"question": question})
    for entity in entities.names:
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
            YIELD node,score
            CALL {
                WITH node
                MATCH (i:Ingredient)<-[:CONTAINS]-(r:Recipe)
                WHERE i.name IN $ingredient_list
                RETURN r.title + ' - ' + r.instructions AS output
            }
            RETURN output LIMIT 50
            """,
            {"query": generate_full_text_query(entity)},
        )
        result += "\n".join([el['output'] for el in response])
    return result

In [47]:
def retriever(question: str):
    print(f"Search query: {question}")
    structured_data = structured_retriever(question)
    unstructured_data = [el.page_content for el in vector_index.similarity_search(question)]
    final_data = f"""Structured data:
{structured_data}
Unstructured data:
{"#Document ". join(unstructured_data)}
    """
    return final_data

In [48]:
# Condense a chat history and follow-up question into a standalone question
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question,
in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""  # noqa: E501
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

def _format_chat_history(chat_history: List[Tuple[str, str]]) -> List:
    buffer = []
    for human, ai in chat_history:
        buffer.append(HumanMessage(content=human))
        buffer.append(AIMessage(content=ai))
    return buffer

_search_query = RunnableBranch(
    # If input includes chat_history, we condense it with the follow-up question
    (
        RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
            run_name="HasChatHistoryCheck"
        ),  # Condense follow-up question and chat into a standalone_question
        RunnablePassthrough.assign(
            chat_history=lambda x: _format_chat_history(x["chat_history"])
        )
        | CONDENSE_QUESTION_PROMPT
        | ChatOpenAI(temperature=0)
        | StrOutputParser(),
    ),
    # Else, we have no chat history, so just pass through the question
    RunnableLambda(lambda x : x["question"]),
)

In [55]:
template = """請回答下方問題
{context}

問題: {question}
請使用中文回答
答案:"""
prompt = ChatPromptTemplate.from_template(template)

chain = (
    RunnableParallel(
        {
            "context": _search_query | retriever,
            "question": RunnablePassthrough(),
        }
    )
    | prompt
    | llm
    | StrOutputParser()
)

In [56]:
chain.invoke({"question": "how to cook Potatoes?"})

Search query: how to cook Potatoes?


ClientError: {code: Neo.ClientError.Statement.ParameterMissing} {message: Expected parameter(s): ingredient_list}