In [1]:
import os
os.environ["OPENAI_API_KEY"] = ""

In [2]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import GraphCypherQAChain
from langchain.graphs import Neo4jGraph
from langchain.chat_models import ChatOpenAI
from langchain.chains import GraphCypherQAChain
from langchain.graphs import Neo4jGraph

In [3]:
graph = Neo4jGraph(
    url="bolt://localhost:7687", username="neo4j", password="pleaseletmein"
)

In [6]:
chain = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0), graph=graph, verbose=True,
)

In [7]:
chain.run("What recipe has the most ingredients?")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (r:Recipie)-[:HAS_INGREDIENT]->(i:Ingredient)
RETURN r.title, COUNT(i) AS ingredientCount
ORDER BY ingredientCount DESC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'r.title': 'Enchiladas med kylling', 'ingredientCount': 99}][0m

[1m> Finished chain.[0m


'The recipe with the most ingredients is "Enchiladas med kylling" with a total of 99 ingredients.'

In [7]:
from langchain.prompts.prompt import PromptTemplate


CYPHER_GENERATION_TEMPLATE = """
Task:Generate Cypher statement to query a graph database.
Instructions:
You are a recipe recommender system that help users to find weekly dinner menus that match their preferences. The user can ask you about single recipes or about weekly menus that match their preferences. 
Use only the provided relationship types and properties in the schema.
All properties are in Norwegian.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}
Cypher examples:
# What days are included in a Menu?
MATCH (r:Recipie)-[f:IS_PART_OF_MENU]->(:menu)
RETURN DISTINCT f.weekDay
# How many recipes has been a part of a weekly menu on a Monday?
MATCH (:Recipie)-[f:IS_PART_OF_MENU {{weekDay: 'Mandag'}}]->(:Menu)
RETURN COUNT(f)
# For giving menu recommendations, use KNN on the graphSAGE embeddings of the recipes.
CALL gds.knn.stream('recipe-graph', {{
    topK: 6,
    nodeProperties: ['graphSAGE'],
    // The following parameters are set to produce a deterministic result
    randomSeed: 1337,
    concurrency: 1,
    sampleRate: 1.0,
    deltaThreshold: 0.0
}})
YIELD node1, node2, similarity
RETURN gds.util.asNode(node1).title AS Recipe1, gds.util.asNode(node2).title AS Recipe2, similarity
ORDER BY similarity DESCENDING, Recipe1, Recipe2

Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.

The question is:
{question}"""
CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=CYPHER_GENERATION_TEMPLATE
)

In [8]:
chain_language_example = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0), graph=graph, verbose=True,
    cypher_prompt=CYPHER_GENERATION_PROMPT
)

In [9]:
chain_language_example.run("""
How many recipes has been a part of a weekly menu on a Monday?
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (:Recipe)-[f:IS_PART_OF_MENU {weekDay: 'Mandag'}]->(:Menu)
RETURN COUNT(f)[0m
Full Context:
[32;1m[1;3m[{'COUNT(f)': 141}][0m

[1m> Finished chain.[0m


'There have been 141 recipes that have been a part of a weekly menu on a Monday.'

In [14]:
chain_language_example.run("""
    Give me a menu recommendation with lasagne. 
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (r:Recipe)-[:HAS_INGREDIENT]->(i:Ingredient {title: 'lasagne'})
MATCH (r)-[:IS_PART_OF_MENU]->(m:Menu)
WITH m, COUNT(DISTINCT r) AS recipeCount
WHERE recipeCount >= 3
RETURN m.week, m.year[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


"I'm sorry, but I don't have any information to provide a menu recommendation with lasagne."

In [13]:
print('Based on the provided information, here is a menu recommendation using the KNN algorithm:\n\n1. Nakkekoteletter med middelhavsgrønnsaker (Pork neck chops with Mediterranean vegetables)\n2. Spicy tacosalat (Spicy taco salad)\n3. Kyllingtaco med granateple og mango (Chicken taco with pomegranate and mango)\n4. Red curry med ørret (Red curry with trout)\n5. Pasta med avokadokrem (Pasta with avocado cream)\n6. Tradisjonell fårikål (Traditional lamb and cabbage stew)\n7. Grønnsakslasagne med paprika og spinat (Vegetable lasagna with bell pepper and spinach)\n8. Kjøttfri juleburger (Meat-free Christmas burger)\n9. Kyllingfilet med ratatouille (Chicken fillet with ratatouille)\n10. Pasta med pesto og ricotta (Pasta with pesto and ricotta)\n\nPlease note that the menu recommendation is based on the similarity scores calculated by the KNN algorithm. Enjoy your meal!')

Based on the provided information, here is a menu recommendation using the KNN algorithm:

1. Nakkekoteletter med middelhavsgrønnsaker (Pork neck chops with Mediterranean vegetables)
2. Spicy tacosalat (Spicy taco salad)
3. Kyllingtaco med granateple og mango (Chicken taco with pomegranate and mango)
4. Red curry med ørret (Red curry with trout)
5. Pasta med avokadokrem (Pasta with avocado cream)
6. Tradisjonell fårikål (Traditional lamb and cabbage stew)
7. Grønnsakslasagne med paprika og spinat (Vegetable lasagna with bell pepper and spinach)
8. Kjøttfri juleburger (Meat-free Christmas burger)
9. Kyllingfilet med ratatouille (Chicken fillet with ratatouille)
10. Pasta med pesto og ricotta (Pasta with pesto and ricotta)

Please note that the menu recommendation is based on the similarity scores calculated by the KNN algorithm. Enjoy your meal!


## Project the graph to a homogeneous Recipe graph

In [25]:
!pip install -q graphdatascience





In [3]:
from graphdatascience import GraphDataScience

host = "bolt://localhost:7687"
user = "neo4j"
password = "pleaseletmein"

gds = GraphDataScience(host, auth=(user, password))

In [63]:
G, metadata = gds.graph.project(
    "recipes", "Recipe", ["IS_IN_SAME_MENU", "SHARE_INGREDIENTS"], nodeProperties=["openaiEmbeddings"]
)

Loading:   0%|          | 0/100 [00:00<?, ?%/s]

In [64]:
metadata

nodeProjection            {'Recipe': {'label': 'Recipe', 'properties': {...
relationshipProjection    {'SHARE_INGREDIENTS': {'aggregation': 'DEFAULT...
graphName                                                           recipes
nodeCount                                                               721
relationshipCount                                                    292242
projectMillis                                                          1186
Name: 0, dtype: object

In [47]:
gds.graph.drop(G)

graphName                                                          recipes
database                                                             neo4j
memoryUsage                                                               
sizeInBytes                                                             -1
nodeCount                                                             2649
relationshipCount                                                     8872
configuration            {'relationshipProjection': {'HAS_INGREDIENT': ...
density                                                           0.001265
creationTime                           2023-10-15T13:39:59.548608000+00:00
modificationTime                       2023-10-15T13:40:05.791276900+00:00
schema                   {'graphProperties': {}, 'nodes': {'Menu': {}, ...
schemaWithOrientation    {'graphProperties': {}, 'nodes': {'Menu': {}, ...
Name: 0, dtype: object

In [65]:
gds.nodeSimilarity.mutate(
    G, topK=2000, mutateProperty="score", mutateRelationshipType="IS_SIMILAR"
)

NodeSimilarity:   0%|          | 0/100 [00:00<?, ?%/s]

preProcessingMillis                                                       0
computeMillis                                                           843
mutateMillis                                                           1154
postProcessingMillis                                                     -1
nodesCompared                                                           721
relationshipsWritten                                                 516782
similarityDistribution    {'min': 0.0019685029983520508, 'p5': 0.0259259...
configuration             {'mutateProperty': 'score', 'jobId': '04cee4a6...
Name: 0, dtype: object

In [66]:
# We use the WCC algorithm to see how many components we have
wcc_result = gds.wcc.mutate(G, mutateProperty="wcc_component")

print(wcc_result[["computeMillis", "componentCount"]])

computeMillis     84
componentCount     1
Name: 0, dtype: object


## GraphSAGE on projected Recipe graph
We can project the graph to a homogeneous Recipe graph and run GraphSAGE on it to get embeddings. 
To achieve this we can use the meta-paths IN_SAME_MENU and SHARE_RECIPES.

In [67]:
%%time
gds.beta.graphSage.train(
    G,
    modelName="RecipeModel",
    embeddingDimension=256,
    sampleSizes=[10, 10],
    searchDepth=15,
    epochs=20,
    learningRate=0.0001,
    activationFunction="RELU",
    aggregator="MEAN",
    featureProperties=["openaiEmbeddings"],
    batchSize=10,
)

GraphSageTrain:   0%|          | 0/100 [00:00<?, ?%/s]

CPU times: total: 11.2 s
Wall time: 3min 27s


(GraphSageModel({'modelInfo': {0: {'modelName': 'RecipeModel', 'modelType': 'graphSage', 'metrics': {'didConverge': False, 'ranIterationsPerEpoch': [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10], 'iterationLossesPerEpoch': [[26.56099679621341, 26.554933417058617, 26.54626162283551, 26.540211814659337, 26.52392837100735, 26.508419674260267, 26.490737726871327, 26.453854335717985, 26.416578565689413, 26.36893115624394], [26.340361609196645, 26.300589833324956, 26.1924555850667, 26.00708860974105, 25.83041175455737, 25.621888528981867, 25.282709144224196, 24.9013981902724, 24.20320533679919, 23.868979443766342], [22.963648428302648, 25.61097010799164, 25.741427164388437, 25.749780056608973, 25.663545700693277, 25.460840731181158, 25.02845502843713, 24.53278213352048, 23.56662029547729, 22.32015684418157], [21.92539280055446, 25.360212042697718, 25.74368368755441, 25.920671737539912, 25.99183599593142, 25.973142647938182, 25.979541415857945, 26.01314350692

In [69]:
gds.beta.graphSage.write(
    G,
    modelName="RecipeModel",
    nodeLabels=["Recipe"],
    writeProperty="graphSAGE",
    relationshipTypes=["IS_SIMILAR"],
)

GraphSage:   0%|          | 0/100 [00:00<?, ?%/s]

nodeCount                                                              721
nodePropertiesWritten                                                  721
preProcessingMillis                                                      2
computeMillis                                                         1978
writeMillis                                                           1055
configuration            {'writeProperty': 'graphSAGE', 'modelName': 'R...
Name: 0, dtype: object

In [70]:
G.drop()

graphName                                                          recipes
database                                                             neo4j
memoryUsage                                                               
sizeInBytes                                                             -1
nodeCount                                                              721
relationshipCount                                                   809024
configuration                                                           {}
density                                                           1.558453
creationTime                           2023-10-15T14:03:00.025549500+00:00
modificationTime                       2023-10-15T14:03:20.989098300+00:00
schema                   {'graphProperties': {}, 'nodes': {'Recipe': {'...
schemaWithOrientation    {'graphProperties': {}, 'nodes': {'Recipe': {'...
Name: 0, dtype: object

## KNN

In [9]:
g1, res = gds.graph.project('cf-projection2', 
        {'Recipe':{'properties':['graphSAGE']}},['*'])
res

Loading:   0%|          | 0/100 [00:00<?, ?%/s]

nodeProjection            {'Recipe': {'label': 'Recipe', 'properties': {...
relationshipProjection    {'__ALL__': {'aggregation': 'DEFAULT', 'orient...
graphName                                                    cf-projection2
nodeCount                                                               721
relationshipCount                                                    292242
projectMillis                                                          1055
Name: 0, dtype: object

In [10]:
knn_stats_df = gds.knn.stream(g1, nodeProperties=['graphSAGE'],
    topK=10,
    sampleRate=1.0,
    maxIterations=1000)
knn_stats_df

Unnamed: 0,node1,node2,similarity
0,0,1,0.934446
1,0,86,0.933699
2,0,168,0.931350
3,0,356,0.930229
4,0,41,0.929180
...,...,...,...
7205,720,572,0.931083
7206,720,211,0.930957
7207,720,324,0.930563
7208,720,638,0.930088


In [11]:
knn_stats_df.describe()

Unnamed: 0,node1,node2,similarity
count,7210.0,7210.0,7210.0
mean,360.0,359.841748,0.925266
std,208.149007,205.665672,0.009627
min,0.0,0.0,0.893044
25%,180.0,184.0,0.918933
50%,360.0,357.0,0.925167
75%,540.0,534.0,0.931617
max,720.0,720.0,0.959084


In [12]:
target_node = 38 # 19 # Lasagne 
k_closest = knn_stats_df[knn_stats_df["node1"] == target_node]["node2"].values

closest = gds.run_cypher("""
    MATCH (r:Recipe) 
    WHERE id(r) = $target_id
    with r
    MATCH (r1:Recipe) 
    WHERE id(r1) in $ids
    RETURN r.title, r1.title
""", params={"target_id": target_node, "ids": k_closest})
closest

Unnamed: 0,r.title,r1.title
0,Pizza di Parma,Stroganoff med pølse
1,Pizza di Parma,Kremet fiskesuppe med blåskjell og reker
2,Pizza di Parma,Ovnsbakt middelhavskylling
3,Pizza di Parma,Tradisjonell fårikål
4,Pizza di Parma,Vegetarpølser med godt tilbehør
5,Pizza di Parma,Innbakt nøttestek
6,Pizza di Parma,Nakkekoteletter med rødkål
7,Pizza di Parma,Enchiladas med kjøttdeig
8,Pizza di Parma,Helstekt svinefilet med grønnsaksform
9,Pizza di Parma,Bacalao


r.title     10
r1.title    10
dtype: int64