# Creating the Backend Graphs


In [8]:
from neo4j import GraphDatabase, basic_auth
import nxneo4j as nx #using nxneo4j
from py2neo import Graph, Node 
import pandas

## Creating the graphs

The Graphs are created using neo4j and hosted using db aura. This allows us to access the graphs remotely. The following code connects to the db.

In [3]:
graph = GraphDatabase.driver("neo4j+s://101fd6b7.databases.neo4j.io", auth=basic_auth("neo4j","gB9F-fD2doYqInIcXR3DJZwnvvDWm-ZpgvOJ3BGCl54"))

cGraph = Graph("neo4j+s://101fd6b7.databases.neo4j.io", auth=('neo4j', "gB9F-fD2doYqInIcXR3DJZwnvvDWm-ZpgvOJ3BGCl54"))

In [4]:
G = nx.Graph(graph) #create the empty graph

In [5]:
fromFile = pandas.read_csv('../CSV/recipeList.csv')

## Preparing the data for nodes 

To start we need to get all the unique ingredients in the dataset. These will be used as ingredient nodes in the graph.

In [31]:
uniqueIng = []
for index, row in fromFile.iterrows():
    if row[1] not in uniqueIng:
        uniqueIng.append(row[1])

Next we find all the unique combinations of ingredients. These will be used for the edges later on

In [32]:
ingCombo = []
for curIndex in range(0, len(uniqueIng)):
    for tempIndex in range((curIndex + 1), len(uniqueIng)):
        tempCombo = []
        tempCombo.append(uniqueIng[curIndex])
        tempCombo.append(uniqueIng[tempIndex])
        ingCombo.append(tempCombo)

In [33]:
ingWeight = [0] * len(ingCombo)

Next we create a dictionary that contains all of the recipes as the keys, and all of the ingredients found in that recipe as the content. This will be used to connect the ingredients to there respective recipe in the graph.

In [34]:
curRecipe = ''
recIng = {}
ingList = []

listOfRecipes = []

for index, row in fromFile.iterrows():
    if curRecipe == row[0]: # the data has a lot of black space after it. This removes it
        ingList.append(row[1])
    else:
        recIng[curRecipe] = ingList
        curRecipe = row[0]
        ingList = []
        listOfRecipes.append(row[0])

recIng[curRecipe] = ingList
curRecipe = row[0]
ingList = []
del recIng['']

The weight of the connection is calculated based on the amount of recipes the pair has in common. The next snippet of code does exactly that. The weight is then normalized to be between 0 and 1.

In [35]:
for ingComboIndex in range(0, len(ingCombo)):
    firstIng = ingCombo[ingComboIndex][0]
    secondIng = ingCombo[ingComboIndex][1]
    for recipe, ings in recIng.items():
        if firstIng in ings and secondIng in ings:
            ingWeight[ingComboIndex] += 1
        else:
            continue

In [36]:
maxWeight = max(ingWeight)

In [37]:
normWeights = [i/maxWeight for i in ingWeight]

In [38]:
firstIngs = []
secondIngs = []
for fI, sI in ingCombo:
    firstIngs.append(fI)
    secondIngs.append(sI)

The Data is saved to CSV so it can be accessed easily 

In [39]:
combosDataframe = pandas.DataFrame(list(zip(firstIngs, secondIngs, normWeights)),
               columns =['FirstIng', 'SecondIng', 'ComboWeight'])

In [40]:
combosDataframe.to_csv('../CSV/sbsCompComboWeights.csv', index=False)

In [41]:
compFirstIngs = []
compSecondIngs = []
compNormWeights = []
for normWeightsIndex in range(0, len(normWeights)):
    if normWeights[normWeightsIndex] != 0.0:
        compFirstIngs.append(firstIngs[normWeightsIndex])
        compSecondIngs.append(secondIngs[normWeightsIndex])
        compNormWeights.append(normWeights[normWeightsIndex])
    else:
        continue

In [19]:
compCombosDataframe = pandas.DataFrame(list(zip(compFirstIngs, compSecondIngs, compNormWeights)),
               columns =['FirstIng', 'SecondIng', 'ComboWeight'])
compCombosDataframe.to_csv('../CSV/sbsComboWeights.csv', index=False)

In [42]:
uniqueConIng = []
for index in range(0, len(compFirstIngs)):
    if compFirstIngs[index] not in uniqueConIng:
        uniqueConIng.append(compFirstIngs[index])

    if compSecondIngs[index] not in uniqueConIng:
        uniqueConIng.append(compSecondIngs[index])

Creating all the ingredient nodes

In [114]:
for currIng in uniqueIng:
    addNode = "CREATE (n:Ingredient {name : '" + currIng + "'})"
    cGraph.query(addNode)

Creating all the recipe nodes

In [115]:
for recipe in listOfRecipes:
    recipe = recipe.replace("'", "")
    addNode = "CREATE (n:Recipe {name : '" + recipe + "'})"
    cGraph.query(addNode)

Creating all the edges between ingredients 

In [118]:
for index in range(0,len(compFirstIngs)):
    addEdge = "MATCH (a:Ingredient), (b:Ingredient) WHERE a.name = '" + compFirstIngs[index] + "' AND b.name = '" + compSecondIngs[index] + "' CREATE (a)-[r:CommonRecipes {weight:" + str(compNormWeights[index]) + " }]->(b) RETURN type(r)"
    
    cGraph.query(addEdge)


Creading all the edges between ingredients and recipes

In [44]:
for counter in range(0, len(fromFile["Recipe"])):
    fromFile["Recipe"][counter] = fromFile["Recipe"][counter].replace("'", "")
    addEdge = "MATCH (a:Recipe), (b:Ingredient) WHERE a.name = '" + fromFile["Recipe"][counter] + "' AND b.name = '" + fromFile["Ingredient"][counter] + "' CREATE (a)-[r:inRecipe]->(b) RETURN type(r)"
    
    cGraph.query(addEdge)
    

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fromFile["Recipe"][counter] = fromFile["Recipe"][counter].replace("'", "")


Querying the Graph

In [50]:
query = "MATCH (i:Ingredient)-[r:inRecipe]-(c:Recipe) RETURN i.name, count(r) AS connections ORDER BY connections DESC"

result = cGraph.query(query)
print(result)

 i.name    | connections 
-----------|-------------
 olive_oil |          27 
 tomato    |          17 
 garlic    |          17 

