In [2]:
%load_ext autoreload
%autoreload

from communities import *
import networkx as nx
import utils
from reading import read_pickle_graph

Note: to be able to use all crisp methods, you need to install some additional packages:  {'wurlitzer', 'bayanpy', 'graph_tool'}
Note: to be able to use all crisp methods, you need to install some additional packages:  {'pyclustering', 'ASLPAw'}
Note: to be able to use all crisp methods, you need to install some additional packages:  {'wurlitzer'}


In [3]:
G = read_pickle_graph("../graph_data/out_10000.pickle") # UNDIRECTED
print("Default graph read")
utils.info(G)

Default graph read
       Graph | 'recipeGraph'
       Nodes | 12,618 (iso=2)
       Edges | 95,445 (loop=3)
      Degree | 15.13 (max=2,924)
         LCC | 100.0% (n=4)
  Clustering | 0.0032



In [4]:
# REMOVING EDGES BETWEEN RECIPE-RECIPE AND INGREDIENT-INGREDIENT
edges_to_remove = []

recipe_nodes = [node for node, data in G.nodes(data=True) if data["data"]["type"] == "recipe"]
ingredient_nodes = [node for node, data in G.nodes(data=True) if data["data"]["type"] == "ingredients"]

for node in recipe_nodes:
    for neighbor in G.neighbors(node):
        if neighbor in recipe_nodes:
            edges_to_remove.append((node, neighbor))
print(f'removed {len(edges_to_remove)} edges between recipe nodes')
G.remove_edges_from(edges_to_remove)
edges_to_remove = []
for node in ingredient_nodes:
    for neighbor in G.neighbors(node):
        if neighbor in ingredient_nodes:
            edges_to_remove.append((node, neighbor))
print(f'removed {len(edges_to_remove)} edges between ingredient nodes')
G.remove_edges_from(edges_to_remove)

print(f'Post prunning info')
# utils.info(G)

print(f'Number of recipe nodes: {len(recipe_nodes)}')
print(f'Number of ingredient nodes: {len(ingredient_nodes)}')

print(f'Reducing to LCC')
G = G.subgraph(sorted(nx.connected_components(G), key=len, reverse=True)[0])
# utils.info(G)

recipe_nodes = [node for node, data in G.nodes(data=True) if data["data"]["type"] == "recipe"]
ingredient_nodes = [node for node, data in G.nodes(data=True) if data["data"]["type"] == "ingredients"]

removed 100 edges between recipe nodes
removed 851 edges between ingredient nodes
Post prunning info
       Graph | 'recipeGraph'
       Nodes | 12,618 (iso=5)
       Edges | 94,968 (loop=0)
      Degree | 15.05 (max=2,914)
         LCC | 99.9% (n=7)
  Clustering | 0.0000

Number of recipe nodes: 9839
Number of ingredient nodes: 2779
Reducing to LCC
       Graph | 'recipeGraph'
       Nodes | 12,611 (iso=0)
       Edges | 94,967 (loop=0)
      Degree | 15.06 (max=2,914)
         LCC | 100.0% (n=1)
  Clustering | 0.0000



In [54]:
for recipe_node in recipe_nodes:
    print(recipe_node, G.nodes()[recipe_node]["data"]["slug"])

0 rhubarb-crumble-cake
9 chicken-cider-fricassee-parsley-croutes
25 baked-conchiglioni-sausage-sage-butternut-squash
35 special-seafood-saffron-pasta
48 individual-rhubarb-ripple-pavlovas
53 pink-jam-slice
58 venetian-duck-ragu
66 walnut-red-pepper-pesto-pasta
73 orecchiette-anchovies-purple-sprouting-broccoli
79 coconut-mango-sponge
84 south-indian-egg-curry-rice-lentil-pilau
98 spiced-chicken-pineapple-salad
105 miso-brown-rice-broccoli-salad-fiery-prawns
114 roasted-squash-shallot-spinach-ricotta-pasta
118 chicken-chorizo-pie
123 squid-pinto-bean-stew-garlic-toasts
132 lighter-chicken-katsu
139 chocolate-sponge-hot-chocolate-custard
147 ginger-noodles-chicken-hoisin-aubergines
155 pistachio-orange-madeleines
159 eccles-cakes
163 falafel-scotch-eggs
168 cheesy-hot-cross-buns
173 spring-chicken-paella
178 all-one-posh-lamb-balti
185 carrot-tomato-soup
189 mr-mcgregors-rabbit-pie
199 boozy-self-saucing-chocolate-pud
202 bunny-carrot-cake
207 easter-nest-coconut-white-chocolate-cupcakes

In [5]:
# recipe recommendation approaches:
# INPUTS:
# A) given recipe
# B) given multiple recipes 
# C) given multiple ingredients
# D) given mix of both 

In [79]:
import random
import numpy as np


def return_coms(com_tree, node, depth=1):
    # utils.info(com_tree.G)
    for communnity_idx in com_tree.communities:
        if node in com_tree.communities[communnity_idx]:
            if depth == 1:
                # print(com_tree)
                # print(com_tree.get_communities())
                return [com_tree.communitySubtrees[communnity_idx].G]
            else:
                return [com_tree.communitySubtrees[communnity_idx].G] + return_coms(com_tree.communitySubtrees[communnity_idx], node, depth-1)

def calc_ingredient_score(G, ingredient_nodes):
    freqs = {}
    for node in ingredient_nodes:
        freqs[node] = G.degree(node)

    max_freq = max(freqs.values())
    for node in freqs:
        freqs[node] = 1 -  freqs[node] / max_freq
    return freqs


def random_walk(G, start_node, ingredient_scores, steps=500_000, p=0.85):

    scores = {}
    node = start_node
    for _ in range(steps):
        # JUMP FROM RECIPE TO INGREDIENT TO RECIPE
        if random.random() < p:
            node = random.choice(list(G.neighbors(node)))
        else:
            node = start_node
            continue

        score = ingredient_scores[node]
        node = random.choice(list(G.neighbors(node)))

        if G.nodes()[node]["data"]["type"] == "recipe":
            if node in scores:
                scores[node] += score / G.degree(node)
            else:
                scores[node] = score / G.degree(node)
    return scores

            
# example_recipe_node = random.choice(recipe_nodes)
# 4343 good
# 0 good

def rw_scores(G_full, recipe_node, ingredient_scores, max_depth=4, com_depth=0):
    com_tree = Community_tree(G_full, max_depth=max_depth)
    print(recipe_node, G_full.nodes()[recipe_node]["data"]["slug"])

    Gs = return_coms(com_tree, recipe_node, max_depth - 1)
    G_ = Gs[com_depth]
    utils.info(G_, clustering_sample=10)
    scores = random_walk(G_, recipe_node, ingredient_scores)
    return scores


ingredient_scores = calc_ingredient_score(G, ingredient_nodes)
scores = rw_scores(G_full=G, recipe_node=0, ingredient_scores = ingredient_scores, max_depth=4, com_depth=0)
for node, score in sorted(scores.items(), key=lambda x: x[1], reverse=True)[:5]:
    print(f'{G.nodes()[node]["data"]["slug"]}: {score}')

# orange-berry-wedding-cake: 431.90905971173515
# gingerbread-cake-caramel-biscuit-icing: 394.68737131091365
# rhubarb-marzipan-citrus-cake: 389.8476321207961
# rhubarb-crumble-cake: 386.34797529169595
# rhubarb-elderflower-cake: 380.85209334248447
# sticky-ginger-lemon-drizzle-cake: 376.4794097460535

# ingredient_scores = calc_ingredient_score(G, ingredient_nodes)
# def return_scores(G, node, ingredient_scores):
#     com_tree = Community_tree(G, max_depth=4)
#     Gs_recipe = return_coms(com_tree, node, 3)
#     for G_ in Gs_recipe:
#         freqs = random_walk(G_, node, ingredient_scores)
#         for node, freq in sorted(freqs.items(), key=lambda x: x[1], reverse=True)[:5]:
#             print(f'{G.nodes()[node]["data"]["slug"]}: {freq}')

# return_scores(G, 0, ingredient_scores)


0 rhubarb-crumble-cake
       Graph | 'recipeGraph'
       Nodes | 3,546 (iso=0)
       Edges | 20,475 (loop=0)
      Degree | 11.55 (max=1,381)
         LCC | 100.0% (n=1)
  Clustering | 0.0000

roasted-rhubarb: 110.62560054907289
rhubarb-gin: 109.20693205216136
rhubarb-ginger-jam: 81.75943719972523
rhubarb-cordial: 73.51784488675368
stewed-rhubarb: 70.298901853123


In [88]:
recipes = [3394, 3449]

scoress = {}
for recipe in recipes:
    scores = rw_scores(G_full=G, recipe_node=recipe, ingredient_scores = ingredient_scores, max_depth=4, com_depth=0)
    # get max score
    max_score = max(scores.values())
    # normalize scores
    for node in scores:
        scores[node] = scores[node] / max_score
    for node, score in scores.items():
        if node in scoress:
            scoress[node] += score
        else:
            scoress[node] = score

# print top 20
for node, score in sorted(scoress.items(), key=lambda x: x[1], reverse=True)[:20]:
    print(f'{G.nodes()[node]["data"]["slug"]}: {score}')

3394 beef-stir-fry
       Graph | 'recipeGraph'
       Nodes | 2,304 (iso=0)
       Edges | 11,636 (loop=0)
      Degree | 10.10 (max=825)
         LCC | 100.0% (n=1)
  Clustering | 0.0000

3449 thai-satay-stir-fry
       Graph | 'recipeGraph'
       Nodes | 3,220 (iso=0)
       Edges | 17,947 (loop=0)
      Degree | 11.15 (max=1,038)
         LCC | 100.0% (n=1)
  Clustering | 0.0000

thai-satay-stir-fry: 1.6366985420393256
10-minute-pad-thai: 1.449906922054728
egg-drop-chicken-noodle-soup: 1.2946328205156201
beef-stir-fry: 1.2902251629961121
grilled-chicken-noodles-bun-ga-nuong: 1.2794163457112977
oriental-steak-salad: 1.2497790520848577
gunpowder-chicken-dried-red-chillies-peanuts: 1.1930967939225932
peanut-butter-chicken: 1.1892619781493428
congee-soy-eggs: 1.1082549000985848
spicy-thai-prawn-noodles: 1.1051477556771188
kung-po-prawns: 1.0698517866786887
barbecued-sticky-chinese-pork-chops: 1.0580905559955693
veggie-noodle-pot: 1.0138778366167143
apple-crunch: 0.9848277476519156
10-