In [38]:
from collections import Counter

import matplotlib.pyplot as plt
import networkx as nx

import process

# Load the recipes through the local `process` module.
recipes = process.process_recipes('recipes.jl')

# Recipe names, in the order process_recipes returned them.
names = [rec['name'] for rec in recipes]

# Every whitespace-separated token of every ingredient string yields one
# (recipe name, word) pair; a token that repeats yields repeated pairs.
edges = [
    (rec['name'], word)
    for rec in recipes
    for ing in rec['ingredients']
    for word in ing.split()
]

# Distinct ingredient words (list order follows set iteration order).
ing_words = list({word for _, word in edges})

In [39]:
# Bipartite graph: recipe names are tagged bipartite=0, ingredient words bipartite=1.
B = nx.Graph()
for side, members in enumerate((names, ing_words)):
    B.add_nodes_from(members, bipartite=side)
B.add_edges_from(edges)

In [40]:
# Size summary of the bipartite graph, plus a two-column layout for drawing it.
print('Recipes:', len(names))
print('Ingredients:', len(ing_words))
print('Edges:', len(edges))

# The `bipartite` node attribute assigned when B was built is the authoritative
# partition. nx.bipartite.sets() does not promise which of its two returned
# sets holds the recipes and only works on connected graphs, so the attribute
# is used unconditionally instead of branching on connectivity.
l = {n for n, d in B.nodes(data=True) if d["bipartite"] == 0}
r = set(B) - l

# Nodes tagged 0 are stacked in the column x=1, all others in the column x=2.
pos = {node: (1, index) for index, node in enumerate(l)}
pos.update((node, (2, index)) for index, node in enumerate(r))

# Drawing is currently disabled.
#nx.draw(B, pos=pos)
#plt.show()

Recipes: 1001
Ingredients: 1993
Edges: 24011


In [41]:
def collapse_bipartite_graph(B, recipes):
    """Project the bipartite graph B onto one of its two node sets.

    Parameters
    ----------
    B : networkx graph
        Bipartite graph in which every node carries a ``bipartite`` attribute.
        Nodes with ``bipartite == 0`` form the left set; all other nodes form
        the right set.
    recipes : bool
        Truthy selects the left set, falsy selects the right set.
        Note that inside this function the flag shadows the notebook-level
        ``recipes`` list.

    Returns
    -------
    The graph returned by ``nx.bipartite.projected_graph`` for the selected
    node set.

    Raises
    ------
    KeyError
        If some node of B has no ``bipartite`` attribute.
    """
    # The node attribute is the authoritative partition. nx.bipartite.sets()
    # does not guarantee which returned set is which and cannot be used on a
    # disconnected graph, so it is avoided entirely.
    left = {n for n, d in B.nodes(data=True) if d["bipartite"] == 0}
    selected = left if recipes else set(B) - left
    return nx.bipartite.projected_graph(B, selected)

# Projection onto the nodes tagged bipartite=0 (the recipe names).
G_rec = collapse_bipartite_graph(B, recipes=True)

# Projection onto the remaining nodes (the ingredient words).
G_ing = collapse_bipartite_graph(B, recipes=False)

# STRENGTH
I will first use strength measures to look for pairs of recipes (and of ingredients) with strong ties, high overlap and small span.

In [None]:
def tie_strength(G, v, w):
    """Return the number of neighbours shared by v and w in G, plus one."""
    shared = set(G.neighbors(v)).intersection(G.neighbors(w))
    return 1 + len(shared)

def overlap(G,u,v):
    """Return the neighbourhood overlap of u and v in G.

    The value is ``|N(u) & N(v)| / |N(u) | N(v)|`` where ``N(x)`` is the set
    produced by ``G.neighbors(x)``.

    When u and v are adjacent, each one appears in the other's neighbourhood,
    so both endpoints are counted in the union but not in the intersection.

    Returns 0.0 when neither node has any neighbour, instead of raising
    ``ZeroDivisionError``.
    """
    u_neighbours = set(G.neighbors(u))
    v_neighbours = set(G.neighbors(v))

    union = u_neighbours | v_neighbours
    if not union:
        # Two isolated nodes share nothing; avoid dividing by zero.
        return 0.0
    return len(u_neighbours & v_neighbours) / len(union)

def span(G,u,v):
    """Return the distance between u and v in G once the edge {u, v} is removed.

    The edge is removed temporarily and always put back, together with its
    attributes, even if the path search is interrupted or fails unexpectedly.
    Written for simple graphs (one edge per node pair).

    Returns
    -------
    The shortest-path length between u and v without the edge, or 0 when no
    such path exists. Note that this 0 sentinel sorts *before* every real
    distance in an ascending sort.

    Raises
    ------
    networkx.NetworkXError
        If there is no edge between u and v (raised by ``remove_edge``).
    """
    # Remember the edge attributes so that re-adding the edge does not drop them.
    saved_attrs = dict(G.get_edge_data(u, v, default={}))
    G.remove_edge(u, v)
    try:
        return nx.shortest_path_length(G, u, v)
    except nx.NetworkXNoPath:
        return 0
    finally:
        # Restore the graph no matter how the search ended.
        G.add_edges_from([(u, v, saved_attrs)])

In [43]:
# Recipes graph strengths: one score per edge, then edges ranked by score.

# Tie strength, strongest first.
rec_tie_str = {
    (v, w): tie_strength(G_rec, v, w) for v, w in G_rec.edges()
}
sort_rec_tie_str = sorted(rec_tie_str.items(), key=lambda kv: kv[1], reverse=True)

# Overlap, highest first.
rec_overlap_str = {
    (v, w): overlap(G_rec, v, w) for v, w in G_rec.edges()
}
sort_rec_overlap_str = sorted(rec_overlap_str.items(), key=lambda kv: kv[1], reverse=True)

# Span, smallest first.
rec_span_str = {
    (v, w): span(G_rec, v, w) for v, w in G_rec.edges()
}
sort_rec_span_str = sorted(rec_span_str.items(), key=lambda kv: kv[1])

### RESULTS
We can see that some recipes whose names sound like meat pies are connected to each other and also to 'Beef Crostini with Horseradish Spread', which looks like the Czech 'Chlebíčky', a dish similar to a small meat pie.

The recipes with the strongest overlap are connected in surprising ways: chicken recipes are linked with sweet recipes such as chocolate cheesecake or Crispy Creamy Potato Pancakes.
It seems that these sweet and meaty foods share some ingredients. In the next part of this notebook, I will try to find communities using the community-detection algorithms of the NetworkX library.

In [44]:
# Ten top-ranked edges for each measure, separated by blank lines.
print(
    sort_rec_tie_str[:10],
    sort_rec_overlap_str[:10],
    sort_rec_span_str[:10],
    sep='\n\n',
)

[(('Jack’s Carrot Cake', 'Beef Crostini with Horseradish Spread'), 971), (('Glenn’s Sweet & Spicy Slow Cooker Chili', 'Beef Crostini with Horseradish Spread'), 970), (('Jack’s Carrot Cake', 'Glenn’s Sweet & Spicy Slow Cooker Chili'), 968), (('Glenn’s Sweet & Spicy Slow Cooker Chili', 'Spaghetti Pie'), 968), (('Jack’s Carrot Cake', 'Cara Cara Beet Salad'), 967), (('Jack’s Carrot Cake', 'Pumpkin Cider Poke Cake'), 967), (('Beef Crostini with Horseradish Spread', 'Spaghetti Pie'), 967), (('Chicken Carbonara', 'Creamy Summer Squash Casserole'), 966), (('Chicken Carbonara', 'Jack’s Carrot Cake'), 966), (('Mostarda', 'Sweet and Spicy Smoked Pork Ribs'), 966)]

[(('Four Cheese Baked Manicotti', 'Spinach Lasagna Rollups'), 0.9978768577494692), (('White Barbecue Chicken', 'How to Grill Chicken Drumsticks'), 0.9977142857142857), (('Chocolate Peanut Butter Pie', 'Chocolate Peanut Butter Cheesecake Bites'), 0.9975460122699387), (('Chicken Lasagna with Pesto Cream Sauce', 'Keto ‘Mac’ and Cheese wit

In [45]:
# Ingredients graph strengths: one score per edge, then edges ranked by score.

# Tie strength, strongest first.
ing_tie_str = {
    (v, w): tie_strength(G_ing, v, w) for v, w in G_ing.edges()
}
sort_ing_tie_str = sorted(ing_tie_str.items(), key=lambda kv: kv[1], reverse=True)

# Overlap, highest first.
ing_overlap_str = {
    (v, w): overlap(G_ing, v, w) for v, w in G_ing.edges()
}
sort_ing_overlap_str = sorted(ing_overlap_str.items(), key=lambda kv: kv[1], reverse=True)

# Span, smallest first.
ing_span_str = {
    (v, w): span(G_ing, v, w) for v, w in G_ing.edges()
}
sort_ing_span_str = sorted(ing_span_str.items(), key=lambda kv: kv[1])

### Ingredients Strength results

We can see that there are strong ties between salt, pepper, oil, olive (oil), butter, onion, garlic, flour, and egg. These are the main ingredients of European cuisine.

In the overlap ranking, we can see words that are not ingredients and that probably occur in only a few recipes. These words overlap strongly because they only ever appear together in the same recipes.

It seems like tarragon spice is strongly connected through many recipes to other spices and basic ingredients like potato and butter.



In [46]:
# Twenty top-ranked edges for each measure, separated by blank lines.
print(
    sort_ing_tie_str[:20],
    sort_ing_overlap_str[:20],
    sort_ing_span_str[:20],
    sep='\n\n',
)

[(('salt', 'pepper'), 1436), (('salt', 'oil'), 1404), (('pepper', 'oil'), 1372), (('oil', 'olive'), 1309), (('salt', 'butter'), 1289), (('pepper', 'onion'), 1287), (('salt', 'onion'), 1267), (('pepper', 'garlic'), 1261), (('pepper', 'olive'), 1251), (('salt', 'olive'), 1247), (('salt', 'garlic'), 1245), (('garlic', 'oil'), 1210), (('oil', 'onion'), 1188), (('garlic', 'onion'), 1156), (('butter', 'pepper'), 1130), (('salt', 'egg'), 1114), (('garlic', 'olive'), 1106), (('olive', 'onion'), 1100), (('butter', 'oil'), 1098), (('salt', 'flour'), 1094)]

[(('smith', 'granny'), 0.9913419913419913), (('marzano', 'san'), 0.9862068965517241), (('choy', 'bok'), 0.9801980198019802), (('masala', 'garam'), 0.9797979797979798), (('triple', 'sec'), 0.978494623655914), (('further', 'five'), 0.978494623655914), (('further', 'butterfly'), 0.978494623655914), (('further', 'evaporate'), 0.978494623655914), (('further', 'platter'), 0.978494623655914), (('further', 'saute'), 0.978494623655914), (('further', '

# COMMUNITIES

In [47]:
# Communities

from networkx.algorithms import community

def print_quality_of_Communities_partition(G, part):
    """Print coverage, modularity and performance of the partition `part` of G.

    Parameters
    ----------
    G : networkx graph
    part : sequence of node collections
        The communities forming the partition to evaluate.

    The three values are printed in the order coverage, modularity,
    performance; nothing is returned.
    """
    if hasattr(community, 'partition_quality'):
        # NetworkX >= 2.6 returns (coverage, performance) from a single call;
        # the separate coverage()/performance() functions are gone in 3.0.
        coverage, performance = community.partition_quality(G, part)
    else:
        # Older NetworkX releases only have the separate functions.
        coverage = community.coverage(G, part)
        performance = community.performance(G, part)
    modularity = community.modularity(G, part)

    print('     coverage: ' + str(coverage))
    print('     modularity: ' + str(modularity))
    print('     performance: ' + str(performance))


In [48]:
# Greedy modularity communities of the recipe graph.
c_rec = list(community.greedy_modularity_communities(G_rec))
print('Number of recipes communities:', len(c_rec), sep='')
print('  communities size:')
for com in c_rec:
    print('    ', len(com), sep='')
print('  Recipes communities partition quality:')
print_quality_of_Communities_partition(G_rec, c_rec)

# Greedy modularity communities of the ingredient graph.
c_ing = list(community.greedy_modularity_communities(G_ing))
print()
print('Number of ingredients communities:', len(c_ing), sep='')
print('  communities size:')
for com in c_ing:
    print('    ', len(com), sep='')
print('  Ingredients communities partition quality:')
print_quality_of_Communities_partition(G_ing, c_ing)

Number of recipes communities:5
  communities size:
    579
    419
    1
    1
    1
  Recipes communities partition quality:
     coverage: 0.5485450397818951
     modularity: 0.031105087697770872
     performance: 0.5743816183816184

Number of ingredients communities:6
  communities size:
    800
    700
    429
    56
    5
    3
  Ingredients communities partition quality:
     coverage: 0.5418775047565073
     modularity: 0.16743515722521723
     performance: 0.6728847149763126


### Recipes community
We can see that the first group has the common ingredients of European (Italian) cuisine, like salt, pepper and so on.

The second community of recipes has common ingredients like sugar, salt, flour, egg, vanilla and so on. These ingredients are typical of sweet recipes and pastry. It also contains recipes with meat and sweet BBQ sauce.

In [49]:
def find_common_neighbors_in_community(community, bip_G):
    """Count how often each neighbour occurs across the members of a community.

    Parameters
    ----------
    community : iterable of nodes
        Members of one community. Inside this function the name shadows the
        ``community`` module imported by the notebook.
    bip_G : graph-like
        Object whose ``neighbors(node)`` yields the neighbours of a member.

    Returns
    -------
    list of (neighbour, count) tuples
        Sorted by count, highest first; neighbours with equal counts stay in
        first-seen order.
    """
    neighbors_count = Counter(
        neighbor
        for ind in community
        for neighbor in bip_G.neighbors(ind)
    )
    return neighbors_count.most_common()

# c_rec holds communities of recipes, so their neighbours in B are ingredient words.
ex_rec = 10  # maximum number of recipe names listed per community
for com in c_rec:
    top_ingredients = find_common_neighbors_in_community(com, B)[:10]
    print('Community size:' + str(len(com)) + '\n  Most common ingredients:' + str(top_ingredients))
    print('   Examples of recipes names:')
    for shown, rec_name in enumerate(com):
        if shown >= ex_rec:
            break
        print('       ' + str(rec_name))
    print()
    


Community size:579
  Most common ingredients:[('pepper', 467), ('salt', 449), ('oil', 409), ('garlic', 351), ('olive', 349), ('onion', 325), ('cheese', 244), ('butter', 202), ('tomato', 174), ('sauce', 144)]
   Examples of recipes names:
       Hummus with Caramelized Cauliflower and Onions
       Carne Asada
       Shakshuka
       Roasted Asparagus
       Italian Beef and Sausage Pie
       Grilled Spaghetti Squash Steaks
       Cuban Panini Burger
       Focaccia
       Vegetable Chili
       Perfect Pork Tenderloin

Community size:419
  Most common ingredients:[('sugar', 327), ('salt', 275), ('butter', 260), ('egg', 214), ('flour', 208), ('vanilla', 153), ('powder', 142), ('baking', 138), ('milk', 137), ('extract', 137)]
   Examples of recipes names:
       Chinese BBQ Pork
       Puff Pastry Bacon Twists
       Baked Brie with Mostarda
       Apple-Stuffed Acorn Squash
       Grilled Stone Fruit with Mascarpone and Cherry Granola
       Cinnamon Raisin Swirl Babka
       Kielbasa 

### Ingredients community
The examples of ingredients in this case are mixed with many non-ingredients words, because these are only examples from the given community.

We can see that the first group of ingredients contains a recipe that uses 47 ingredient words from this community. Many of the recipes seem to come from British cuisine, with a lot of meat and names like Beef Wellington, New England Boiled Dinner and Beef Bourguignon (perhaps borrowed from the French?). The only exception is the Grilled Vegetable Salad.

The second group looks like sweet Babkas, cookies and breaded foods like onion rings.

The third group looks like Mediterranean cuisine, with many Italian and Greek foods.

The fourth community has a lot of chicken in it.





In [50]:
# c_ing holds communities of ingredient words, so their neighbours in B are recipes.
ex_ing = 20  # maximum number of ingredient words listed per community
for com in c_ing:
    top_recipes = find_common_neighbors_in_community(com, B)[:10]
    print('Community size:' + str(len(com)) + '\n  Most common recipes:' + str(top_recipes))
    print('   Examples of ingredients names:')
    for shown, ing_name in enumerate(com):
        if shown >= ex_ing:
            break
        print('       ' + str(ing_name))
    print()

Community size:800
  Most common recipes:[('Holiday Chicken Soup', 47), ('Beef Wellington', 45), ('Grilled Vegetable Salad with Basil Dressing', 44), ('Instant Pot Beef and Noodles', 39), ('Chicken a la King', 39), ('Pork Lomitos Tacos', 38), ('Egg Roll in a Bowl', 36), ('Crunchy Vegetable Rice Bowl with Warm Peanut Sauce', 36), ('New England Boiled Dinner (Corned Beef and Cabbage)', 35), ('Beef Bourguignon', 35)]
   Examples of ingredients names:
       link
       tbsp
       beech
       grapeseed
       potato
       tarragon
       virginia
       mild
       uncured
       aside
       slicing
       square
       harissa
       served
       spicy
       mashed
       lard
       gruyre
       julienne
       topped

Community size:700
  Most common recipes:[('Red Velvet PEEPS® Holiday Wreath Cake', 37), ('Onion Rings', 34), ('Crunchie Munchie Cookies', 32), ('Cinnamon Raisin Swirl Babka', 31), ('Malted Mocha Swiss Roll', 30), ('No-Bake Amaretto Truffles', 29), ('Polish Babka', 

# Other community algorithms

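As can be seen below, the Label propagation method gives a worse community division than the Greedy modularity method used above: its modularity is essentially zero, and its coverage of 1.0 only means that no edge crosses a community boundary (the ingredient graph ends up as a single community). Other methods were unable to return a result, either because they are computationally hard (NP-hard) or because they cannot work with a graph that has more than one component.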

In [51]:
# Label propagation: a worse community division than greedy modularity gave.
c_lab_rec = list(community.label_propagation_communities(G_rec))
print(len(c_lab_rec))
c_lab_ing = list(community.label_propagation_communities(G_ing))
print(len(c_lab_ing))

# Same quality report as for the greedy modularity partitions.
for title, graph, partition in (
    ('Recipes communities partition quality:', G_rec, c_lab_rec),
    ('Ingredients communities partition quality:', G_ing, c_lab_ing),
):
    print(title)
    print_quality_of_Communities_partition(graph, partition)

4
1
Recipes communities partition quality:
     coverage: 1.0
     modularity: 1.1102230246251565e-16
     performance: 0.8678301698301698
Ingredients communities partition quality:
     coverage: 1.0
     modularity: 0.0
     performance: 0.049778642920905905
