In [1]:
import pandas as pd
import itertools, re
from apyori import apriori

In [2]:
# Load the recipe dataset from a JSON file in the working directory,
# then preview the first rows (columns shown below: id, cuisine, ingredients).
data = pd.read_json('recipies.json')
data.head()

Unnamed: 0,id,cuisine,ingredients
0,10259,greek,"[romaine lettuce, black olives, grape tomatoes..."
1,25693,southern_us,"[plain flour, ground pepper, salt, tomatoes, g..."
2,20130,filipino,"[eggs, pepper, salt, mayonaise, cooking oil, g..."
3,22213,indian,"[water, vegetable oil, wheat, salt]"
4,13162,indian,"[black pepper, shallots, cornflour, cayenne pe..."


In [3]:
# Size of the loaded frame as (rows, columns).
data.shape

(39774, 3)

In [4]:
# Number of distinct cuisine labels in the dataset.
data['cuisine'].unique().size

20

In [5]:
# Number of recipes per cuisine (value_counts orders by descending count).
data.cuisine.value_counts()

italian         7838
mexican         6438
southern_us     4320
indian          3003
chinese         2673
french          2646
cajun_creole    1546
thai            1539
japanese        1423
greek           1175
spanish          989
korean           830
vietnamese       825
moroccan         821
british          804
filipino         755
irish            667
jamaican         526
russian          489
brazilian        467
Name: cuisine, dtype: int64

In [6]:
# Recipes per cuisine as a two-column table (Cuisine, Count).
# value_counts() is evaluated once; the index (cuisine labels) is named and
# moved into a regular column, and the counts column is named 'Count'.
data['cuisine'].value_counts().rename_axis('Cuisine').reset_index(name='Count')

Unnamed: 0,Cuisine,Count
0,italian,7838
1,mexican,6438
2,southern_us,4320
3,indian,3003
4,chinese,2673
5,french,2646
6,cajun_creole,1546
7,thai,1539
8,japanese,1423
9,greek,1175


In [7]:
# Per-column dtypes, non-null counts and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39774 entries, 0 to 39773
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   id           39774 non-null  int64 
 1   cuisine      39774 non-null  object
 2   ingredients  39774 non-null  object
dtypes: int64(1), object(2)
memory usage: 932.3+ KB


In [8]:
# Count of missing values in each column.
data.isna().sum()

id             0
cuisine        0
ingredients    0
dtype: int64

In [9]:
def rules(cuisine_type, min_recipes=100, min_confidence=0.5, min_lift=2):
    """Print ingredient association rules mined from one cuisine's recipes.

    Recipes of ``cuisine_type`` are taken from the module-level ``data`` frame
    (columns ``cuisine`` and ``ingredients``) and passed to ``apriori``. The
    resulting rules are grouped by antecedent, ordered by lift (highest
    first) within each group, and printed as a three-column table:

    * ``rule``  - the consequent, i.e. the ingredient(s) suggested;
    * ``items`` - the antecedent, i.e. the ingredient(s) already present;
    * ``lift``  - the lift of the rule.

    Parameters
    ----------
    cuisine_type : str
        Value to match against ``data['cuisine']``.
    min_recipes : int, default 100
        An itemset must occur in at least this many of the cuisine's recipes;
        it is converted to a relative support of
        ``min_recipes / number_of_recipes``.
    min_confidence : float, default 0.5
        Minimum confidence passed to ``apriori``.
    min_lift : float, default 2
        Only rules whose lift is strictly greater than this are printed.

    Returns
    -------
    None
        The table is written to standard output.
    """
    cuisine_info = data[data['cuisine'] == cuisine_type]['ingredients']
    cuisine_num = len(cuisine_info)
    if cuisine_num == 0:
        # Without this guard the support computation divides by zero.
        print(f"\nNo recipes found for cuisine: {cuisine_type}")
        return

    support_value = min_recipes / cuisine_num

    association_rules = apriori(
        cuisine_info, min_support=support_value, min_confidence=min_confidence
    )

    # antecedent (sorted tuple of ingredients) -> [(consequent text, lift), ...]
    rules_by_antecedent = {}
    for record in association_rules:
        # record[0] is the itemset; single-item sets cannot form a rule.
        if len(record[0]) < 2:
            continue
        # record[2] holds the ordered statistics; each entry is indexed as
        # [0] antecedent items, [1] consequent items, [3] lift.
        for stat in record[2]:
            antecedent = tuple(sorted(stat[0]))
            if not antecedent:
                # An empty antecedent is just the itemset's baseline frequency.
                continue
            # Keep the whole consequent, in sorted order: the lift belongs to
            # the full set, and picking one element of an unordered set would
            # make the output vary between runs.
            consequent = ", ".join(sorted(stat[1]))
            rules_by_antecedent.setdefault(antecedent, []).append((consequent, stat[3]))

    for consequents in rules_by_antecedent.values():
        consequents.sort(key=lambda pair: pair[1], reverse=True)  # highest lift first

    print(f"\n{'rule':{25}} {'items':{45}} {'lift'}")
    print("="*80)
    for antecedent, consequents in rules_by_antecedent.items():
        # Join the names directly so apostrophes/parentheses inside an
        # ingredient name are printed unchanged.
        antecedent_text = ", ".join(antecedent)
        for consequent, lift in consequents:
            if lift > min_lift:
                print(f"{consequent:25} {antecedent_text:45} {lift:.2f}")

In [10]:
# Example of the AssociationRulesDictionary built inside rules() (truncated): {('baking powder',): [('all-purpose flour', 7.0692013962096185), ('salt', 1.464034219324953)], ('all-purpose flour',): [('salt', 1.4090133254614334)], ('vanilla extract',): [('all-purpose flour', 4.600451646919549), ('sugar', 5.509769286229273)], ('arborio rice',): [('dry white wine', 7.393564615868358)], ...}

In [12]:
# Interactive prompt: print the rules for each cuisine the user enters until
# they type 'exit'. Note that input() blocks an unattended "Run All".
known_cuisines = set(data['cuisine'].unique())  # looked up once, not per prompt

while True:
    # Ignore surrounding whitespace and letter case in what the user typed.
    cuisine_type = input('Enter a cuisine type: ').strip().lower()

    if cuisine_type == 'exit':
        break

    elif cuisine_type in known_cuisines:
        rules(cuisine_type)

    else:
        print("\nWe don't have recommendations for ", cuisine_type)
        print('please type again \n')

Enter a cuisine type: italian

rule                      items                                         lift
all-purpose flour         baking powder                                 7.07
sugar                     vanilla extract                               5.51
all-purpose flour         vanilla extract                               4.60
dry white wine            arborio rice                                  7.39
onions                    carrots                                       3.37
onions                    celery                                        4.12
garlic cloves             crushed red pepper                            2.52
dried oregano             dried basil                                   6.95
garlic                    red pepper flakes                             3.26
grated parmesan cheese    lasagna noodles                               2.53
salt                      butter, pepper                                2.14
onions                    carrots, olive oil 