In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("prior_train_orders.csv")
df = df[df['eval_set'] == 'train']
df

Unnamed: 0,user_id,order_id,product_id,aisle_id,department_id,add_to_cart_order,reordered,product_name,aisle,department,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order
0,112108,1,49302,120,16,1,1,Bulgarian Yogurt,yogurt,dairy eggs,train,4,4,10,9.0
1,112108,1,49683,83,4,4,0,Cucumber Kirby,fresh vegetables,produce,train,4,4,10,9.0
2,112108,1,13176,24,4,6,0,Bag of Organic Bananas,fresh fruits,produce,train,4,4,10,9.0
3,112108,1,43633,95,15,5,1,Lightly Smoked Sardines in Olive Oil,canned meat seafood,canned goods,train,4,4,10,9.0
4,112108,1,10246,83,4,3,0,Organic Celery Hearts,fresh vegetables,produce,train,4,4,10,9.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33818974,169679,3421063,49235,53,16,1,1,Organic Half & Half,cream,dairy eggs,train,30,0,10,4.0
33818975,169679,3421063,14233,115,7,3,1,Natural Artesian Water,water seltzer sparkling water,beverages,train,30,0,10,4.0
33819017,139822,3421070,16953,88,13,2,1,Creamy Peanut Butter,spreads,pantry,train,15,6,10,8.0
33819018,139822,3421070,35951,91,16,1,1,Organic Unsweetened Almond Milk,soy lactosefree,dairy eggs,train,15,6,10,8.0


### Create a basket
As the dataset contains huge amout of data, let us take a subset of the data to extract the association rules from it.

**Assumptions**: Segment the data by considering the 100 most frequent ordered items. Please note it is just an assumption. You can consider 'n frequent order items as per your choice.

In [3]:
product_counts = df.groupby('product_id')['order_id'].count().reset_index().rename(columns = {'order_id':'frequency'})
product_counts = product_counts.sort_values('frequency', ascending=False)[0:100].reset_index(drop=True)
product_counts


Unnamed: 0,product_id,frequency
0,24852,18726
1,13176,15480
2,21137,10894
3,21903,9784
4,47626,8135
...,...,...
95,8193,1418
96,9387,1379
97,37687,1362
98,20995,1361


In [4]:
freq_products = list(product_counts.product_id)
del product_counts 
freq_products[1:10]


[13176, 21137, 21903, 47626, 47766, 47209, 16797, 26209, 27966]

In [5]:
order_products = df[df.product_id.isin(freq_products)]
del df
order_products.shape


(314227, 15)

In [6]:
df=order_products[['order_id','product_name','reordered']].set_index('order_id')
df

Unnamed: 0_level_0,product_name,reordered
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Cucumber Kirby,0
1,Bag of Organic Bananas,0
1,Organic Hass Avocado,0
1,Organic Whole String Cheese,1
36,Organic Garnet Sweet Potato (Yam),1
...,...,...
3420998,Organic Cilantro,1
3421049,Organic Baby Broccoli,0
3421056,Sparkling Lemon Water,1
3421063,Organic Half & Half,1


In [7]:
basket = df.pivot_table(columns='product_name', values='reordered', index='order_id').reset_index().fillna(0).set_index('order_id')
basket

product_name,100% Whole Wheat Bread,2% Reduced Fat Milk,Apple Honeycrisp Organic,Asparagus,Bag of Organic Bananas,Banana,Blueberries,Boneless Skinless Chicken Breasts,Broccoli Crown,Bunched Cilantro,...,Sparkling Lemon Water,Sparkling Natural Mineral Water,Sparkling Water Grapefruit,Spring Water,Strawberries,Uncured Genoa Salami,Unsalted Butter,Unsweetened Almondmilk,Unsweetened Original Almond Breeze Almond Milk,Yellow Onions
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
36,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
38,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
98,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3420998,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3421049,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3421056,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3421063,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
def encode_units(x):
    if x <= 0:
        return 0
    if x >= 1:
        return 1
    
basket = basket.applymap(encode_units)
basket.head()




product_name,100% Whole Wheat Bread,2% Reduced Fat Milk,Apple Honeycrisp Organic,Asparagus,Bag of Organic Bananas,Banana,Blueberries,Boneless Skinless Chicken Breasts,Broccoli Crown,Bunched Cilantro,...,Sparkling Lemon Water,Sparkling Natural Mineral Water,Sparkling Water Grapefruit,Spring Water,Strawberries,Uncured Genoa Salami,Unsalted Butter,Unsweetened Almondmilk,Unsweetened Original Almond Breeze Almond Milk,Yellow Onions
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
36,0,0,0,1,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
38,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
96,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
98,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0


### Apply Apriori algorithm
As the dataset contains huge amount of data, let us take a subset of the data to extract the association rules from it.

**Assumptions**: Segment the basket by considering 100000 record. Please note its just an assumption, you can consider 'n' records as per choice.

In [9]:
shortbasket = basket[:100000]
shortbasket

product_name,100% Whole Wheat Bread,2% Reduced Fat Milk,Apple Honeycrisp Organic,Asparagus,Bag of Organic Bananas,Banana,Blueberries,Boneless Skinless Chicken Breasts,Broccoli Crown,Bunched Cilantro,...,Sparkling Lemon Water,Sparkling Natural Mineral Water,Sparkling Water Grapefruit,Spring Water,Strawberries,Uncured Genoa Salami,Unsalted Butter,Unsweetened Almondmilk,Unsweetened Original Almond Breeze Almond Milk,Yellow Onions
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
36,0,0,0,1,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
38,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
96,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
98,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3420998,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
3421049,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3421056,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
3421063,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


####  Apriori is a popular algorithm  for extracting frequent itemsets with applications in association rule learning. The apriori algorithm has been designed to operate on databases containing transactions, such as purchases by customers of a store. An itemset is considered as "frequent" if it meets a user-specified support threshold.

For instance, if the support threshold is set to 0.5 (50%), a frequent itemset is defined as a set of items that occur together in at least 50% of all transactions in the database.

In [10]:
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

frequent_items = apriori(shortbasket, min_support=0.001, use_colnames=True , verbose =1 , low_memory=True)

# The length column has been added to increase ease of filtering.
frequent_items['length'] = frequent_items['itemsets'].apply(lambda x: len(x))
frequent_items

# min_support=0.01--> 108 item set a frequent itemset is defined as a set of items that occur together in at least 1% of all transactions 
# min_support=0.02--> 37 item set  a frequent itemset is defined as a set of items that occur together in at least 2% of all transactions
# min_support=0.001--> 1575  item set  a frequent itemset is defined as a set of items that occur together in at least .1% of all transactions 



Processing 15 combinations | Sampling itemset size 4 3


Unnamed: 0,support,itemsets,length
0,0.018668,(100% Whole Wheat Bread),1
1,0.013298,(2% Reduced Fat Milk),1
2,0.017304,(Apple Honeycrisp Organic),1
3,0.026180,(Asparagus),1
4,0.142376,(Bag of Organic Bananas),1
...,...,...,...
1570,0.001289,"(Organic Whole Milk, Organic Raspberries, Orga...",3
1571,0.001002,"(Organic Yellow Onion, Organic Raspberries, Or...",3
1572,0.001385,"(Bag of Organic Bananas, Organic Hass Avocado,...",4
1573,0.001087,"(Bag of Organic Bananas, Organic Cucumber, Org...",4


In [11]:
frequent_items[frequent_items['support'] == frequent_items.support.max()]

  and should_run_async(code)


Unnamed: 0,support,itemsets,length
5,0.17642,(Banana),1


In [12]:
# the most 10 frequently occurring item in our dataset with item set of given length  
def most_10_frequent_items(length):
    return frequent_items.sort_values('support', ascending=False)[frequent_items['length'] == length].head(10)

    

  and should_run_async(code)


In [13]:
# the most 10 frequently occurring item in our dataset with item set of length 1 

most_10_frequent_items(1)
# The output shows the Banana is the most frequently occurring item in our dataset

  and should_run_async(code)
  return  frequent_items.sort_values('support', ascending=False)[frequent_items['length'] == length].head(10)


Unnamed: 0,support,itemsets,length
5,0.17642,(Banana),1
4,0.142376,(Bag of Organic Bananas),1
71,0.091668,(Organic Strawberries),1
37,0.085828,(Organic Baby Spinach),1
33,0.06634,(Organic Avocado),1
57,0.064379,(Organic Hass Avocado),1
28,0.063111,(Large Lemon),1
94,0.050996,(Strawberries),1
65,0.045594,(Organic Raspberries),1
30,0.045115,(Limes),1


In [14]:
# the most 10 frequently occurring item in our dataset with item set of length 2

most_10_frequent_items(2)
# The output shows that the Organic Strawberries & Bag of Organic Bananas combination are the most frequently occurring items when the length of the itemset is two.

  and should_run_async(code)
  return  frequent_items.sort_values('support', ascending=False)[frequent_items['length'] == length].head(10)


Unnamed: 0,support,itemsets,length
225,0.025456,"(Bag of Organic Bananas, Organic Strawberries)",2
211,0.021449,"(Bag of Organic Bananas, Organic Hass Avocado)",2
277,0.019318,"(Banana, Organic Avocado)",2
192,0.018498,"(Bag of Organic Bananas, Organic Baby Spinach)",2
314,0.017315,"(Banana, Organic Strawberries)",2
281,0.016665,"(Banana, Organic Baby Spinach)",2
272,0.016537,"(Large Lemon, Banana)",2
336,0.015248,"(Banana, Strawberries)",2
219,0.014811,"(Bag of Organic Bananas, Organic Raspberries)",2
1171,0.013074,"(Organic Raspberries, Organic Strawberries)",2


In [15]:
# the most 10 frequently occurring item in our dataset with item set of length 3 

most_10_frequent_items(3)

# The output shows the (Organic Hass Avocado, Organic Strawberries, Bag of Organic Bananas combination are the most frequently occurring items when the length of the itemset is three.

  and should_run_async(code)
  return  frequent_items.sort_values('support', ascending=False)[frequent_items['length'] == length].head(10)


Unnamed: 0,support,itemsets,length
1381,0.006063,"(Bag of Organic Bananas, Organic Hass Avocado,...",3
1400,0.005306,"(Bag of Organic Bananas, Organic Raspberries, ...",3
1332,0.004752,"(Bag of Organic Bananas, Organic Strawberries,...",3
1377,0.004454,"(Bag of Organic Bananas, Organic Hass Avocado,...",3
1326,0.004188,"(Bag of Organic Bananas, Organic Hass Avocado,...",3
1448,0.003964,"(Banana, Organic Avocado, Organic Baby Spinach)",3
1428,0.00358,"(Large Lemon, Organic Avocado, Banana)",3
1352,0.003101,"(Bag of Organic Bananas, Organic Cucumber, Org...",3
1457,0.002952,"(Banana, Organic Avocado, Organic Strawberries)",3
1426,0.002941,"(Large Lemon, Limes, Banana)",3


In [16]:
# the most 10 frequently occurring item in our dataset with item set of length 4 

most_10_frequent_items(4)

# The output shows the 'Organic Hass Avocado', 'Organic Raspberries', 'Organic Strawberries', 'Bag of Organic Bananas' combination are the most frequently occurring items when the length of the itemset is four.

  and should_run_async(code)
  return  frequent_items.sort_values('support', ascending=False)[frequent_items['length'] == length].head(10)


Unnamed: 0,support,itemsets,length
1574,0.001811,"(Bag of Organic Bananas, Organic Hass Avocado,...",4
1572,0.001385,"(Bag of Organic Bananas, Organic Hass Avocado,...",4
1573,0.001087,"(Bag of Organic Bananas, Organic Cucumber, Org...",4


In [17]:
#all items with a given length and at least support
def all_items_with_at_least_support_and_len(support , len):
    return frequent_items[ (frequent_items['length'] == len) & (frequent_items['support'] >= support) ]
    

  and should_run_async(code)


In [18]:
#all items with a length of 2, and the minimum support is more than 0.02 (occur together in at least 2% of all transactions)

all_items_with_at_least_support_and_len(0.02 , 2)

  and should_run_async(code)


Unnamed: 0,support,itemsets,length
211,0.021449,"(Bag of Organic Bananas, Organic Hass Avocado)",2
225,0.025456,"(Bag of Organic Bananas, Organic Strawberries)",2


In [19]:
#all items with a length of 1, and the minimum support is more than 0.1 (occur in at least 10% of all transactions)

all_items_with_at_least_support_and_len(0.1 , 1)

  and should_run_async(code)


Unnamed: 0,support,itemsets,length
4,0.142376,(Bag of Organic Bananas),1
5,0.17642,(Banana),1


In [20]:
# showing support of itemset of product
def showing_support_of_itemset(frozenset):
    return frequent_items[ frequent_items['itemsets'] == frozenset ]
    
    

  and should_run_async(code)


In [21]:

# showing_support_of_itemset({'Organic Strawberries', 'Organic Baby Spinach'})
# showing_support_of_itemset({'Organic Hass Avocado','Bag of Organic Bananas'})
# showing_support_of_itemset({'Organic Hass Avocado','Organic Strawberries','Bag of Organic Bananas'})
showing_support_of_itemset({'Organic Hass Avocado','Organic Raspberries','Organic Strawberries','Bag of Organic Bananas'})


  and should_run_async(code)


Unnamed: 0,support,itemsets,length
1574,0.001811,"(Bag of Organic Bananas, Organic Hass Avocado,...",4


### The association rules :
are simply the if-else statements. The IF component of an association rule is known as the antecedent. The THEN component is known as the consequent. The antecedent and the consequent are disjoint; they have no items in common.

> 1) **Support**: This measure gives an idea of how frequent an itemset is in all the transactions,  the fraction of the total number of transactions in which the itemset occurs.<br>
2) **Confidence**: This says how likely item Y is purchased when item X is purchased, expressed as {X -> Y} , is the conditional probability of occurrence of consequent given the antecedent.<br>
3) **Lift**: This says how likely item Y is purchased when item X is purchased, while controlling for how popular item Y is.<br>
4) **Leverage** : how different is the co-occurrence of the antecedent X and the consequent Y of a rule from independence it takes values in the range [−0.25,0.25].<br>


> low confidence **-->** due to few purchases of male cosmetics in general<br>
left > 1  **-->**  item Y is likely to be bought if item X is bought<br>
left < 1  **-->**  item Y is unlikely to be bought if item X is bought.<br>
left = 1  **-->**  which implies no association between items<br>
Finally, it should be noted that Support values equal to unity implies Lift values close to unity, but the opposite is not necessarily true<br>

In [22]:
# rules derived from the frequent itemsets only if the level of lift is above the 50 percent threshold
rules = association_rules(frequent_items, metric='lift', min_threshold=1.1)
rules["antecedent_len"] = rules["antecedents"].apply(lambda x: len(x))
rules

# left = 1 --> 3950 rule
# left = 1.1 --> 3868 rule
# left = .5 --> 4126 rule


  and should_run_async(code)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
0,(100% Whole Wheat Bread),(Banana),0.018668,0.176420,0.004869,0.260845,1.478546,0.001576,1.114218,1
1,(Banana),(100% Whole Wheat Bread),0.176420,0.018668,0.004869,0.027602,1.478546,0.001576,1.009187,1
2,(100% Whole Wheat Bread),(Organic Hass Avocado),0.018668,0.064379,0.001481,0.079338,1.232351,0.000279,1.016248,1
3,(Organic Hass Avocado),(100% Whole Wheat Bread),0.064379,0.018668,0.001481,0.023006,1.232351,0.000279,1.004440,1
4,(100% Whole Wheat Bread),(Organic Raspberries),0.018668,0.045594,0.001183,0.063356,1.389571,0.000332,1.018964,1
...,...,...,...,...,...,...,...,...,...,...
3863,"(Organic Raspberries, Organic Strawberries)","(Bag of Organic Bananas, Organic Hass Avocado)",0.013074,0.021449,0.001811,0.138549,6.459440,0.001531,1.135934,2
3864,(Bag of Organic Bananas),"(Organic Hass Avocado, Organic Raspberries, Or...",0.142376,0.002866,0.001811,0.012723,4.438737,0.001403,1.009983,1
3865,(Organic Hass Avocado),"(Bag of Organic Bananas, Organic Raspberries, ...",0.064379,0.005306,0.001811,0.028136,5.302408,0.001470,1.023491,1
3866,(Organic Raspberries),"(Bag of Organic Bananas, Organic Hass Avocado,...",0.045594,0.006063,0.001811,0.039729,6.552826,0.001535,1.035059,1


In [23]:
# rules that satisfy the following criteria: at least 2 antecedents , a confidence > 0.75 & a lift score > 1.2
rules[ (rules['antecedent_len'] >= 1) &
       (rules['confidence'] >= .3) &
       (rules['lift'] >= 3) ]

  and should_run_async(code)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
2132,"(Apple Honeycrisp Organic, Organic Hass Avocado)",(Bag of Organic Bananas),0.002962,0.142376,0.001300,0.438849,3.082321,0.000878,1.528330,2
2138,"(Apple Honeycrisp Organic, Organic Strawberries)",(Bag of Organic Bananas),0.003559,0.142376,0.001598,0.449102,3.154333,0.001092,1.556774,2
2139,"(Apple Honeycrisp Organic, Bag of Organic Bana...",(Organic Strawberries),0.005147,0.091668,0.001598,0.310559,3.387884,0.001127,1.317491,2
2199,"(Bag of Organic Bananas, Fresh Cauliflower)",(Organic Strawberries),0.004166,0.091668,0.001289,0.309463,3.375926,0.000907,1.315400,2
2212,"(Organic Strawberries, Large Alfresco Eggs)",(Bag of Organic Bananas),0.002557,0.142376,0.001108,0.433333,3.043581,0.000744,1.513454,2
...,...,...,...,...,...,...,...,...,...,...
3841,"(Bag of Organic Bananas, Organic Cucumber, Org...",(Organic Hass Avocado),0.003101,0.064379,0.001087,0.350515,5.444534,0.000887,1.440559,3
3843,"(Organic Cucumber, Organic Hass Avocado, Organ...",(Bag of Organic Bananas),0.001811,0.142376,0.001087,0.600000,4.214189,0.000829,2.144060,3
3854,"(Bag of Organic Bananas, Organic Hass Avocado,...",(Organic Strawberries),0.004454,0.091668,0.001811,0.406699,4.436669,0.001403,1.530980,3
3856,"(Bag of Organic Bananas, Organic Raspberries, ...",(Organic Hass Avocado),0.005306,0.064379,0.001811,0.341365,5.302408,0.001470,1.420546,3


rules based on huge num of items --> support
rules based on 
rules based on

In [25]:
# sort rules ascending based on metric
def sorted_rules_asc_based_on_metric(metric='support'):
    return rules.sort_values(metric)

  and should_run_async(code)


In [41]:
sorted_rules_asc_based_on_metric('lift')

  and should_run_async(code)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
290,(Banana),(Organic Baby Arugula),0.176420,0.022067,0.004283,0.024280,1.100268,0.000390,1.002268,1
291,(Organic Baby Arugula),(Banana),0.022067,0.176420,0.004283,0.194109,1.100268,0.000390,1.021950,1
292,(Banana),(Organic Baby Spinach),0.176420,0.085828,0.016665,0.094462,1.100586,0.001523,1.009534,1
293,(Organic Baby Spinach),(Banana),0.085828,0.176420,0.016665,0.194165,1.100586,0.001523,1.022021,1
1536,(Organic Hass Avocado),(Organic Fuji Apple),0.064379,0.025850,0.001833,0.028467,1.101263,0.000169,1.002694,1
...,...,...,...,...,...,...,...,...,...,...
3452,(Sparkling Water Grapefruit),"(Sparkling Lemon Water, Lime Sparkling Water)",0.028364,0.002706,0.001279,0.045079,16.656117,0.001202,1.044373,1
3451,(Sparkling Lemon Water),"(Lime Sparkling Water, Sparkling Water Grapefr...",0.011465,0.004092,0.001279,0.111524,27.256622,0.001232,1.120918,1
3450,"(Lime Sparkling Water, Sparkling Water Grapefr...",(Sparkling Lemon Water),0.004092,0.011465,0.001279,0.312500,27.256622,0.001232,1.437869,2
3453,(Lime Sparkling Water),"(Sparkling Lemon Water, Sparkling Water Grapef...",0.015727,0.002664,0.001279,0.081301,30.520325,0.001237,1.085596,1


In [123]:
rules['leverage'].min()
# rules['confidence'].min()
# rules['lift'].min()
# rules['leverage'].min()
# rules['conviction'].min()
# rules['antecedent_len'].max()

  and should_run_async(code)


3

In [113]:
# the fun. initialized with min. value for each metric
def rules_with_specific_threshold(support=.001,confidence=.005,lift=1.1,leverage=9.5e-05,conviction=1):
    return rules[ (rules['support'] >= support) &
       (rules['confidence'] >= confidence) &
       (rules['lift'] >= lift)&
       (rules['leverage'] >= leverage) &
       (rules['conviction'] >= conviction)  ]

  and should_run_async(code)


In [125]:
# rules_with_specific_threshold(lift=3)
rules_with_specific_threshold(support=.021,lift=1)



  and should_run_async(code)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
186,(Bag of Organic Bananas),(Organic Hass Avocado),0.142376,0.064379,0.021449,0.150651,2.340054,0.012283,1.101574,1
187,(Organic Hass Avocado),(Bag of Organic Bananas),0.064379,0.142376,0.021449,0.333168,2.340054,0.012283,1.286117,1
214,(Bag of Organic Bananas),(Organic Strawberries),0.142376,0.091668,0.025456,0.178791,1.950424,0.012404,1.106091,1
215,(Organic Strawberries),(Bag of Organic Bananas),0.091668,0.142376,0.025456,0.277694,1.950424,0.012404,1.187341,1


In [119]:
def select_rules_with_antecedents_length(len):
    return rules[ rules['antecedent_len'] == len]

  and should_run_async(code)


In [126]:
select_rules_with_antecedents_length(3)

  and should_run_async(code)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
3826,"(Bag of Organic Bananas, Organic Hass Avocado,...",(Organic Baby Spinach),0.006063,0.085828,0.001385,0.228471,2.66195,0.000865,1.184883,3
3827,"(Bag of Organic Bananas, Organic Hass Avocado,...",(Organic Strawberries),0.004188,0.091668,0.001385,0.330789,3.60857,0.001001,1.357318,3
3828,"(Bag of Organic Bananas, Organic Strawberries,...",(Organic Hass Avocado),0.004752,0.064379,0.001385,0.29148,4.527537,0.001079,1.320528,3
3829,"(Organic Hass Avocado, Organic Strawberries, O...",(Bag of Organic Bananas),0.002515,0.142376,0.001385,0.550847,3.868959,0.001027,1.909427,3
3840,"(Bag of Organic Bananas, Organic Cucumber, Org...",(Organic Strawberries),0.00276,0.091668,0.001087,0.393822,4.296203,0.000834,1.498459,3
3841,"(Bag of Organic Bananas, Organic Cucumber, Org...",(Organic Hass Avocado),0.003101,0.064379,0.001087,0.350515,5.444534,0.000887,1.440559,3
3842,"(Bag of Organic Bananas, Organic Hass Avocado,...",(Organic Cucumber),0.006063,0.03204,0.001087,0.179262,5.594854,0.000893,1.179377,3
3843,"(Organic Cucumber, Organic Hass Avocado, Organ...",(Bag of Organic Bananas),0.001811,0.142376,0.001087,0.6,4.214189,0.000829,2.14406,3
3854,"(Bag of Organic Bananas, Organic Hass Avocado,...",(Organic Strawberries),0.004454,0.091668,0.001811,0.406699,4.436669,0.001403,1.53098,3
3855,"(Bag of Organic Bananas, Organic Hass Avocado,...",(Organic Raspberries),0.006063,0.045594,0.001811,0.29877,6.552826,0.001535,1.361045,3


In [94]:
# take set of antecedents names max names is 3
def select_rules_with_antecedents_names(names=set()):
    return rules[rules['antecedents'].apply(lambda x:  names in {x})]


  and should_run_async(code)


In [101]:
# select_rules_with_antecedents_names({'Small Hass Avocado'})
# select_rules_with_antecedents_names({'Broccoli Crown'})
# select_rules_with_antecedents_names({'Bag of Organic Bananas','Organic Hass Avocado'})
select_rules_with_antecedents_names({'Organic Hass Avocado'})

  and should_run_async(code)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
3,(Organic Hass Avocado),(100% Whole Wheat Bread),0.064379,0.018668,0.001481,0.023006,1.232351,0.000279,1.004440,1
33,(Organic Hass Avocado),(Apple Honeycrisp Organic),0.064379,0.017304,0.002962,0.046011,2.658963,0.001848,1.030092,1
88,(Organic Hass Avocado),(Asparagus),0.064379,0.026180,0.002920,0.045349,1.732204,0.001234,1.020080,1
187,(Organic Hass Avocado),(Bag of Organic Bananas),0.064379,0.142376,0.021449,0.333168,2.340054,0.012283,1.286117,1
424,(Organic Hass Avocado),(Carrots),0.064379,0.017507,0.001801,0.027971,1.597727,0.000674,1.010765,1
...,...,...,...,...,...,...,...,...,...,...
3794,(Organic Hass Avocado),"(Organic Yellow Onion, Organic Strawberries)",0.064379,0.005178,0.001513,0.023502,4.538430,0.001180,1.018765,1
3799,(Organic Hass Avocado),"(Organic Zucchini, Organic Strawberries)",0.064379,0.005519,0.001247,0.019364,3.508404,0.000891,1.014118,1
3837,(Organic Hass Avocado),"(Bag of Organic Bananas, Organic Strawberries,...",0.064379,0.004752,0.001385,0.021516,4.527537,0.001079,1.017132,1
3852,(Organic Hass Avocado),"(Bag of Organic Bananas, Organic Cucumber, Org...",0.064379,0.003101,0.001087,0.016882,5.444534,0.000887,1.014018,1


In [102]:
# take set of consequents names max names is 3
def select_rules_with_consequents_names(names=set()):
    return rules[rules['consequents'].apply(lambda x:  names in {x})]


  and should_run_async(code)


In [104]:
# select_rules_with_consequents_names({'Small Hass Avocado'})
# select_rules_with_consequents_names({'Broccoli Crown'})
# select_rules_with_consequents_names({'Bag of Organic Bananas','Organic Hass Avocado'})
select_rules_with_consequents_names({'Organic Hass Avocado'})

  and should_run_async(code)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
2135,(Apple Honeycrisp Organic),"(Bag of Organic Bananas, Organic Hass Avocado)",0.017304,0.021449,0.0013,0.075123,3.502388,0.000929,1.058034,1
2155,(Asparagus),"(Bag of Organic Bananas, Organic Hass Avocado)",0.02618,0.021449,0.001108,0.042328,1.973416,0.000547,1.021802,1
2197,(Fresh Cauliflower),"(Bag of Organic Bananas, Organic Hass Avocado)",0.019382,0.021449,0.001012,0.052226,2.434902,0.000597,1.032473,1
2234,(Large Lemon),"(Bag of Organic Bananas, Organic Hass Avocado)",0.063111,0.021449,0.001492,0.023637,1.101988,0.000138,1.002241,1
2259,(Limes),"(Bag of Organic Bananas, Organic Hass Avocado)",0.045115,0.021449,0.001737,0.038498,1.794846,0.000769,1.017731,1
2277,(Michigan Organic Kale),"(Bag of Organic Bananas, Organic Hass Avocado)",0.020714,0.021449,0.001044,0.050412,2.350284,0.0006,1.0305,1
2301,(Organic Baby Arugula),"(Bag of Organic Bananas, Organic Hass Avocado)",0.022067,0.021449,0.001023,0.046354,2.161134,0.00055,1.026116,1
2313,(Organic Baby Carrots),"(Bag of Organic Bananas, Organic Hass Avocado)",0.026414,0.021449,0.001343,0.050827,2.369652,0.000776,1.030951,1
2373,(Organic Baby Spinach),"(Bag of Organic Bananas, Organic Hass Avocado)",0.085828,0.021449,0.004188,0.04879,2.274665,0.002347,1.028743,1
2451,(Organic Blueberries),"(Bag of Organic Bananas, Organic Hass Avocado)",0.036058,0.021449,0.001449,0.040189,1.873696,0.000676,1.019525,1
