In [36]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
import mlxtend.preprocessing
import mlxtend.frequent_patterns


In [5]:
# Read the bakery transaction log from the CSV file in the working directory.
shopping = pd.read_csv(filepath_or_buffer="market_basket.csv")

# Show the first ten rows as a quick look at the columns.
shopping.head(n=10)

Unnamed: 0,Transaction,Item,date_time,period_day,weekday_weekend
0,1,Bread,30-10-2016 09:58,morning,weekend
1,2,Scandinavian,30-10-2016 10:05,morning,weekend
2,2,Scandinavian,30-10-2016 10:05,morning,weekend
3,3,Hot chocolate,30-10-2016 10:07,morning,weekend
4,3,Jam,30-10-2016 10:07,morning,weekend
5,3,Cookies,30-10-2016 10:07,morning,weekend
6,4,Muffin,30-10-2016 10:08,morning,weekend
7,5,Coffee,30-10-2016 10:13,morning,weekend
8,5,Pastry,30-10-2016 10:13,morning,weekend
9,5,Bread,30-10-2016 10:13,morning,weekend


In [6]:
## Check for missing values: info() lists the non-null count and dtype of every column.

shopping.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20507 entries, 0 to 20506
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Transaction      20507 non-null  int64 
 1   Item             20507 non-null  object
 2   date_time        20507 non-null  object
 3   period_day       20507 non-null  object
 4   weekday_weekend  20507 non-null  object
dtypes: int64(1), object(4)
memory usage: 801.2+ KB


In [7]:
## Top 5 most frequently sold items over the whole dataset (counted per row).

overall_top5 = shopping["Item"].value_counts().head(5)
print("The top 5 most frequently sold items： ", "\n", overall_top5)

The top 5 most frequently sold items：  
 Coffee    5471
Bread     3325
Tea       1435
Cake      1025
Pastry     856
Name: Item, dtype: int64


In [9]:
# Are the most frequent items the same on weekdays and at the weekend?
# How do mornings compare with afternoons?

day_type = shopping["weekday_weekend"]
day_period = shopping["period_day"]
divider = "*" * 60

# Top 5 items (by row count) within each segment of the data.
weekday_top5 = shopping.loc[day_type == "weekday", "Item"].value_counts().head(5)
weekend_top5 = shopping.loc[day_type == "weekend", "Item"].value_counts().head(5)
morning_top5 = shopping.loc[day_period == "morning", "Item"].value_counts().head(5)
afternoon_top5 = shopping.loc[day_period == "afternoon", "Item"].value_counts().head(5)

print("Weekday top 5 most frequent sold items: ", "\n", weekday_top5)
print(divider)
print("Weekend top 5 most frequent sold items:", "\n", weekend_top5)
print(divider)
print("Morning top 5 most frequent sold items:", "\n", morning_top5)
print(divider)
print("afternoon top 5 most frequent sold items:", "\n", afternoon_top5)


## The weekday and weekend top 5 are both the same as the overall top 5.

## For "morning" and "afternoon", Coffee and Bread are still the top 2,
## but the items ranked 3rd to 5th differ between the two periods.

Weekday top 5 most frequent sold items:  
 Coffee    3543
Bread     2092
Tea        976
Cake       612
Pastry     566
Name: Item, dtype: int64
************************************************************
Weekend top 5 most frequent sold items: 
 Coffee    1928
Bread     1233
Tea        459
Cake       413
Pastry     290
Name: Item, dtype: int64
************************************************************
Morning top 5 most frequent sold items: 
 Coffee       2561
Bread        1610
Pastry        604
Tea           456
Medialuna     402
Name: Item, dtype: int64
************************************************************
afternoon top 5 most frequent sold items: 
 Coffee      2823
Bread       1661
Tea          930
Cake         731
Sandwich     671
Name: Item, dtype: int64


In [10]:
## Why do the morning and afternoon Coffee counts not add up to 5471?

shopping["period_day"].unique()

## Because "morning" and "afternoon" are not the only periods:
## the data also contains "evening" and "night".

array(['morning', 'afternoon', 'evening', 'night'], dtype=object)

In [186]:
# Generate frequent item-sets and create association rules for morning transactions.

# Keep only the rows whose period_day is "morning".
morning_mask = shopping["period_day"].eq("morning")
morning = shopping[morning_mask]
morning

Unnamed: 0,Transaction,Item,date_time,period_day,weekday_weekend
0,1,Bread,30-10-2016 09:58,morning,weekend
1,2,Scandinavian,30-10-2016 10:05,morning,weekend
2,2,Scandinavian,30-10-2016 10:05,morning,weekend
3,3,Hot chocolate,30-10-2016 10:07,morning,weekend
4,3,Jam,30-10-2016 10:07,morning,weekend
...,...,...,...,...,...
20465,9664,Coffee,09-04-2017 11:40,morning,weekend
20466,9664,Bread,09-04-2017 11:40,morning,weekend
20467,9665,Coffee,09-04-2017 11:59,morning,weekend
20468,9665,Juice,09-04-2017 11:59,morning,weekend


In [12]:
# Collect the items of each morning transaction into one list per transaction.
# Repeated purchases of the same item stay in the list as duplicates.
morning_items_by_transaction = morning.groupby(["Transaction"])["Item"].apply(list)
morning_list = morning_items_by_transaction.values.tolist()

# NOTE(review): this displays every transaction; consider showing only a slice.
morning_list

[['Bread'],
 ['Scandinavian', 'Scandinavian'],
 ['Hot chocolate', 'Jam', 'Cookies'],
 ['Muffin'],
 ['Coffee', 'Pastry', 'Bread'],
 ['Medialuna', 'Pastry', 'Muffin'],
 ['Medialuna', 'Pastry', 'Coffee', 'Tea'],
 ['Pastry', 'Bread'],
 ['Bread', 'Muffin'],
 ['Scandinavian', 'Medialuna'],
 ['Bread', 'Medialuna', 'Bread'],
 ['Jam', 'Coffee', 'Tartine', 'Pastry', 'Tea'],
 ['Basket', 'Bread', 'Coffee'],
 ['Bread', 'Medialuna', 'Pastry'],
 ['Mineral water', 'Scandinavian'],
 ['Bread', 'Medialuna', 'Coffee'],
 ['Hot chocolate'],
 ['Farm House'],
 ['Farm House', 'Bread'],
 ['Bread', 'Medialuna'],
 ['Coffee', 'Coffee', 'Medialuna', 'Bread'],
 ['Jam'],
 ['Scandinavian', 'Muffin'],
 ['Bread'],
 ['Scandinavian'],
 ['Fudge'],
 ['Scandinavian'],
 ['Coffee', 'Bread'],
 ['Bread', 'Jam'],
 ['Bread'],
 ['Basket'],
 ['Scandinavian', 'Muffin'],
 ['Coffee'],
 ['Coffee', 'Muffin'],
 ['Muffin', 'Scandinavian'],
 ['Tea', 'Bread'],
 ['Coffee', 'Bread'],
 ['Bread', 'Tea'],
 ['Scandinavian'],
 ['Juice', 'Tartine', 

In [13]:
# Learn the item vocabulary of the morning transactions, then one-hot encode them.
encoder = mlxtend.preprocessing.TransactionEncoder()
encoded_data = encoder.fit(morning_list).transform(morning_list)

# Wrap the boolean matrix in a DataFrame: one row per transaction, one column per item.
morning_df = pd.DataFrame(data=encoded_data, columns=encoder.columns_)

print(morning_df.head())

   Afternoon with the baker  Alfajores  Argentina Night  Art Tray  Baguette  \
0                     False      False            False     False     False   
1                     False      False            False     False     False   
2                     False      False            False     False     False   
3                     False      False            False     False     False   
4                     False      False            False     False     False   

   Bakewell  Basket  Bread  Brioche and salami  Brownie  ...    Tea  The BART  \
0     False   False   True               False    False  ...  False     False   
1     False   False  False               False    False  ...  False     False   
2     False   False  False               False    False  ...  False     False   
3     False   False  False               False    False  ...  False     False   
4     False   False   True               False    False  ...  False     False   

   The Nomad  Tiffin  Toast  Truffles 

In [14]:
morning_df.shape

# There are 4103 transactions in the morning, and 76 distinct items were sold in them
# (one boolean column per item).

(4103, 76)

In [15]:
all_items = shopping["Item"].unique()
print(len(all_items))

### Why does the whole dataset have 94 items while the morning frame has only 76 columns?
### Possibly because some items are never bought in the morning.

94


In [19]:
### Confirm the assumption: one-hot encode ALL transactions and compare the
### item columns with those of the morning subset.

# One list of items per transaction, across the whole dataset.
shopping_list = shopping.groupby(['Transaction'])['Item'].apply(list).values.tolist()

encoder = mlxtend.preprocessing.TransactionEncoder().fit(shopping_list)

# transactional (one-hot) format
encoded_data = encoder.transform(shopping_list)

# convert it to a dataframe: one row per transaction, one boolean column per item
shopping_df = pd.DataFrame(encoded_data, columns = encoder.columns_)

print("shopping_df.shape: ", shopping_df.shape,"\n")

### Which items do people never buy in the morning?
# Build the lookup set once instead of recomputing morning.Item.unique() for every item.
morning_items = set(morning.Item.unique())
morning_difference = [item for item in shopping.Item.unique() if item not in morning_items]
print ("Items that people don't buy in the morning: ","\n", morning_difference)

print ("\n",len(morning_difference))
# Derive the column-count gap from the two encoded frames rather than hard-coding 94-76,
# so the cross-check stays valid if the data changes.
print (shopping_df.shape[1] - morning_df.shape[1])

shopping_df.shape:  (9465, 94) 

Items that people don't buy in the morning:  
 ['Pick and Mix Bowls', 'Chicken sand', 'Fairy Doors', 'Bowl Nic Pitt', 'Bread Pudding', 'Adjustment', 'Chimichurri Oil', 'Bacon', 'Spread', 'Olum & polenta', 'Polenta', 'Hack the stack', 'Crepes', 'Bare Popcorn', 'Pintxos', 'Tshirt', 'Postcard', 'Raw bars']

 18
18


In [22]:
# Top 5 items by support count: number of morning transactions containing the item.
morning_support_counts = morning_df.sum().sort_values(ascending=False)
print(morning_support_counts.iloc[:5], "\n")

# Top 5 items by quantity: number of rows (units) sold in the morning.
morning_quantities = morning["Item"].value_counts()
print(morning_quantities.head(5))

## The two counts differ because one transaction may contain the same item more than once.


Coffee       2113
Bread        1490
Pastry        572
Tea           441
Medialuna     380
dtype: int64 

Coffee       2561
Bread        1610
Pastry        604
Tea           456
Medialuna     402
Name: Item, dtype: int64


In [163]:
# Generate reasonable frequent itemsets for the morning.
# The same apriori call is tried with several (min_support, max_len) settings;
# the settings live in one table so the call and the report are written only once.

morning_apriori_settings = [
    (0.1, 3),
    (0.05, 3),
    (0.02, 3),
    (0.02, 4),
    (0.01, 3),
    (0.001, 3),
]

morning_itemsets = []
for number, (min_support, max_len) in enumerate(morning_apriori_settings, start=1):
    itemsets = mlxtend.frequent_patterns.apriori(
        morning_df, min_support=min_support, max_len=max_len, use_colnames=True
    )
    # Report the first rows and the number of itemsets found for this setting.
    print("morning_itemset_{}: ".format(number), "\n", itemsets.head(), "\n", "total number: ", itemsets.shape[0], "\n")
    morning_itemsets.append(itemsets)

# Keep the numbered names, because later cells refer to them (e.g. morning_itemset_6).
(
    morning_itemset_1,
    morning_itemset_2,
    morning_itemset_3,
    morning_itemset_4,
    morning_itemset_5,
    morning_itemset_6,
) = morning_itemsets



morning_itemset_1:  
     support  itemsets
0  0.363149   (Bread)
1  0.514989  (Coffee)
2  0.139410  (Pastry)
3  0.107482     (Tea) 
 total number:  4 

morning_itemset_2:  
     support         itemsets
0  0.363149          (Bread)
1  0.063125           (Cake)
2  0.514989         (Coffee)
3  0.050451     (Farm House)
4  0.052888  (Hot chocolate) 
 total number:  11 

morning_itemset_3:  
     support     itemsets
0  0.024616  (Alfajores)
1  0.020473   (Baguette)
2  0.363149      (Bread)
3  0.029978    (Brownie)
4  0.063125       (Cake) 
 total number:  29 

morning_itemset_4:  
     support     itemsets
0  0.024616  (Alfajores)
1  0.020473   (Baguette)
2  0.363149      (Bread)
3  0.029978    (Brownie)
4  0.063125       (Cake) 
 total number:  29 

morning_itemset_5:  
     support     itemsets
0  0.024616  (Alfajores)
1  0.020473   (Baguette)
2  0.363149      (Bread)
3  0.029978    (Brownie)
4  0.063125       (Cake) 
 total number:  49 

morning_itemset_6:  
     support              

In [194]:
## morning_itemset_6 is used as the frequent itemsets for generating association rules.
## It was built with min_support = 0.001, so every itemset in it appears in at least
## 0.1% of the morning transactions.
## The author reports 325 itemsets in total in morning_itemset_6.
## NOTE(review): 0.1% of the 4103 morning transactions is only about 4 transactions,
## so rules at this support level rest on very few observations.

# Try different metrics and thresholds to generate rules.
morning_rules_1 = mlxtend.frequent_patterns.association_rules(
    morning_itemset_6, metric="support", min_threshold=0.001
)
morning_rules_2 = mlxtend.frequent_patterns.association_rules(
    morning_itemset_6, metric="lift", min_threshold=1
)
morning_rules_3 = mlxtend.frequent_patterns.association_rules(
    morning_itemset_6, metric="confidence", min_threshold=0.6
)

# Number of rules kept by each filter, in the order they were generated.
for rules in (morning_rules_1, morning_rules_2, morning_rules_3):
    print(len(rules))

994
592
37


In [195]:
morning_rules_3
## morning_rules_3 keeps the rules with confidence >= 0.6: at least 60% of the morning
## transactions that contain the "antecedents" also contain the "consequents".
## These all look like trivial observations: the consequents are either Coffee or Bread,
## which are the most frequent items in general.

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Bakewell),(Coffee),0.004387,0.514989,0.003168,0.722222,1.402403,0.000909,1.746039
1,(Eggs),(Bread),0.002681,0.363149,0.001706,0.636364,1.752349,0.000732,1.75134
2,(Cookies),(Coffee),0.047526,0.514989,0.028516,0.6,1.165073,0.00404,1.212527
3,(Extra Salami or Feta),(Coffee),0.002681,0.514989,0.002437,0.909091,1.765263,0.001057,5.335121
4,(Juice),(Coffee),0.031928,0.514989,0.019498,0.610687,1.185825,0.003055,1.245812
5,(Keeping It Local),(Coffee),0.011942,0.514989,0.009262,0.77551,1.505877,0.003111,2.160503
6,(Smoothies),(Coffee),0.005118,0.514989,0.003168,0.619048,1.20206,0.000533,1.273154
7,(Spanish Brunch),(Coffee),0.011699,0.514989,0.008287,0.708333,1.375434,0.002262,1.662895
8,(Tartine),(Coffee),0.004631,0.514989,0.003168,0.684211,1.328592,0.000784,1.535868
9,(The Nomad),(Coffee),0.006337,0.514989,0.004143,0.653846,1.269631,0.00088,1.401143


In [196]:
# Flag the rules whose antecedents contain Eggs or Toast.
selection = morning_rules_3["antecedents"].apply(
    lambda antecedent: not antecedent.isdisjoint(("Eggs", "Toast"))
)

morning_rules_3[selection]

## If a person buys Eggs in the morning, I would recommend Bread: about 63.6% of the
## morning transactions that contain Eggs also contain Bread.

## If a person buys Toast in the morning, I would recommend Coffee.


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
1,(Eggs),(Bread),0.002681,0.363149,0.001706,0.636364,1.752349,0.000732,1.75134
10,(Toast),(Coffee),0.04972,0.514989,0.035827,0.720588,1.39923,0.010222,1.735829
26,"(Cookies, Toast)",(Coffee),0.002681,0.514989,0.00195,0.727273,1.41221,0.000569,1.778374
35,"(Medialuna, Toast)",(Coffee),0.003656,0.514989,0.002194,0.6,1.165073,0.000311,1.212527
36,"(Pastry, Toast)",(Coffee),0.002925,0.514989,0.002681,0.916667,1.779973,0.001175,5.820132


In [185]:
# Generate frequent item-sets and create association rules for afternoon transactions.

# Keep only the rows whose period_day is "afternoon".
afternoon_mask = shopping["period_day"].eq("afternoon")
afternoon = shopping[afternoon_mask]
afternoon

Unnamed: 0,Transaction,Item,date_time,period_day,weekday_weekend
86,43,Scandinavian,30-10-2016 12:00,afternoon,weekend
87,43,Fudge,30-10-2016 12:00,afternoon,weekend
88,44,Coffee,30-10-2016 12:05,afternoon,weekend
89,44,Medialuna,30-10-2016 12:05,afternoon,weekend
90,45,Coffee,30-10-2016 12:08,afternoon,weekend
...,...,...,...,...,...
20502,9682,Coffee,09-04-2017 14:32,afternoon,weekend
20503,9682,Tea,09-04-2017 14:32,afternoon,weekend
20504,9683,Coffee,09-04-2017 14:57,afternoon,weekend
20505,9683,Pastry,09-04-2017 14:57,afternoon,weekend


In [187]:
# Collect the items of each afternoon transaction into one list per transaction.
afternoon_items_by_transaction = afternoon.groupby(["Transaction"])["Item"].apply(list)
afternoon_list = afternoon_items_by_transaction.values.tolist()
# NOTE(review): this displays every transaction; consider showing only a slice.
afternoon_list


[['Scandinavian', 'Fudge'],
 ['Coffee', 'Medialuna'],
 ['Coffee', 'Hot chocolate', 'Medialuna'],
 ['Coffee'],
 ["Ella's Kitchen Pouches", 'Juice', 'Bread', 'Muffin', 'Jam'],
 ['Coffee'],
 ['Coffee', 'Coffee', 'Medialuna'],
 ['Bread', 'Victorian Sponge'],
 ['Bread'],
 ['Scandinavian'],
 ['Bread'],
 ['Frittata', 'Coffee', 'Tea', 'Hearty & Seasonal'],
 ['Coffee', 'Frittata'],
 ['Scandinavian'],
 ['Victorian Sponge', 'Hot chocolate', 'Tea', 'Soup'],
 ['Tea'],
 ['Cookies', 'Coffee', 'Juice', 'Coffee'],
 ['Coffee'],
 ['Pick and Mix Bowls',
  'Hearty & Seasonal',
  'Hearty & Seasonal',
  'Coffee',
  'Smoothies',
  'Coffee'],
 ['Coffee'],
 ['Cake'],
 ['Tartine', 'Mighty Protein', 'Tea', 'Coffee'],
 ['Hearty & Seasonal', 'Frittata', 'Mineral water'],
 ['Hearty & Seasonal', 'Mineral water', 'Muffin'],
 ['Frittata', 'Coffee', 'Tea', 'Scandinavian', 'Chicken sand'],
 ['Bread', 'Tea', 'Victorian Sponge'],
 ['Fudge'],
 ['Muffin'],
 ['Coffee', 'Bread'],
 ['Bread'],
 ['Coffee', 'Bread'],
 ['Jam', 'Fri

In [189]:
# Learn the item vocabulary of the afternoon transactions, then one-hot encode them.
encoder = mlxtend.preprocessing.TransactionEncoder()
encoded_data = encoder.fit(afternoon_list).transform(afternoon_list)

# Wrap the boolean matrix in a DataFrame: one row per transaction, one column per item.
afternoon_df = pd.DataFrame(data=encoded_data, columns=encoder.columns_)

afternoon_df

# Author's recorded result: 5089 transactions in the afternoon and 86 items.

Unnamed: 0,Afternoon with the baker,Alfajores,Argentina Night,Art Tray,Bacon,Baguette,Bakewell,Bare Popcorn,Bowl Nic Pitt,Bread,...,Tartine,Tea,The Nomad,Tiffin,Toast,Truffles,Valentine's card,Vegan Feast,Vegan mincepie,Victorian Sponge
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5084,False,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
5085,False,False,False,False,False,False,False,False,False,False,...,False,True,False,False,False,True,False,False,False,False
5086,False,False,False,False,False,False,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False
5087,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [218]:
# Generate reasonable frequent itemsets for the afternoon.
# The same apriori call is tried with several (min_support, max_len) settings;
# the settings live in one table so the call and the report are written only once.

afternoon_apriori_settings = [
    (0.02, 4),
    (0.01, 3),
    (0.001, 3),
]

afternoon_itemsets = []
for number, (min_support, max_len) in enumerate(afternoon_apriori_settings, start=1):
    itemsets = mlxtend.frequent_patterns.apriori(
        afternoon_df, min_support=min_support, max_len=max_len, use_colnames=True
    )
    # Report the first rows and the number of itemsets found for this setting.
    print("afternoon_itemset_{}: ".format(number), "\n", itemsets.head(), "\n", "total number: ", itemsets.shape[0], "\n")
    afternoon_itemsets.append(itemsets)

# Keep the numbered names, because later cells refer to them (e.g. afternoon_itemset_3).
afternoon_itemset_1, afternoon_itemset_2, afternoon_itemset_3 = afternoon_itemsets



afternoon_itemset_1:  
     support        itemsets
0  0.044606     (Alfajores)
1  0.305758         (Bread)
2  0.048929       (Brownie)
3  0.136766          (Cake)
4  0.022991  (Chicken Stew) 
 total number:  41 

afternoon_itemset_2:  
     support     itemsets
0  0.044606  (Alfajores)
1  0.013166   (Baguette)
2  0.305758      (Bread)
3  0.048929    (Brownie)
4  0.136766       (Cake) 
 total number:  70 

afternoon_itemset_3:  
     support                    itemsets
0  0.003734  (Afternoon with the baker)
1  0.044606                 (Alfajores)
2  0.006092                  (Art Tray)
3  0.013166                  (Baguette)
4  0.005895                  (Bakewell) 
 total number:  527 



In [219]:
## afternoon_itemset_3 is used as the frequent itemsets for generating association rules
## in the afternoon. It was built with min_support = 0.001, so every itemset in it
## appears in at least 0.1% of the afternoon transactions.
## There are 527 itemsets in total in afternoon_itemset_3.

# Try different metrics and thresholds to generate rules.
afternoon_rules_1 = mlxtend.frequent_patterns.association_rules(
    afternoon_itemset_3, metric="support", min_threshold=0.001
)
afternoon_rules_2 = mlxtend.frequent_patterns.association_rules(
    afternoon_itemset_3, metric="lift", min_threshold=1
)
afternoon_rules_3 = mlxtend.frequent_patterns.association_rules(
    afternoon_itemset_3, metric="confidence", min_threshold=0.6
)

# Number of rules kept by each filter, in the order they were generated.
for rules in (afternoon_rules_1, afternoon_rules_2, afternoon_rules_3):
    print(len(rules))

1780
1218
51


In [220]:
afternoon_rules_3

## afternoon_rules_3 keeps the rules with confidence >= 0.6: at least 60% of the afternoon
## transactions that contain the "antecedents" also contain the "consequents".
## These all look like trivial observations: the consequents are either Coffee or Bread,
## which are the most frequent items in general.

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Art Tray),(Coffee),0.006092,0.459815,0.00452,0.741935,1.613551,0.001719,2.093216
1,(Crisps),(Coffee),0.002162,0.459815,0.001376,0.636364,1.383955,0.000382,1.485508
2,(Extra Salami or Feta),(Coffee),0.005109,0.459815,0.00393,0.769231,1.672913,0.001581,2.3408
3,(Granola),(Coffee),0.002162,0.459815,0.001376,0.636364,1.383955,0.000382,1.485508
4,(Keeping It Local),(Coffee),0.002751,0.459815,0.002555,0.928571,2.019444,0.00129,7.562586
5,(Mighty Protein),(Coffee),0.001572,0.459815,0.001376,0.875,1.902938,0.000653,4.321478
6,(Salad),(Coffee),0.017489,0.459815,0.011201,0.640449,1.392841,0.003159,1.502389
7,(Tartine),(Coffee),0.005109,0.459815,0.003144,0.615385,1.33833,0.000795,1.40448
8,(Toast),(Coffee),0.022401,0.459815,0.015131,0.675439,1.468935,0.00483,1.664353
9,(Vegan mincepie),(Coffee),0.005895,0.459815,0.003537,0.6,1.304872,0.000826,1.350462


In [223]:
# Flag the rules whose antecedents contain both Coke and Juice, or contain Toast.
selection = afternoon_rules_3["antecedents"].apply(
    lambda antecedent: {"Coke", "Juice"} <= antecedent or "Toast" in antecedent
)

afternoon_rules_3[selection]

## If a person buys Coke and Juice in the afternoon, I would recommend a Sandwich:
## about 61.5% of the afternoon transactions that contain Coke and Juice also contain Sandwich.

## If a person buys Toast in the afternoon, I would recommend Coffee.



Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
8,(Toast),(Coffee),0.022401,0.459815,0.015131,0.675439,1.468935,0.00483,1.664353
22,"(Cake, Toast)",(Coffee),0.002358,0.459815,0.001965,0.833333,1.812322,0.000881,3.241108
39,"(Juice, Toast)",(Coffee),0.002948,0.459815,0.002162,0.733333,1.594843,0.000806,2.025693
49,"(Spanish Brunch, Toast)",(Coffee),0.001572,0.459815,0.001179,0.75,1.63109,0.000456,2.160739
50,"(Coke, Juice)",(Sandwich),0.002555,0.115936,0.001572,0.615385,5.307953,0.001276,2.298566


In [226]:
# Same antecedent filter, applied to the support-based rule set and ranked by lift.
selection = afternoon_rules_1["antecedents"].apply(
    lambda antecedent: {"Coke", "Juice"} <= antecedent or "Toast" in antecedent
)

toast_or_coke_juice_rules = afternoon_rules_1[selection]
toast_or_coke_juice_rules.sort_values("lift", ascending=False)

## People who buy Toast in the afternoon are more likely than an average customer
## to also buy Juice and Coffee (lift above 1).
## So Juice could be recommended as well.

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
1709,"(Coke, Juice)",(Sandwich),0.002555,0.115936,0.001572,0.615385,5.307953,0.001276,2.298566
1526,(Toast),"(Juice, Coffee)",0.022401,0.021615,0.002162,0.096491,4.464035,0.001677,1.082872
1688,(Toast),"(Spanish Brunch, Coffee)",0.022401,0.013559,0.001179,0.052632,3.88177,0.000875,1.041244
1524,"(Toast, Coffee)",(Juice),0.015131,0.043427,0.002162,0.142857,3.289593,0.001504,1.116002
1686,"(Toast, Coffee)",(Spanish Brunch),0.015131,0.024366,0.001179,0.077922,3.197947,0.00081,1.058082
427,(Toast),(Juice),0.022401,0.043427,0.002948,0.131579,3.029888,0.001975,1.101508
521,(Toast),(Spanish Brunch),0.022401,0.024366,0.001572,0.070175,2.880023,0.001026,1.049266
1706,(Toast),"(Tea, Coffee)",0.022401,0.057182,0.002751,0.122807,2.147646,0.00147,1.074812
1466,(Toast),"(Hot chocolate, Coffee)",0.022401,0.030261,0.001376,0.061404,2.029107,0.000698,1.033179
395,(Toast),(Hot chocolate),0.022401,0.061505,0.002751,0.122807,1.996693,0.001373,1.069884
