# Chap 13 - Ex1: apply Apriori for store_data.csv

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sbn

np.set_printoptions(suppress = True)
pd.set_option('display.width', 1000)

In [9]:
store_data = pd.read_csv('Data/store_data.csv')
store_data

Unnamed: 0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
0,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
1,chutney,,,,,,,,,,,,,,,,,,,
2,turkey,avocado,,,,,,,,,,,,,,,,,,
3,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,
4,low fat yogurt,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7495,butter,light mayo,fresh bread,,,,,,,,,,,,,,,,,
7496,burgers,frozen vegetables,eggs,french fries,magazines,green tea,,,,,,,,,,,,,,
7497,chicken,,,,,,,,,,,,,,,,,,,
7498,escalope,green tea,,,,,,,,,,,,,,,,,,


In [12]:
store_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7500 entries, 0 to 7499
Data columns (total 20 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   shrimp             7500 non-null   object 
 1   almonds            5746 non-null   object 
 2   avocado            4388 non-null   object 
 3   vegetables mix     3344 non-null   object 
 4   green grapes       2528 non-null   object 
 5   whole weat flour   1863 non-null   object 
 6   yams               1368 non-null   object 
 7   cottage cheese     980 non-null    object 
 8   energy drink       653 non-null    object 
 9   tomato juice       394 non-null    object 
 10  low fat yogurt     255 non-null    object 
 11  green tea          153 non-null    object 
 12  honey              86 non-null     object 
 13  salad              46 non-null     object 
 14  mineral water      24 non-null     object 
 15  salmon             7 non-null      object 
 16  antioxydant juice  3 non

## <span style = 'color : yellow'> Convert dataframe into list of transactions

In [90]:
transactions_lst = []

for row in range(store_data.shape[0]):
    transaction = [item for item in store_data.columns if pd.notna(store_data.loc[row, item])]
    transactions_lst.append(transaction)

len(transactions_lst)

7500

## <span style = 'color : yellow'> Use TransactionEncoder to handle transactions_lst

In [93]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

trans_encode = TransactionEncoder()
trans_encode.fit(transactions_lst)

arr_transactions = trans_encode.transform(transactions_lst)

df_transactions = pd.DataFrame(arr_transactions, columns = trans_encode.columns_)
df_transactions

Unnamed: 0,almonds,antioxydant juice,avocado,cottage cheese,energy drink,frozen smoothie,green grapes,green tea,honey,low fat yogurt,mineral water,salad,salmon,shrimp,spinach,tomato juice,vegetables mix,whole weat flour,yams
0,True,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False
2,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False
3,True,False,True,False,False,False,True,False,False,False,False,False,False,True,False,False,True,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7495,True,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False
7496,True,False,True,False,False,False,True,False,False,False,False,False,False,True,False,False,True,True,False
7497,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False
7498,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False


## <span style = 'color : yellow'> Apply apriori and association_rules

In [103]:
frequent_items = apriori(df_transactions, min_support=0.3, use_colnames=True)
frequent_items

Unnamed: 0,support,itemsets
0,0.766133,(almonds)
1,0.585067,(avocado)
2,0.337067,(green grapes)
3,1.0,(shrimp)
4,0.445867,(vegetables mix)
5,0.585067,"(almonds, avocado)"
6,0.337067,"(almonds, green grapes)"
7,0.766133,"(almonds, shrimp)"
8,0.445867,"(almonds, vegetables mix)"
9,0.337067,"(green grapes, avocado)"


In [123]:
asc_rules = association_rules(frequent_items, metric='confidence', min_threshold=0.3,
                             return_metrics=['support', 'confidence', 'lift', 'leverage', 'conviction'])
asc_rules

Unnamed: 0,antecedents,consequents,support,confidence,lift,leverage,conviction
0,(almonds),(avocado),0.585067,0.763662,1.305256,0.136828,1.755675
1,(avocado),(almonds),0.585067,1.000000,1.305256,0.136828,inf
2,(almonds),(green grapes),0.337067,0.439958,1.305256,0.078829,1.183721
3,(green grapes),(almonds),0.337067,1.000000,1.305256,0.078829,inf
4,(almonds),(shrimp),0.766133,1.000000,1.000000,0.000000,inf
...,...,...,...,...,...,...,...
175,(shrimp),"(almonds, green grapes, avocado, vegetables mix)",0.337067,0.337067,1.000000,0.000000,1.000000
176,(avocado),"(almonds, green grapes, shrimp, vegetables mix)",0.337067,0.576117,1.709207,0.139860,1.563952
177,(almonds),"(green grapes, shrimp, avocado, vegetables mix)",0.337067,0.439958,1.305256,0.078829,1.183721
178,(green grapes),"(almonds, shrimp, avocado, vegetables mix)",0.337067,1.000000,2.242823,0.186780,inf


In [121]:
asc_rules = association_rules(frequent_items, metric='lift', min_threshold=1.1, 
                              return_metrics=['support', 'confidence', 'lift', 'leverage', 'conviction'])
asc_rules

Unnamed: 0,antecedents,consequents,support,confidence,lift,leverage,conviction
0,(almonds),(avocado),0.585067,0.763662,1.305256,0.136828,1.755675
1,(avocado),(almonds),0.585067,1.000000,1.305256,0.136828,inf
2,(almonds),(green grapes),0.337067,0.439958,1.305256,0.078829,1.183721
3,(green grapes),(almonds),0.337067,1.000000,1.305256,0.078829,inf
4,(almonds),(vegetables mix),0.445867,0.581970,1.305256,0.104273,1.325583
...,...,...,...,...,...,...,...
145,"(green grapes, vegetables mix)","(almonds, shrimp, avocado)",0.337067,1.000000,1.709207,0.139860,inf
146,(avocado),"(almonds, green grapes, shrimp, vegetables mix)",0.337067,0.576117,1.709207,0.139860,1.563952
147,(almonds),"(green grapes, shrimp, avocado, vegetables mix)",0.337067,0.439958,1.305256,0.078829,1.183721
148,(green grapes),"(almonds, shrimp, avocado, vegetables mix)",0.337067,1.000000,2.242823,0.186780,inf
