# Chap 13 - Ex3: Apply Apriori to dataset_group.csv

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sbn

# Display settings: print NumPy floats in fixed-point (no scientific notation)
# and set the pandas display width to 1000 characters.
np.set_printoptions(suppress=True)
pd.options.display.width = 1000

In [22]:
# Read the CSV without treating any row as a header, then label the three
# columns explicitly.
dataset_group = pd.read_csv('Data/dataset_group.csv', header=None).set_axis(
    ['Date', 'Order', 'Items'],
    axis='columns',
)

dataset_group

Unnamed: 0,Date,Order,Items
0,2000-01-01,1,yogurt
1,2000-01-01,1,pork
2,2000-01-01,1,sandwich bags
3,2000-01-01,1,lunch meat
4,2000-01-01,1,all- purpose
...,...,...,...
22338,2002-02-26,1139,soda
22339,2002-02-26,1139,laundry detergent
22340,2002-02-26,1139,vegetables
22341,2002-02-26,1139,shampoo


## <span style = 'color : yellow'> Convert dataframe into list of transactions </span>

In [54]:
# Gather every item that shares the same 'Order' value into one Python list.
dataset_group_condensed = dataset_group.groupby('Order')['Items'].agg(list)

print(dataset_group_condensed)

# Plain list of item lists, one entry per order group.
transactions_lst = list(dataset_group_condensed)

Order
1       [yogurt, pork, sandwich bags, lunch meat, all-...
2       [toilet paper, shampoo, hand soap, waffles, ve...
3       [soda, pork, soap, ice cream, toilet paper, di...
4       [cereals, juice, lunch meat, soda, toilet pape...
5       [sandwich loaves, pasta, tortillas, mixes, han...
                              ...                        
1135    [sugar, beef, sandwich bags, hand soap, paper ...
1136    [coffee/tea, dinner rolls, lunch meat, spaghet...
1137    [beef, lunch meat, eggs, poultry, vegetables, ...
1138    [sandwich bags, ketchup, milk, poultry, cheese...
1139    [soda, laundry detergent, vegetables, shampoo,...
Name: Items, Length: 1139, dtype: object


## <span style = 'color : yellow'> Use TransactionEncoder to handle transactions_lst </span>

In [59]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Learn the item vocabulary and encode the transactions in a single step.
trans_encode = TransactionEncoder()
arr_transactions = trans_encode.fit_transform(transactions_lst)

# One column per distinct item, labelled with the encoder's learned item names.
df_transactions = pd.DataFrame(data=arr_transactions, columns=trans_encode.columns_)
df_transactions

Unnamed: 0,all- purpose,aluminum foil,bagels,beef,butter,cereals,cheeses,coffee/tea,dinner rolls,dishwashing liquid/detergent,...,shampoo,soap,soda,spaghetti sauce,sugar,toilet paper,tortillas,vegetables,waffles,yogurt
0,True,True,False,True,True,False,False,False,True,False,...,True,True,True,False,False,False,False,True,False,True
1,False,True,False,False,False,True,True,False,False,True,...,True,False,False,False,False,True,True,True,True,True
2,False,False,True,False,False,True,True,False,True,False,...,True,True,True,True,False,True,False,True,False,False
3,True,False,False,False,False,True,False,False,False,False,...,False,False,True,False,False,True,False,False,False,False
4,True,False,False,False,False,False,False,False,True,False,...,False,False,True,True,False,True,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1134,True,False,False,True,False,True,True,True,True,True,...,True,True,False,False,True,False,False,False,False,False
1135,False,False,False,False,False,True,True,True,True,True,...,False,True,False,True,False,False,False,True,False,False
1136,False,False,True,True,False,False,False,False,True,True,...,True,True,False,False,True,False,True,True,False,True
1137,True,False,False,True,False,False,True,False,False,False,...,False,True,True,True,True,True,False,True,True,True


## <span style = 'color : yellow'> Apply apriori and association_rules </span>

In [70]:
# Keep only itemsets whose support is at least 0.3; use_colnames reports the
# itemsets by item name rather than by column index.
frequent_items = apriori(
    df_transactions,
    use_colnames=True,
    min_support=0.3,
)
frequent_items

Unnamed: 0,support,itemsets
0,0.37489,(all- purpose)
1,0.384548,(aluminum foil)
2,0.385426,(bagels)
3,0.37489,(beef)
4,0.367867,(butter)
5,0.395961,(cereals)
6,0.390694,(cheeses)
7,0.37928,(coffee/tea)
8,0.388938,(dinner rolls)
9,0.38806,(dishwashing liquid/detergent)


In [75]:
# Derive rules from the frequent itemsets, keeping those whose lift is at
# least 1.1 and reporting only the listed metrics.
asc_rules = association_rules(
    frequent_items,
    metric='lift',
    min_threshold=1.1,
    return_metrics=['support', 'confidence', 'lift', 'leverage', 'conviction'],
)
asc_rules

Unnamed: 0,antecedents,consequents,support,confidence,lift,leverage,conviction
0,(eggs),(vegetables),0.326602,0.837838,1.13337,0.038433,1.607989
1,(vegetables),(eggs),0.326602,0.441805,1.13337,0.038433,1.093139
2,(laundry detergent),(vegetables),0.309043,0.816705,1.104783,0.029311,1.4226
3,(vegetables),(laundry detergent),0.309043,0.418052,1.104783,0.029311,1.068134
4,(vegetables),(yogurt),0.319579,0.432304,1.124188,0.035304,1.084123
5,(yogurt),(vegetables),0.319579,0.83105,1.124188,0.035304,1.543388
