In [1]:
import pandas as pd
import numpy as np
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori,association_rules

#mlxtend - used in the context of frequent pattern mining and association rule learning.
#TransactionEncoder is a class provided by mlxtend for converting a list of transactions into a one-hot encoded array (which can then be wrapped in a DataFrame).
#apriori is a function in mlxtend.frequent_patterns that implements the Apriori algorithm. It is used to find frequent itemsets in a transaction dataset.
#association_rules is another function in mlxtend.frequent_patterns that generates association rules from frequent itemsets.

In [2]:
# Load the raw order lines from CSV (the path is relative, not absolute).
df = pd.read_csv(
    'Practical Exam Datasets/Apriori/Order3.csv'
)

In [3]:
# Preview the first five rows of the loaded data.
df.head(n=5)

Unnamed: 0,TransactionNo,Items,DateTime,Daypart,DayType
0,1,Bread,10/30/2016 9:58,Morning,Weekend
1,2,Scandinavian,10/30/2016 10:05,Morning,Weekend
2,2,Scandinavian,10/30/2016 10:05,Morning,Weekend
3,3,Hot chocolate,10/30/2016 10:07,Morning,Weekend
4,3,Jam,10/30/2016 10:07,Morning,Weekend


In [4]:
# Build one row per transaction, with every item of that transaction gathered in a list.
transactions = (
    df.groupby('TransactionNo')['Items']        # the 'Items' values, grouped by transaction number
      .apply(list)                              # one Python list of items per group
      .reset_index(name='Transaction_Items')    # back to a DataFrame with a named list column
)
transactions

#This part of the code groups the DataFrame (df) by the 'TransactionNo' column. 
#It creates groups based on unique values in the 'TransactionNo' column, and for each group, it selects the 'Items' column.

#.apply(list)
#This part of the code collects the 'Items' values of each group into a Python list,
#so every unique 'TransactionNo' ends up with one list holding all of its items
#(repeated items are kept, e.g. [Scandinavian, Scandinavian]).

#After applying the list function, it resets the index of the resulting DataFrame and renames the column containing the lists of 
#items to 'Transaction_Items'. This creates a new DataFrame (transactions) where each row represents a unique transaction, and the 
#'Transaction_Items' column contains a list of items associated with that transaction.

Unnamed: 0,TransactionNo,Transaction_Items
0,1,[Bread]
1,2,"[Scandinavian, Scandinavian]"
2,3,"[Hot chocolate, Jam, Cookies]"
3,4,[Muffin]
4,5,"[Coffee, Pastry, Bread]"
...,...,...
9460,9680,[Bread]
9461,9681,"[Truffles, Tea, Spanish Brunch, Christmas common]"
9462,9682,"[Muffin, Tacos/Fajita, Coffee, Tea]"
9463,9683,"[Coffee, Pastry]"


In [5]:
# Step 1: create the encoder that turns lists of items into a one-hot encoded matrix.
te = TransactionEncoder()

# Step 2: learn the unique item labels, then encode the transactions.
# In te_array each row is a transaction and each column is a unique item; an entry is
# a boolean (True/False) flag for the presence or absence of that item in the transaction.
item_lists = transactions['Transaction_Items']
te_array = te.fit(item_lists).transform(item_lists)

# Step 3: wrap the matrix in a pandas DataFrame, using the item labels the encoder
# learned (te.columns_) as column names, to get the one-hot encoded representation
# of the original transaction data.
data_encoder = pd.DataFrame(data=te_array, columns=te.columns_)

In [6]:
# Mine frequent itemsets with the Apriori algorithm, a classic algorithm for
# association rule mining.
# MIN_SUPPORT is the minimum support threshold: the minimum proportion of transactions
# that must contain a particular itemset for it to be considered "frequent".
# In this case, it's set to 2%.
MIN_SUPPORT = 0.02

# use_colnames=True reports itemsets by item name (the data_encoder column labels)
# rather than by column index.
frequent_items = apriori(data_encoder, min_support=MIN_SUPPORT, use_colnames=True)
frequent_items

Unnamed: 0,support,itemsets
0,0.036344,(Alfajores)
1,0.327205,(Bread)
2,0.040042,(Brownie)
3,0.103856,(Cake)
4,0.478394,(Coffee)
5,0.054411,(Cookies)
6,0.039197,(Farm House)
7,0.05832,(Hot chocolate)
8,0.038563,(Juice)
9,0.061807,(Medialuna)


In [7]:
# Derive association rules from the frequent itemsets. Rules are scored by lift, and
# min_threshold is the minimum lift a rule needs in order to be included in the table.
# Tip: rules.iloc[:, :-3] shows the same table without its last three columns.
rules = association_rules(
    frequent_items,
    metric='lift',
    min_threshold=1,
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Bread),(Pastry),0.327205,0.086107,0.02916,0.089119,1.034977,0.000985,1.003306,0.050231
1,(Pastry),(Bread),0.086107,0.327205,0.02916,0.33865,1.034977,0.000985,1.017305,0.03698
2,(Coffee),(Cake),0.478394,0.103856,0.054728,0.114399,1.101515,0.005044,1.011905,0.176684
3,(Cake),(Coffee),0.103856,0.478394,0.054728,0.526958,1.101515,0.005044,1.102664,0.10284
4,(Tea),(Cake),0.142631,0.103856,0.023772,0.166667,1.604781,0.008959,1.075372,0.439556
5,(Cake),(Tea),0.103856,0.142631,0.023772,0.228891,1.604781,0.008959,1.111865,0.420538
6,(Coffee),(Cookies),0.478394,0.054411,0.028209,0.058966,1.083723,0.002179,1.004841,0.14811
7,(Cookies),(Coffee),0.054411,0.478394,0.028209,0.518447,1.083723,0.002179,1.083174,0.0817
8,(Hot chocolate),(Coffee),0.05832,0.478394,0.029583,0.507246,1.060311,0.001683,1.058553,0.060403
9,(Coffee),(Hot chocolate),0.478394,0.05832,0.029583,0.061837,1.060311,0.001683,1.003749,0.109048
