In [None]:
# importing required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import association_rules

In [None]:
# Load the bakery transaction log; judging by the preview, each row is one
# purchased item and rows sharing a Transaction id belong to the same basket.
# NOTE(review): '/content/...' is a Colab-specific absolute path — point
# DATA_PATH at the local copy of the CSV when running elsewhere.
DATA_PATH = '/content/retail_bakery_transactions (1).csv'
df = pd.read_csv(DATA_PATH)

# Only the last expression of a cell is displayed, so a bare `df.shape` in the
# middle of the cell showed nothing; row/column counts are reported by
# df.info() in the next cell instead.
df.head()

Unnamed: 0,Transaction,Item,date_time,period_day,weekday_weekend
0,1,Bread,30-10-2016 09:58,morning,weekend
1,2,Scandinavian,30-10-2016 10:05,morning,weekend
2,2,Scandinavian,30-10-2016 10:05,morning,weekend
3,3,Hot chocolate,30-10-2016 10:07,morning,weekend
4,3,Jam,30-10-2016 10:07,morning,weekend


In [None]:
# Summarize row count, column dtypes and non-null counts (quick missing-value check)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20507 entries, 0 to 20506
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Transaction      20507 non-null  int64 
 1   Item             20507 non-null  object
 2   date_time        20507 non-null  object
 3   period_day       20507 non-null  object
 4   weekday_weekend  20507 non-null  object
dtypes: int64(1), object(4)
memory usage: 801.2+ KB


In [None]:
# Collect the distinct plain item names of every transaction (one set per basket).
# NOTE(review): `transactions` is reassigned by the later Item_Time basket cell,
# and nothing in between appears to read this value — confirm whether this cell
# is still needed.
items_by_transaction = df.groupby('Transaction')['Item']
transactions = items_by_transaction.apply(set).tolist()

In [None]:
# Tag each item with the part of the day it was bought in, e.g. "Bread (morning)"
df['Item_Time'] = df['Item'].add(' (').add(df['period_day']).add(')')

In [None]:
# Tag each item with the weekday/weekend flag, e.g. "Bread [weekend]"
day_type_suffix = ' [' + df['weekday_weekend'] + ']'
df['Item_Weekend'] = df['Item'] + day_type_suffix

In [None]:
# Tag each item with both contexts at once, e.g. "Bread (morning, weekend)"
context_label = df['period_day'] + ', ' + df['weekday_weekend']
df['Item_Context'] = df['Item'] + ' (' + context_label + ')'

In [None]:
# Inspect the combined item / period-of-day / day-type labels
df['Item_Context']

Unnamed: 0,Item_Context
0,"Bread (morning, weekend)"
1,"Scandinavian (morning, weekend)"
2,"Scandinavian (morning, weekend)"
3,"Hot chocolate (morning, weekend)"
4,"Jam (morning, weekend)"
...,...
20502,"Coffee (afternoon, weekend)"
20503,"Tea (afternoon, weekend)"
20504,"Coffee (afternoon, weekend)"
20505,"Pastry (afternoon, weekend)"


In [None]:
# FP-Growth needs the data as "transactions": one list per basket holding the
# items bought together. The items used here are the time-tagged labels,
# e.g. "Coffee (morning)".

# Group by Transaction to create baskets. set() drops repeated items within a
# basket; sorted() fixes the item order, because the iteration order of a set
# of strings can change between kernel sessions (hash randomization), which
# would make the preview below differ from run to run.
transactions = (
    df.groupby('Transaction')['Item_Time']
    .apply(lambda labels: sorted(set(labels)))
    .tolist()
)

# Preview the first few baskets
for basket in transactions[:5]:
    print(basket)


['Bread (morning)']
['Scandinavian (morning)']
['Jam (morning)', 'Hot chocolate (morning)', 'Cookies (morning)']
['Muffin (morning)']
['Bread (morning)', 'Pastry (morning)', 'Coffee (morning)']


In [None]:
# One-hot encode the baskets so FP-Growth can consume them

# Learn the set of distinct labels first, then turn every basket into a row
# of True/False flags (one flag per label)
te = TransactionEncoder()
te.fit(transactions)
te_ary = te.transform(transactions)

# Wrap the encoded array in a DataFrame with one column per label
df_encoded = pd.DataFrame(data=te_ary, columns=te.columns_)

# Preview the first few rows
df_encoded.head(n=5)

Unnamed: 0,Adjustment (evening),Afternoon with the baker (afternoon),Afternoon with the baker (evening),Afternoon with the baker (morning),Alfajores (afternoon),Alfajores (evening),Alfajores (morning),Argentina Night (afternoon),Argentina Night (morning),Art Tray (afternoon),...,Valentine's card (night),Vegan Feast (afternoon),Vegan Feast (evening),Vegan Feast (morning),Vegan Feast (night),Vegan mincepie (afternoon),Vegan mincepie (evening),Vegan mincepie (morning),Victorian Sponge (afternoon),Victorian Sponge (morning)
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [None]:

# Mine frequent itemsets with FP-Growth (minimum support 0.01, i.e. present in
# at least 1% of baskets) and order them from most to least common
frequent_itemsets = fpgrowth(
    df_encoded, min_support=0.01, use_colnames=True
).sort_values(by='support', ascending=False)

# Preview top results
frequent_itemsets.head()


Unnamed: 0,support,itemsets
12,0.247227,(Coffee (afternoon))
5,0.223244,(Coffee (morning))
15,0.164395,(Bread (afternoon))
0,0.157422,(Bread (morning))
18,0.091284,(Tea (afternoon))


In [None]:
# Derive association rules with confidence of at least 0.3 and rank them by
# lift, strongest association first
rules = (
    association_rules(frequent_itemsets, metric="confidence", min_threshold=0.3)
    .sort_values(by='lift', ascending=False)
)

# View the top 10 rules, restricted to the key metrics
key_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
rules.loc[:, key_columns].head(10)


Unnamed: 0,antecedents,consequents,support,confidence,lift
8,(Toast (morning)),(Coffee (morning)),0.015531,0.720588,3.227812
16,(Cookies (morning)),(Coffee (morning)),0.012361,0.6,2.687648
4,(Medialuna (morning)),(Coffee (morning)),0.023666,0.589474,2.640496
1,(Pastry (morning)),(Coffee (morning)),0.033492,0.554196,2.482472
15,(Hot chocolate (morning)),(Coffee (morning)),0.012361,0.539171,2.415167
11,(Cake (morning)),(Coffee (morning)),0.014369,0.525097,2.352124
12,(Pastry (afternoon)),(Coffee (afternoon)),0.013735,0.55794,2.256795
2,(Sandwich (afternoon)),(Coffee (afternoon)),0.033492,0.537288,2.173262
20,(Medialuna (afternoon)),(Coffee (afternoon)),0.010777,0.536842,2.171457
5,(Pastry (morning)),(Bread (morning)),0.020391,0.337413,2.143363


# Interpretation

Based on the association rules generated, we can tell a lot about customer buying behaviour.  

Let's consider an example and interpret it.  

Example 1:
People who buy toast in the morning also tend to buy a coffee in the same transaction.  

Here, support (0.0155) means that about 1.6% of all transactions include both morning toast and morning coffee.

Confidence (0.72) means that 72% of the transactions containing toast in the morning also contain coffee in the morning.

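Lift (3.23) means that morning toast and morning coffee are bought together about 3.23 times more often than would be expected if the two purchases were independent of each other (a lift of 1 would mean no association).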

In [None]:
# Re-run the mining with a lower support threshold (0.005) and a higher
# confidence threshold (0.5), then show the five rules with the highest lift
frequent_itemsets_low_support = fpgrowth(df_encoded, use_colnames=True, min_support=0.005)
rules_low_support = association_rules(
    frequent_itemsets_low_support,
    metric="confidence",
    min_threshold=0.5,
)
rules_by_lift = rules_low_support.sort_values(by='lift', ascending=False)
rules_by_lift.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
16,(Toast (morning)),(Coffee (morning)),0.021553,0.223244,0.015531,0.720588,3.227812,1.0,0.010719,2.779971,0.705396,0.067742,0.640284,0.395079
5,(Juice (morning)),(Coffee (morning)),0.01384,0.223244,0.008452,0.610687,2.735519,1.0,0.005362,1.995198,0.643343,0.036969,0.498797,0.324274
19,(Toast (afternoon)),(Coffee (afternoon)),0.012044,0.247227,0.008135,0.675439,2.732063,1.0,0.005158,2.319356,0.641705,0.032394,0.568846,0.354172
1,(Cookies (morning)),(Coffee (morning)),0.020602,0.223244,0.012361,0.6,2.687648,1.0,0.007762,1.941891,0.641136,0.0534,0.485038,0.327686
13,(Alfajores (morning)),(Coffee (morning)),0.010671,0.223244,0.006339,0.594059,2.661038,1.0,0.003957,1.913473,0.630939,0.027855,0.47739,0.311228


When we lower `min_support` (e.g., from 0.01 → 0.005):  
More rules are discovered.  
Rarer itemsets are included — these may be less reliable or less useful.  
This is useful if you are looking for niche patterns.  

When we raise `min_support` (e.g., to 0.02 or 0.05):  
The analysis focuses on popular combinations.  
It may miss interesting low-frequency behavior.  



When we lower `min_confidence` (e.g., 0.3 → 0.2):  
More rules are discovered.  
Some rules may be weak or misleading.  

When we raise `min_confidence` (e.g., to 0.5 or 0.7):  
Rules become more reliable.  
You may miss valid patterns with moderate confidence but high lift.  