In [None]:
# Step 1: Install necessary libraries (if not installed already)
!pip install mlxtend






In [None]:
# Step 2: Import necessary libraries
import pandas as pd
from pycaret.datasets import get_data
from mlxtend.frequent_patterns import apriori, association_rules


In [None]:
# Step 3: Load the 'germany' dataset using PyCaret's get_data() function.
# The cell output shows one row per invoice line with the columns InvoiceNo,
# StockCode, Description, Quantity, InvoiceDate, UnitPrice, CustomerID and Country;
# several rows can share the same InvoiceNo (one per purchased item).
data = get_data('germany')

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536527,22809,SET OF 6 T-LIGHTS SANTA,6,12/1/2010 13:04,2.95,12662,Germany
1,536527,84347,ROTATING SILVER ANGELS T-LIGHT HLDR,6,12/1/2010 13:04,2.55,12662,Germany
2,536527,84945,MULTI COLOUR SILVER T-LIGHT HOLDER,12,12/1/2010 13:04,0.85,12662,Germany
3,536527,22242,5 HOOK HANGER MAGIC TOADSTOOL,12,12/1/2010 13:04,1.65,12662,Germany
4,536527,22244,3 HOOK HANGER MAGIC GARDEN,12,12/1/2010 13:04,1.95,12662,Germany


In [None]:
# Step 4: Preprocess the data (pivot into a one-hot encoded transaction matrix)
# Rows are transactions ('InvoiceNo'), columns are items ('Description').
# Count the rows of every (invoice, item) pair, spread the items across columns
# (pairs that never occur become 0), then reduce the counts to presence flags.
# The vectorized comparison replaces the element-wise `applymap` call (deprecated
# since pandas 2.1); the result holds True/False instead of 1/0, which is the
# input type mlxtend's apriori recommends.
basket = (
    data.groupby(['InvoiceNo', 'Description'])
    .size()
    .unstack(fill_value=0)
    .gt(0)
)


In [None]:
# Step 5: Mine association rules with mlxtend
# Frequent itemsets: keep itemsets whose support is at least 0.01, labelled with
# the item names (column names of `basket`) rather than column indices.
frequent_itemsets = apriori(basket, use_colnames=True, min_support=0.01)
# Rules: derive rules from those itemsets and keep the ones whose confidence
# is at least 0.5.
rules = association_rules(frequent_itemsets, min_threshold=0.5, metric="confidence")

# Preview the first five rules
print(rules.head(5))

                       antecedents                            consequents  \
0             ( DOLLY GIRL BEAKER)                              (POSTAGE)   
1         (10 COLOUR SPACEBOY PEN)                              (POSTAGE)   
2  (12 PENCIL SMALL TUBE WOODLAND)                              (POSTAGE)   
3  (12 PENCILS TALL TUBE WOODLAND)                              (POSTAGE)   
4  (12 PENCILS TALL TUBE WOODLAND)  (ROUND SNACK BOXES SET OF4 WOODLAND )   

   antecedent support  consequent support   support  confidence      lift  \
0            0.011609            0.635158  0.011609         1.0  1.574413   
1            0.018242            0.635158  0.018242         1.0  1.574413   
2            0.016584            0.635158  0.013267         0.8  1.259530   
3            0.016584            0.635158  0.013267         0.8  1.259530   
4            0.016584            0.197347  0.011609         0.7  3.547059   

   leverage  conviction  zhangs_metric  
0  0.004235         inf       0.3

In [None]:
# Step 6: (Optional) Rank the rules for analysis
# Order the rules from highest to lowest lift.
rules = rules.sort_values('lift', ascending=False)
# Show the ten rules with the highest lift
print(rules.head(n=10))

                                            antecedents  \
2612  (RED VINTAGE SPOT BEAKER, PINK VINTAGE SPOT BE...   
2609  (BLUE VINTAGE SPOT BEAKER, GREEN VINTAGE SPOT ...   
2793  (SCANDINAVIAN REDS RIBBONS, ROUND SNACK BOXES ...   
236                  (STAR WREATH DECORATION WITH BELL)   
2796  (CHOCOLATE BOX RIBBONS , ROUND SNACK BOXES SET...   
2794  (SCANDINAVIAN REDS RIBBONS, ROUND SNACK BOXES ...   
2797  (CHOCOLATE BOX RIBBONS , ROUND SNACK BOXES SET...   
237                 (HEART WREATH DECORATION WITH BELL)   
2887  (SPACEBOY CHILDRENS BOWL, DOLLY GIRL CHILDRENS...   
2889  (DOLLY GIRL CHILDRENS BOWL, SPACEBOY CHILDRENS...   

                                            consequents  antecedent support  \
2612  (BLUE VINTAGE SPOT BEAKER, GREEN VINTAGE SPOT ...            0.013267   
2609  (RED VINTAGE SPOT BEAKER, PINK VINTAGE SPOT BE...            0.011609   
2793  (CHOCOLATE BOX RIBBONS , ROUND SNACK BOXES SET...            0.013267   
236                 (HEART WREATH 