In [1]:
# Importing libraries
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import fpgrowth, association_rules

# Load the data
data = pd.read_excel('./data/Online_Retail.xlsx')


In [2]:
# Data Cleaning
data['Description'] = data['Description'].str.strip()  # Remove extra spaces
data.dropna(subset=['InvoiceNo'], inplace=True)        # Drop rows with missing InvoiceNo
data['InvoiceNo'] = data['InvoiceNo'].astype('str')    # Ensure InvoiceNo is string

data = data[~data['InvoiceNo'].str.contains('C')]      # Exclude credit transactions

# Hot encoding function
def hot_encode(x):
    return 1 if x >= 1 else 0

In [3]:
# Filtering data for United Kingdom
basket_UK = (data[data['Country'] == "United Kingdom"]
             .groupby(['InvoiceNo', 'Description'])['Quantity']
             .sum().unstack().reset_index().fillna(0)
             .set_index('InvoiceNo'))

# Hot encoding the data
basket_UK = basket_UK.applymap(hot_encode)

# FP-Growth for UK
frq_items_UK = fpgrowth(basket_UK, min_support=0.02, use_colnames=True)
rules_UK = association_rules(frq_items_UK, metric="lift", min_threshold=1)

# Display results
print("Rules for United Kingdom")
print(rules_UK.sort_values(['confidence', 'lift'], ascending=[False, False]).head())

  basket_UK = basket_UK.applymap(hot_encode)


Rules for United Kingdom
                                           antecedents  \
150  (PINK REGENCY TEACUP AND SAUCER, ROSES REGENCY...   
148  (GREEN REGENCY TEACUP AND SAUCER, PINK REGENCY...   
145                   (PINK REGENCY TEACUP AND SAUCER)   
18   (JUMBO STORAGE BAG SUKI, JUMBO BAG PINK POLKADOT)   
146                   (PINK REGENCY TEACUP AND SAUCER)   

                           consequents  antecedent support  \
150  (GREEN REGENCY TEACUP AND SAUCER)            0.029249   
148  (ROSES REGENCY TEACUP AND SAUCER)            0.030910   
145  (GREEN REGENCY TEACUP AND SAUCER)            0.037660   
18           (JUMBO BAG RED RETROSPOT)            0.027053   
146  (ROSES REGENCY TEACUP AND SAUCER)            0.037660   

     consequent support   support  confidence       lift  leverage  \
150            0.050035  0.026410    0.902930  18.046041  0.024947   
148            0.051267  0.026410    0.854419  16.666089  0.024826   
145            0.050035  0.030910    0.8207

In [4]:
# Filtering data for France
basket_France = (data[data['Country'] == "France"]
                 .groupby(['InvoiceNo', 'Description'])['Quantity']
                 .sum().unstack().reset_index().fillna(0)
                 .set_index('InvoiceNo'))

# Hot encoding the data
basket_France = basket_France.applymap(hot_encode)

# FP-Growth for France
frq_items_France = fpgrowth(basket_France, min_support=0.05, use_colnames=True)
rules_France = association_rules(frq_items_France, metric="lift", min_threshold=1)

# Display results
print("Rules for France")
print(rules_France.sort_values(['confidence', 'lift'], ascending=[False, False]).head())

Rules for France
                                           antecedents  \
114  (RED TOADSTOOL LED NIGHT LIGHT, PLASTERS IN TI...   
272  (RED TOADSTOOL LED NIGHT LIGHT, PLASTERS IN TI...   
329                       (JUMBO BAG WOODLAND ANIMALS)   
222  (SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...   
221  (SET/6 RED SPOTTY PAPER PLATES, SET/20 RED RET...   

                         consequents  antecedent support  consequent support  \
114                        (POSTAGE)            0.053571            0.765306   
272                        (POSTAGE)            0.051020            0.765306   
329                        (POSTAGE)            0.076531            0.765306   
222  (SET/6 RED SPOTTY PAPER PLATES)            0.102041            0.127551   
221    (SET/6 RED SPOTTY PAPER CUPS)            0.102041            0.137755   

      support  confidence      lift  leverage  conviction  zhangs_metric  
114  0.053571       1.000  1.306667  0.012573         inf       0.247978  
272

  basket_France = basket_France.applymap(hot_encode)


In [5]:
# Filtering data for Portugal
basket_Portugal = (data[data['Country'] == "Portugal"]
                   .groupby(['InvoiceNo', 'Description'])['Quantity']
                   .sum().unstack().reset_index().fillna(0)
                   .set_index('InvoiceNo'))

# Hot encoding the data
basket_Portugal = basket_Portugal.applymap(hot_encode)

# FP-Growth for Portugal
frq_items_Portugal = fpgrowth(basket_Portugal, min_support=0.05, use_colnames=True)
rules_Portugal = association_rules(frq_items_Portugal, metric="lift", min_threshold=1)

# Display results
print("Rules for Portugal")
print(rules_Portugal.sort_values(['confidence', 'lift'], ascending=[False, False]).head())

  basket_Portugal = basket_Portugal.applymap(hot_encode)


Rules for Portugal
                                             antecedents  \
13224  (LUNCH BAG CARS BLUE, PLASTERS IN TIN CIRCUS P...   
13225    (LUNCH BAG CARS BLUE, PLASTERS IN TIN SPACEBOY)   
13228  (LUNCH BAG PINK POLKADOT, PLASTERS IN TIN CIRC...   
13229  (LUNCH BAG PINK POLKADOT, PLASTERS IN TIN SPAC...   
13239  (LUNCH BAG CARS BLUE, PLASTERS IN TIN CIRCUS P...   

                                             consequents  antecedent support  \
13224  (LUNCH BAG PINK POLKADOT, PLASTERS IN TIN SPAC...            0.051724   
13225  (LUNCH BAG PINK POLKADOT, PLASTERS IN TIN CIRC...            0.051724   
13228    (LUNCH BAG CARS BLUE, PLASTERS IN TIN SPACEBOY)            0.051724   
13229  (LUNCH BAG CARS BLUE, PLASTERS IN TIN CIRCUS P...            0.051724   
13239  (LUNCH BAG PINK POLKADOT, PLASTERS IN TIN VINT...            0.051724   

       consequent support   support  confidence       lift  leverage  \
13224            0.051724  0.051724         1.0  19.333333  0.04904

In [6]:
# Filtering data for Sweden
basket_Sweden = (data[data['Country'] == "Sweden"]
                 .groupby(['InvoiceNo', 'Description'])['Quantity']
                 .sum().unstack().reset_index().fillna(0)
                 .set_index('InvoiceNo'))

# Hot encoding the data
basket_Sweden = basket_Sweden.applymap(hot_encode)

# FP-Growth for Sweden
frq_items_Sweden = fpgrowth(basket_Sweden, min_support=0.05, use_colnames=True)
rules_Sweden = association_rules(frq_items_Sweden, metric="lift", min_threshold=1)

# Display results
print("Rules for Sweden")
print(rules_Sweden.sort_values(['confidence', 'lift'], ascending=[False, False]).head())

  basket_Sweden = basket_Sweden.applymap(hot_encode)


Rules for Sweden
                                           antecedents  \
121  (PACK OF 60 SPACEBOY CAKE CASES, RETROSPOT TEA...   
122  (PACK OF 60 SPACEBOY CAKE CASES, MINI PAINT SE...   
123  (PACK OF 72 RETROSPOT CAKE CASES, RETROSPOT TE...   
124  (PACK OF 72 RETROSPOT CAKE CASES, MINI PAINT S...   
149  (PACK OF 72 RETROSPOT CAKE CASES, SET OF 3 CAK...   

                                           consequents  antecedent support  \
121  (PACK OF 72 RETROSPOT CAKE CASES, MINI PAINT S...            0.055556   
122  (PACK OF 72 RETROSPOT CAKE CASES, RETROSPOT TE...            0.055556   
123  (PACK OF 60 SPACEBOY CAKE CASES, MINI PAINT SE...            0.055556   
124  (PACK OF 60 SPACEBOY CAKE CASES, RETROSPOT TEA...            0.055556   
149  (PACK OF 60 SPACEBOY CAKE CASES, MINI PAINT SE...            0.055556   

     consequent support   support  confidence  lift  leverage  conviction  \
121            0.055556  0.055556         1.0  18.0  0.052469         inf   
122        

In [7]:
# Filtering data for Japan
basket_Japan = (data[data['Country'] == "Japan"]
                .groupby(['InvoiceNo', 'Description'])['Quantity']
                .sum().unstack().reset_index().fillna(0)
                .set_index('InvoiceNo'))

# Hot encoding the data
basket_Japan = basket_Japan.applymap(hot_encode)

# FP-Growth for Japan
frq_items_Japan = fpgrowth(basket_Japan, min_support=0.11, use_colnames=True)
rules_Japan = association_rules(frq_items_Japan, metric="lift", min_threshold=1)

# Display results
print("Rules for Japan")
print(rules_Japan.sort_values(['confidence', 'lift'], ascending=[False, False]).head())

Rules for Japan
                                           antecedents  \
136  (CHARLOTTE BAG DOLLY GIRL DESIGN, BASKET OF TO...   
138  (CHARLOTTE BAG DOLLY GIRL DESIGN, SET 3 RETROS...   
139  (BASKET OF TOADSTOOLS, LUNCH BAG DOLLY GIRL DE...   
141  (LUNCH BAG DOLLY GIRL DESIGN, SET 3 RETROSPOT ...   
167  (BASKET OF TOADSTOOLS, LUNCH BAG DOLLY GIRL DE...   

                                           consequents  antecedent support  \
136  (LUNCH BAG DOLLY GIRL DESIGN, SET 3 RETROSPOT ...            0.157895   
138  (BASKET OF TOADSTOOLS, LUNCH BAG DOLLY GIRL DE...            0.157895   
139  (CHARLOTTE BAG DOLLY GIRL DESIGN, SET 3 RETROS...            0.157895   
141  (CHARLOTTE BAG DOLLY GIRL DESIGN, BASKET OF TO...            0.157895   
167  (CHARLOTTE BAG DOLLY GIRL DESIGN, SET 3 RETROS...            0.157895   

     consequent support   support  confidence      lift  leverage  conviction  \
136            0.157895  0.157895         1.0  6.333333  0.132964         inf   
138 

  basket_Japan = basket_Japan.applymap(hot_encode)


In [8]:
# Filtering data for Iceland
basket_Iceland = (data[data['Country'] == "Iceland"]
                  .groupby(['InvoiceNo', 'Description'])['Quantity']
                  .sum().unstack().reset_index().fillna(0)
                  .set_index('InvoiceNo'))

# Hot encoding the data
basket_Iceland = basket_Iceland.applymap(hot_encode)

# FP-Growth for Iceland
frq_items_Iceland = fpgrowth(basket_Iceland, min_support=0.29, use_colnames=True)
rules_Iceland = association_rules(frq_items_Iceland, metric="lift", min_threshold=1)

# Display results
print("Rules for Iceland")
print(rules_Iceland.sort_values(['confidence', 'lift'], ascending=[False, False]).head())

Rules for Iceland
                                           antecedents  \
268                           (MINI PAINT SET VINTAGE)   
269                     (LARGE HEART MEASURING SPOONS)   
274  (RED TOADSTOOL LED NIGHT LIGHT, MINI PAINT SET...   
275  (RED TOADSTOOL LED NIGHT LIGHT, LARGE HEART ME...   
278                           (MINI PAINT SET VINTAGE)   

                                           consequents  antecedent support  \
268                     (LARGE HEART MEASURING SPOONS)            0.428571   
269                           (MINI PAINT SET VINTAGE)            0.428571   
274                     (LARGE HEART MEASURING SPOONS)            0.428571   
275                           (MINI PAINT SET VINTAGE)            0.428571   
278  (RED TOADSTOOL LED NIGHT LIGHT, LARGE HEART ME...            0.428571   

     consequent support   support  confidence      lift  leverage  conviction  \
268            0.428571  0.428571         1.0  2.333333  0.244898         inf   
26

  basket_Iceland = basket_Iceland.applymap(hot_encode)
