In [2]:
pip install mlxtend

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
import io

import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [4]:
# Colab-only step: open the browser file picker and keep the result of the
# upload in `uploaded`.
# NOTE(review): `uploaded` is presumably a mapping of file name -> file bytes;
# confirm against the Colab documentation.
from google.colab import files
uploaded = files.upload()

Saving Online Retail 10000.xlsx to Online Retail 10000 (2).xlsx


In [5]:
# Now, we will load the data.
# The upload cell's output shows the file being saved under a suffixed name
# ("Online Retail 10000 (2).xlsx") because an older copy already existed, so
# reading the fixed path can silently load stale data.  Prefer the bytes
# returned by the upload; fall back to the file on disk only when nothing was
# uploaded in this session.
# NOTE(review): assumes a single uploaded workbook; the first entry is used.
if uploaded:
    data1 = pd.read_excel(io.BytesIO(next(iter(uploaded.values()))))
else:
    data1 = pd.read_excel('Online Retail 10000.xlsx')
data1.head()

Unnamed: 0,InvoiceNo,StockCode,lower,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,white hanging heart t-light holder,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,white metal lantern,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,cream cupid hearts coat hanger,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,knitted union flag hot water bottle,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,red woolly hottie white heart.,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


In [6]:
# Inspect the column labels of the loaded frame to see which fields are
# available for the basket analysis below.
data1.columns

Index(['InvoiceNo', 'StockCode', 'lower', 'Description', 'Quantity',
       'InvoiceDate', 'UnitPrice', 'CustomerID', 'Country'],
      dtype='object')

In [7]:
# List the distinct countries that appear in the transactions
data1['Country'].unique()

array(['United Kingdom', 'France', 'Australia', 'Netherlands', 'Germany',
       'Norway', 'EIRE', 'Switzerland', 'Spain', 'Poland', 'Portugal',
       'Italy', 'Belgium', 'Lithuania', 'Japan'], dtype=object)

In [None]:
# Upgrade mlxtend in the kernel's environment.
# NOTE(review): mlxtend was already imported by an earlier cell, so a runtime
# restart is presumably needed before the upgraded version is picked up -
# confirm.
%pip install mlxtend --upgrade

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mlxtend
  Downloading mlxtend-0.21.0-py2.py3-none-any.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m16.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: mlxtend
  Attempting uninstall: mlxtend
    Found existing installation: mlxtend 0.14.0
    Uninstalling mlxtend-0.14.0:
      Successfully uninstalled mlxtend-0.14.0
Successfully installed mlxtend-0.21.0


In [None]:
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import association_rules

# Clean the transactions with non-mutating, chained calls so that re-running
# this cell does not modify a filtered slice in place (which triggers
# SettingWithCopyWarning and leaves earlier displays stale).
data1 = (
    data1
    # Strip extra spaces in the description
    .assign(Description=lambda df: df['Description'].str.strip())
    # Drop rows without invoice number
    .dropna(axis=0, subset=['InvoiceNo'])
    # Convert invoice number to string
    .assign(InvoiceNo=lambda df: df['InvoiceNo'].astype('str'))
)

# Drop credit transactions (invoice numbers containing the letter 'C').
# regex=False: 'C' is a literal character, not a pattern.  The explicit copy
# makes the result an independent frame rather than a view-like slice.
data1 = data1[~data1['InvoiceNo'].str.contains('C', regex=False)].copy()

def _country_basket(transactions, country):
    """Build the invoice-by-product quantity table for one country.

    Parameters
    ----------
    transactions : pandas.DataFrame
        Frame with at least the columns 'Country', 'InvoiceNo',
        'Description' and 'Quantity'.
    country : str
        Value of the 'Country' column to keep.

    Returns
    -------
    pandas.DataFrame
        One row per 'InvoiceNo' (the index), one column per 'Description',
        holding the summed 'Quantity'; combinations that never occur are
        filled with 0.
    """
    return (transactions[transactions['Country'] == country]
            .groupby(['InvoiceNo', 'Description'])['Quantity']
            .sum().unstack().reset_index().fillna(0)
            .set_index('InvoiceNo'))


# Transactions done in France
basket1_France = _country_basket(data1, "France")

# Transactions done in the United Kingdom
basket1_UK = _country_basket(data1, "United Kingdom")

# Transactions done in Portugal
basket1_Por = _country_basket(data1, "Portugal")

# Transactions done in Sweden
# NOTE(review): "Sweden" does not appear in the Country.unique() output shown
# earlier in this notebook, so this basket is presumably empty - confirm it is
# still needed.
basket1_Sweden = _country_basket(data1, "Sweden")

# Define hot encoding function
def hot_encode1(x):
    """Map a basket quantity to a 0/1 purchase flag.

    Parameters
    ----------
    x : number
        Summed quantity of one product on one invoice.

    Returns
    -------
    int
        1 when ``x`` is strictly positive, otherwise 0.  Zero, negative
        quantities and NaN all map to 0.
    """
    # Written as ``x > 0`` rather than ``not (x <= 0)``: NaN fails every
    # comparison, so the positive test keeps a missing quantity from being
    # counted as a purchase.
    return 1 if x > 0 else 0

# Encode datasets using hot encoding.
# The flags are cast to bool because mlxtend's frequent-pattern functions are
# documented to work best on boolean frames; this also matches the encoding
# used in the following cell.
basket1_France, basket1_UK, basket1_Por, basket1_Sweden = (
    basket.applymap(hot_encode1).astype(bool)
    for basket in (basket1_France, basket1_UK, basket1_Por, basket1_Sweden)
)

# The scratch name stays bound to the last encoded frame (Sweden).
basket1_encoded = basket1_Sweden

# Mine the frequent itemsets of the French baskets with FP-growth,
# keeping itemsets whose support is at least 0.05
frq_items = fpgrowth(
    basket1_France,
    min_support=0.05,
    use_colnames=True,
)

# Turn the itemsets into association rules (lift metric, threshold 1) and
# order them by confidence, then lift, both descending
rules = (
    association_rules(frq_items, metric='lift', min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)





In [None]:
# Re-encode every country basket as boolean purchase flags
basket1_France = basket1_France.applymap(hot_encode1).astype(bool)
basket1_UK = basket1_UK.applymap(hot_encode1).astype(bool)
basket1_Por = basket1_Por.applymap(hot_encode1).astype(bool)

# basket1_encoded is left bound to the last (Sweden) frame
basket1_encoded = basket1_Sweden.applymap(hot_encode1).astype(bool)
basket1_Sweden = basket1_encoded

# Fit FP-growth on the French baskets (support threshold 0.05)
frequent_itemsets = fpgrowth(basket1_France, use_colnames=True, min_support=0.05)

# Gather the inferred rules (lift metric, threshold 1) in a dataframe,
# sorted by confidence and then lift, highest first
rules = (
    association_rules(frequent_itemsets, min_threshold=1, metric='lift')
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
print(rules.head())


                                           antecedents  \
114  (RED TOADSTOOL LED NIGHT LIGHT, PLASTERS IN TI...   
272  (RED TOADSTOOL LED NIGHT LIGHT, PLASTERS IN TI...   
329                       (JUMBO BAG WOODLAND ANIMALS)   
220  (SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...   
221  (SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...   

                         consequents  antecedent support  consequent support  \
114                        (POSTAGE)            0.053571            0.765306   
272                        (POSTAGE)            0.051020            0.765306   
329                        (POSTAGE)            0.076531            0.765306   
220  (SET/6 RED SPOTTY PAPER PLATES)            0.102041            0.127551   
221    (SET/6 RED SPOTTY PAPER CUPS)            0.102041            0.137755   

      support  confidence      lift  leverage  conviction  
114  0.053571       1.000  1.306667  0.012573         inf  
272  0.051020       1.000  1.306667  0.011974     