In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules

In [3]:
# Load the retail workbook from the notebook-relative Data/ folder.
df = pd.read_excel('Data/OnlineRetail.xlsx')
# Preview the first rows to check the columns that were read.
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


In [4]:
# Per-column count of missing values.
df.isna().sum()


InvoiceNo           0
StockCode           0
Description      1454
Quantity            0
InvoiceDate         0
UnitPrice           0
CustomerID     135080
Country             0
dtype: int64

In [5]:
def run_apriori(df, min_support):
    """Mine frequent itemsets from ``df`` with mlxtend's ``apriori``.

    Args:
        df: Frame handed to ``apriori`` unchanged (the notebook passes its
            one-hot encoded transaction frame).
        min_support: Minimum support threshold forwarded to ``apriori``.

    Returns:
        The result of ``apriori``, with itemsets expressed by column name
        (``use_colnames=True``).
    """
    mining_options = {"min_support": min_support, "use_colnames": True}
    return apriori(df, **mining_options)


def generate_rules(frequent_itemsets, min_confidence):
    """Build association rules filtered by confidence.

    Args:
        frequent_itemsets: Frequent-itemset result passed straight to
            mlxtend's ``association_rules``.
        min_confidence: Threshold applied to the ``confidence`` metric.

    Returns:
        The result of ``association_rules`` for the given threshold.
    """
    rules = association_rules(
        frequent_itemsets,
        metric="confidence",
        min_threshold=min_confidence,
    )
    return rules


def run_fpgrowth(transactions, min_support):
    """Mine frequent patterns from raw transactions with ``pyfpgrowth``.

    Args:
        transactions: Sized collection of transactions (``len()`` is taken of
            it); passed unchanged to ``pyfpgrowth.find_frequent_patterns``.
        min_support: Relative support threshold; it is multiplied by the
            number of transactions before being handed to pyfpgrowth.

    Returns:
        Whatever ``pyfpgrowth.find_frequent_patterns`` returns for the
        scaled threshold.

    Raises:
        ImportError: If the ``pyfpgrowth`` package is not installed.
    """
    # The notebook's import cell does not import pyfpgrowth, so the bare name
    # would be undefined on a fresh kernel; importing here keeps the function
    # self-contained (and is a no-op if the module is already loaded).
    import pyfpgrowth

    # NOTE(review): presumably pyfpgrowth expects an absolute occurrence
    # count rather than a fraction, hence the scaling — confirm against its docs.
    support_threshold = min_support * len(transactions)
    return pyfpgrowth.find_frequent_patterns(transactions, support_threshold)

In [6]:
# Discard rows lacking a product description or a customer id, then gather
# each invoice's descriptions into one list (one basket per InvoiceNo).
df = df.dropna(subset=["Description", "CustomerID"])
transactions = (
    df.groupby('InvoiceNo')['Description']
    .apply(list)
    .tolist()
)

In [7]:
# One-hot encode the baskets: one row per transaction, one column per item.
te = TransactionEncoder()
te.fit(transactions)
te_ary = te.transform(transactions)
df_onehot = pd.DataFrame(data=te_ary, columns=te.columns_)
df_onehot.head()

Unnamed: 0,4 PURPLE FLOCK DINNER CANDLES,50'S CHRISTMAS GIFT BAG LARGE,DOLLY GIRL BEAKER,I LOVE LONDON MINI BACKPACK,I LOVE LONDON MINI RUCKSACK,NINE DRAWER OFFICE TIDY,OVAL WALL MIRROR DIAMANTE,RED SPOT GIFT BAG LARGE,SET 2 TEA TOWELS I LOVE LONDON,SPACEBOY BABY GIFT SET,...,ZINC STAR T-LIGHT HOLDER,ZINC SWEETHEART SOAP DISH,ZINC SWEETHEART WIRE LETTER RACK,ZINC T-LIGHT HOLDER STAR LARGE,ZINC T-LIGHT HOLDER STARS LARGE,ZINC T-LIGHT HOLDER STARS SMALL,ZINC TOP 2 DOOR WOODEN SHELF,ZINC WILLIE WINKIE CANDLE STICK,ZINC WIRE KITCHEN ORGANISER,ZINC WIRE SWEETHEART LETTER TRAY
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [8]:
# Thresholds shared by the mining cells below.
min_confidence = 0.7  # minimum rule confidence handed to generate_rules
min_support = 0.02    # minimum itemset support handed to run_apriori

In [9]:
# Frequent itemsets via Apriori on the one-hot encoded baskets.
frequent_itemsets_apriori = run_apriori(
    df=df_onehot,
    min_support=min_support,
)
frequent_itemsets_apriori

Unnamed: 0,support,itemsets
0,0.032988,(6 RIBBONS RUSTIC CHARM)
1,0.021181,(60 CAKE CASES VINTAGE CHRISTMAS)
2,0.029923,(60 TEATIME FAIRY CAKE CASES)
3,0.022758,(72 SWEETHEART FAIRY CAKE CASES)
4,0.036458,(ALARM CLOCK BAKELIKE GREEN)
...,...,...
171,0.020595,"(LUNCH BAG SUKI DESIGN , LUNCH BAG RED RETROSPOT)"
172,0.020460,"(PAPER CHAIN KIT VINTAGE CHRISTMAS, PAPER CHAI..."
173,0.020324,"(PINK REGENCY TEACUP AND SAUCER, ROSES REGENCY..."
174,0.020730,"(RED HANGING HEART T-LIGHT HOLDER, WHITE HANGI..."


In [10]:
# Turn the Apriori itemsets into rules and show only the key columns.
rules_apriori = generate_rules(
    frequent_itemsets=frequent_itemsets_apriori,
    min_confidence=min_confidence,
)
rules_apriori.loc[:, ['antecedents', 'consequents', 'confidence']]

Unnamed: 0,antecedents,consequents,confidence
0,(GARDENERS KNEELING PAD CUP OF TEA ),(GARDENERS KNEELING PAD KEEP CALM ),0.725857
1,(PINK REGENCY TEACUP AND SAUCER),(GREEN REGENCY TEACUP AND SAUCER),0.796954
2,(GREEN REGENCY TEACUP AND SAUCER),(ROSES REGENCY TEACUP AND SAUCER ),0.759891
3,(PINK REGENCY TEACUP AND SAUCER),(ROSES REGENCY TEACUP AND SAUCER ),0.763113
