# Import main libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Import data

In [2]:
# Read the retail transactions from the Excel workbook (path is relative to
# the notebook's working directory) and preview the first five rows.
data = pd.read_excel('Australia Online Retail.xlsx') 
data.head() 

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536389,22941,CHRISTMAS LIGHTS 10 REINDEER,6,2010-12-01 10:03:00,8.5,12431,Australia
1,536389,21622,VINTAGE UNION JACK CUSHION COVER,8,2010-12-01 10:03:00,4.95,12431,Australia
2,536389,21791,VINTAGE HEADS AND TAILS CARD GAME,12,2010-12-01 10:03:00,1.25,12431,Australia
3,536389,35004C,SET OF 3 COLOURED FLYING DUCKS,6,2010-12-01 10:03:00,5.45,12431,Australia
4,536389,35004G,SET OF 3 GOLD FLYING DUCKS,4,2010-12-01 10:03:00,6.35,12431,Australia


# Reshaping the data into the format the library expects

In [3]:
# Trim leading and trailing whitespace from every product description so
# that the same product is not split into several labels.
data = data.assign(Description=data['Description'].str.strip())

In [4]:
# Total quantity of each product on each invoice (long format).
quantity_per_invoice_item = data.groupby(['InvoiceNo', 'Description'])['Quantity'].sum()

# Pivot to one row per invoice and one column per product; a product that is
# absent from an invoice gets quantity 0.
invoice_item_table = quantity_per_invoice_item.unstack().reset_index()
data = invoice_item_table.fillna(0).set_index('InvoiceNo')

In [5]:
def hot_encode(x):
    """Flag whether an item was bought on an invoice.

    Parameters
    ----------
    x : int or float
        Net quantity of one item on one invoice.

    Returns
    -------
    int or float
        ``1`` when ``x >= 1``; ``np.nan`` for every other input
        (zero, negative, fractional below 1, or NaN).
    """
    # A single comparison covers every input, so no value falls through and
    # yields an implicit ``None``. ``np.nan`` is used because the capitalised
    # alias ``np.NaN`` was removed in NumPy 2.0.
    return 1 if x >= 1 else np.nan

In [6]:
# Encode every cell as 1 (bought) or NaN (not bought), one column at a time.
# Series.map is used because DataFrame.applymap is deprecated since pandas 2.1,
# while Series.map behaves the same on old and new pandas versions.
data = data.apply(lambda column: column.map(hot_encode))

In [7]:
# Series that maps every column label to itself; passing it as the
# replacement value makes the substitution column-wise, so each 1 flag
# becomes the name of the column (product) it sits in.
item_name_by_column = pd.Series(data.columns, index=data.columns)
data = data.replace(to_replace=1, value=item_name_by_column)

In [8]:
# Preview the first two invoices of the encoded table.
data.head(2)

Description,10 COLOUR SPACEBOY PEN,12 PENCIL SMALL TUBE WOODLAND,12 PENCILS TALL TUBE POSY,12 PENCILS TALL TUBE RED RETROSPOT,16 PIECE CUTLERY SET PANTRY DESIGN,20 DOLLY PEGS RETROSPOT,3 HOOK HANGER MAGIC GARDEN,3 STRIPEY MICE FELTCRAFT,3 TIER CAKE TIN GREEN AND CREAM,3 TIER CAKE TIN RED AND CREAM,...,WRAP DOILEY DESIGN,WRAP DOLLY GIRL,WRAP ENGLISH ROSE,WRAP I LOVE LONDON,WRAP POPPIES DESIGN,WRAP RED APPLES,WRAP RED VINTAGE DOILY,WRAP VINTAGE LEAF DESIGN,WRAP WEDDING DAY,YELLOW GIANT GARDEN THERMOMETER
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
536389,,,,,,,,,,,...,,,,,,,,,,
537676,,,,,,20 DOLLY PEGS RETROSPOT,,,,,...,,,,,,,,,,


In [9]:
# Turn each invoice row into the list of item names actually bought.
# Missing cells are skipped: converting them with str() would add a literal
# 'nan' item to every basket, which then shows up in the mined itemsets.
# NOTE: `data` changes from a DataFrame to a list of lists here, so this cell
# cannot be re-run without re-running the cells that build the DataFrame.
data = [
    [str(item) for item in invoice_row if pd.notna(item)]
    for invoice_row in data.to_numpy()
]

# Training Apriori on the dataset

In [10]:
from apyori import apriori

In [11]:
# Mine association rules and materialise them straight away: apriori() yields
# its records lazily, so keeping the raw generator would leave `rules` empty
# after the first time it is iterated.
# `min_length` is not passed: it is not among the keyword arguments apyori
# reads, so it had no effect. Single-item records are already excluded by
# min_lift = 3, because their lift is 1.
rules = list(apriori(data, min_support=0.05, min_confidence=0.2, min_lift=3))

# Visualising the results

In [12]:
# Store the materialised list back into `rules` before slicing, so that a
# one-shot iterator is not exhausted by the preview and the cell can be
# re-run with the same output.
rules = list(rules)
rules[:5]

[RelationRecord(items=frozenset({'RED RETROSPOT CAKE STAND', '36 PENCILS TUBE RED RETROSPOT'}), support=0.057971014492753624, ordered_statistics=[OrderedStatistic(items_base=frozenset({'36 PENCILS TUBE RED RETROSPOT'}), items_add=frozenset({'RED RETROSPOT CAKE STAND'}), confidence=1.0, lift=17.25), OrderedStatistic(items_base=frozenset({'RED RETROSPOT CAKE STAND'}), items_add=frozenset({'36 PENCILS TUBE RED RETROSPOT'}), confidence=1.0, lift=17.25)]),
 RelationRecord(items=frozenset({'SET OF 3 CAKE TINS PANTRY DESIGN', '36 PENCILS TUBE RED RETROSPOT'}), support=0.057971014492753624, ordered_statistics=[OrderedStatistic(items_base=frozenset({'36 PENCILS TUBE RED RETROSPOT'}), items_add=frozenset({'SET OF 3 CAKE TINS PANTRY DESIGN'}), confidence=1.0, lift=7.666666666666667), OrderedStatistic(items_base=frozenset({'SET OF 3 CAKE TINS PANTRY DESIGN'}), items_add=frozenset({'36 PENCILS TUBE RED RETROSPOT'}), confidence=0.4444444444444445, lift=7.666666666666667)]),
 RelationRecord(items=fro