In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from apyori import apriori

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
df = pd.read_csv('cleaned.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,BillNo,Itemname,Quantity,Date,Price,CustomerID,Country,Year,Month,Datetime,Total
0,41242,539993,JUMBO BAG PINK POLKADOT,10,2011-01-04,1.95,13313.0,United Kingdom,2011,1,2011-01-04 10:00:00,19.5
1,41243,539993,BLUE POLKADOT WRAP,25,2011-01-04,0.42,13313.0,United Kingdom,2011,1,2011-01-04 10:00:00,10.5
2,41244,539993,RED RETROSPOT WRAP,25,2011-01-04,0.42,13313.0,United Kingdom,2011,1,2011-01-04 10:00:00,10.5
3,41245,539993,RECYCLING BAG RETROSPOT,5,2011-01-04,2.1,13313.0,United Kingdom,2011,1,2011-01-04 10:00:00,10.5
4,41246,539993,RED RETROSPOT SHOPPER BAG,10,2011-01-04,1.25,13313.0,United Kingdom,2011,1,2011-01-04 10:00:00,12.5


# Trail 1

In [4]:
items=df.set_index(['BillNo']).stack()
items

BillNo            
539993  Unnamed: 0                      41242
        Itemname      JUMBO BAG PINK POLKADOT
        Quantity                           10
        Date                       2011-01-04
        Price                            1.95
                               ...           
581587  Country                        France
        Year                             2011
        Month                              12
        Datetime          2011-12-09 12:50:00
        Total                           14.85
Length: 5243029, dtype: object

In [7]:
item = pd.DataFrame(items)
item.rename(columns={0:'list'},inplace=True)
item.index.levels[0]

Index([539993, 540001, 540002, 540003, 540004, 540005, 540013, 540014, 540015,
       540016,
       ...
       581578, 581579, 581580, 581581, 581582, 581583, 581584, 581585, 581586,
       581587],
      dtype='int64', name='BillNo', length=17868)

In [8]:
products=[]

for i in (item.index.levels[0]):
    product=item.loc[(i,['Itemname']),'list'].to_list()
    
    products.append(product)

In [9]:
products[0:5]

[['JUMBO BAG PINK POLKADOT',
  'BLUE POLKADOT WRAP',
  'RED RETROSPOT WRAP',
  'RECYCLING BAG RETROSPOT',
  'RED RETROSPOT SHOPPER BAG',
  'JUMBO BAG RED RETROSPOT',
  'RED RETROSPOT CHILDRENS UMBRELLA',
  'JAM MAKING SET PRINTED',
  'RECIPE BOX RETROSPOT',
  'CHILDRENS APRON APPLES DESIGN',
  'PEG BAG APPLES DESIGN',
  'COFFEE MUG APPLES DESIGN',
  'COFFEE MUG PEARS  DESIGN',
  'WHITE HANGING HEART T-LIGHT HOLDER',
  'SET OF 6 T-LIGHTS EASTER CHICKS',
  'CAST IRON HOOK GARDEN FORK',
  'LOVE HEART NAPKIN BOX'],
 ['RED HANGING HEART T-LIGHT HOLDER',
  'CERAMIC BOWL WITH LOVE HEART DESIGN',
  'LARGE CAKE STAND HANGING HEARTS',
  'PLACE SETTING WHITE HEART',
  'DOORSTOP RETROSPOT HEART',
  'GINGHAM HEART  DOORSTOP RED',
  'CERAMIC CHERRY CAKE MONEY BANK',
  'LOVE HEART POCKET WARMER',
  'SWEETHEART CERAMIC TRINKET BOX'],
 ['GARDEN METAL SIGN',
  'RED KITCHEN SCALES',
  'VICTORIAN SEWING BOX SMALL',
  'VINTAGE SNAP CARDS'],
 ['HANGING HEART ZINC T-LIGHT HOLDER',
  'BREAD BIN DINER STYLE RE

In [12]:
rules=apriori(transactions=products,min_support=0.003,min_confidence=0.8,min_lift=3,min_length=2,max_length=3)
rules

<generator object apriori at 0x000002AF9382C040>

In [13]:
results=list(rules)
results

[RelationRecord(items=frozenset({'3 RAFFIA RIBBONS VINTAGE CHRISTMAS', "3 RAFFIA RIBBONS 50'S CHRISTMAS"}), support=0.0047011417058428475, ordered_statistics=[OrderedStatistic(items_base=frozenset({'3 RAFFIA RIBBONS VINTAGE CHRISTMAS'}), items_add=frozenset({"3 RAFFIA RIBBONS 50'S CHRISTMAS"}), confidence=0.865979381443299, lift=79.35035685963521)]),
 RelationRecord(items=frozenset({'BICYCLE PUNCTURE REPAIR KIT', 'CLASSIC BICYCLE CLIPS'}), support=0.004421311842399821, ordered_statistics=[OrderedStatistic(items_base=frozenset({'CLASSIC BICYCLE CLIPS'}), items_add=frozenset({'BICYCLE PUNCTURE REPAIR KIT'}), confidence=0.8061224489795917, lift=126.34908700322232)]),
 RelationRecord(items=frozenset({'SMALL DOLLY MIX DESIGN ORANGE BOWL', 'BISCUITS SMALL BOWL LIGHT BLUE'}), support=0.008450861875979404, ordered_statistics=[OrderedStatistic(items_base=frozenset({'BISCUITS SMALL BOWL LIGHT BLUE'}), items_add=frozenset({'SMALL DOLLY MIX DESIGN ORANGE BOWL'}), confidence=0.9096385542168675, lif

In [14]:
def inspect(results):
    lhs=[tuple(result[2][0][0])[0] for result in results]
    rhs=[tuple(result[2][0][1])[0] for result in results]
    supports=[result[1] for result in results]
    confidences=[result[2][0][2] for result in results]
    lifts=[result[2][0][3] for result in results]

    return list(zip(lhs,rhs,supports,confidences,lifts))

In [15]:
result=pd.DataFrame(inspect(results),columns=['Left Hand Side','Right Hand Side','Support','Confidence','Lift'])
result.sort_values(by='Confidence',ascending=False)

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
188,"BIRTHDAY CARD, RETRO SPOT",SUKI SHOULDER BAG,0.003078,1.0,37.147609
1672,BUTTON BOX,SUKI SHOULDER BAG,0.004141,1.0,37.147609
863,ASSORTED TUTTI FRUTTI BRACELET,SUKI SHOULDER BAG,0.003078,1.0,37.147609
5975,PIZZA PLATE IN BOX,WOODEN STAR CHRISTMAS SCANDINAVIAN,0.003134,1.0,38.425806
271,TOAST ITS - FAIRY FLOWER,SUKI SHOULDER BAG,0.003526,1.0,37.147609
...,...,...,...,...,...
4357,HOT BATHS METAL SIGN,RED RETROSPOT OVEN GLOVE,0.003134,0.8,36.096970
1940,WOODLAND CHARLOTTE BAG,SUKI SHOULDER BAG,0.004253,0.8,29.718087
1811,CARD BILLBOARD FONT,GREEN REGENCY TEACUP AND SAUCER,0.003582,0.8,15.708132
5919,SET/6 RED SPOTTY PAPER PLATES,SET/20 RED RETROSPOT PAPER NAPKINS,0.004477,0.8,15.829900
