## Load Data

In [1]:
import numpy as np
import pandas as pd
from random import sample
from libs.utils import *

In [3]:
path_to_data = "C:/Users/Admin/DA_Projects/online_retail.txt"
min_support = 0.05
min_confidence = 0.6
path_to_order = "C:/Users/Admin/DA_Projects/order.txt"

In [5]:
order = load_order(path_to_order)
transactions = load_transactions(path_to_data, order)
sampled_transactions = transactions[:int(len(transactions) * 0.01)] 
filtered_transactions = [t for t in sampled_transactions if len(t) < 10]
if len(filtered_transactions) > 50:
    filtered_transactions = sample(filtered_transactions, 50)
flattened_items = [item for sublist in filtered_transactions for item in sublist]
order = sorted(set(flattened_items))
print(order)

['3 PIECE SPACEBOY COOKIE CUTTER SET', '3 TRADITIONAl BISCUIT CUTTERS  SET', '60 CAKE CASES VINTAGE CHRISTMAS', 'ALARM CLOCK BAKELIKE ORANGE', 'ANTIQUE GLASS DRESSING TABLE POT', 'ANTIQUE TALL SWIRLGLASS TRINKET POT', 'ASSORTED COLOUR BIRD ORNAMENT', 'ASSORTED COLOUR MINI CASES', 'BAG 125g SWIRLY MARBLES', 'BAKING SET SPACEBOY DESIGN', 'BATH BUILDING BLOCK WORD', 'BLACK HEART CARD HOLDER', 'BLACK RECORD COVER FRAME', 'BLUE BIRDHOUSE DECORATION', 'CANDLEHOLDER PINK HANGING HEART', 'CARD CIRCUS PARADE', 'CERAMIC CAKE STAND + HANGING CAKES', 'CERAMIC STRAWBERRY DESIGN MUG', 'CHILDS BREAKFAST SET SPACEBOY', 'CHILLI LIGHTS', 'CHOCOLATE HOT WATER BOTTLE', 'CHRISTMAS LIGHTS 10 REINDEER', 'CHRISTMAS MUSICAL ZINC HEART', 'CHRISTMAS MUSICAL ZINC STAR', 'CINAMMON SET OF 9 T-LIGHTS', 'CIRCUS PARADE CHILDRENS EGG CUP', 'CLASSICAL ROSE SMALL VASE', 'COLOUR GLASS T-LIGHT HOLDER HANGING', 'CREAM HEART CARD HOLDER', 'CREAM SLICE FLANNEL PINK SPOT', 'CUPCAKE LACE PAPER SET 6', 'DOVE DECORATION PAINTED Z

In [28]:
sum = 0
for sublist in filtered_transactions:
    sum += len(sublist)
print(sum)

178


In [30]:
len(filtered_transactions)

50

In [32]:
len(order)

142

## Initialization 

In [34]:
C = {}
L = {}
itemset_size = 1
discarded = {itemset_size: []}
C.update({itemset_size: [[f] for f in order]})

In [36]:
C

{1: [['3 PIECE SPACEBOY COOKIE CUTTER SET'],
  ['60 CAKE CASES VINTAGE CHRISTMAS'],
  ['ALARM CLOCK BAKELIKE CHOCOLATE'],
  ['ALARM CLOCK BAKELIKE GREEN'],
  ['ALARM CLOCK BAKELIKE IVORY'],
  ['ALARM CLOCK BAKELIKE ORANGE'],
  ['ALARM CLOCK BAKELIKE PINK'],
  ['ALARM CLOCK BAKELIKE RED'],
  ['ANT WHITE WIRE HEART SPIRAL'],
  ['ANTIQUE GLASS DRESSING TABLE POT'],
  ['ANTIQUE TALL SWIRLGLASS TRINKET POT'],
  ['ASSORTED BOTTLE TOP  MAGNETS'],
  ['ASSORTED COLOUR BIRD ORNAMENT'],
  ['ASSORTED COLOUR MINI CASES'],
  ['BAG 125g SWIRLY MARBLES'],
  ['BAKING SET SPACEBOY DESIGN'],
  ['BATH BUILDING BLOCK WORD'],
  ['BIRTHDAY PARTY CORDON BARRIER TAPE'],
  ['BLACK HEART CARD HOLDER'],
  ['BLACK RECORD COVER FRAME'],
  ['BLUE BIRDHOUSE DECORATION'],
  ['BLUE DRAWER KNOB ACRYLIC EDWARDIAN'],
  ['BREAD BIN DINER STYLE PINK'],
  ['BREAD BIN DINER STYLE RED'],
  ['CAKE PLATE LOVEBIRD WHITE'],
  ['CARD CIRCUS PARADE'],
  ['CERAMIC CAKE STAND + HANGING CAKES'],
  ['CERAMIC HEART FAIRY CAKE MONEY BANK'

__Create L1__

In [38]:
supp_count_L = {}
f, sup, new_discarded = get_frequent(C[itemset_size], filtered_transactions, min_support, discarded)
discarded.update({itemset_size : new_discarded})
L.update({itemset_size : f})
supp_count_L.update({itemset_size : sup})

In [40]:
print_table(L[1], supp_count_L[1])

Itemset | Frequency
['HAND WARMER RED POLKA DOT']  :  10
['HAND WARMER UNION JACK']  :  11
['JAM MAKING SET PRINTED']  :  3




## Apriori algorithm

In [42]:
k = itemset_size + 1
convergence = False
while not convergence:
    C.update({ k : join_set_itemsets(L[k-1], order)})
    f, sup, new_discarded = get_frequent(C[k], filtered_transactions, min_support, discarded) 
    discarded.update({k : new_discarded})
    L.update({k : f})
    supp_count_L.update({k : sup})
    if len(L[k]) == 0:
        convergence = True
    else:
        print("Table L{}: \n".format(k))
        print_table(L[k], supp_count_L[k])
    k += 1

Table L2: 

Itemset | Frequency
['HAND WARMER RED POLKA DOT', 'HAND WARMER UNION JACK']  :  10




## Generating the Association rules

In [44]:
assoc_rules_str = ""
num_trans = len(filtered_transactions)
for i in range(1, len(L)):
    for j in range(len(L[i])):
        s = powerset(L[i][j])
        for z in s:
            S = set(z)
            X = set(L[i][j])
            X_S = set(X-S)
            sup_x = supp_count_L[i][j]
            sup_x_s = count_occurences(X_S, filtered_transactions)
            conf = sup_x / count_occurences(S, filtered_transactions)
            lift = conf / (sup_x_s / num_trans)
            if conf >= min_confidence:
                assoc_rules_str += write_rules(X, X_S, S, conf, sup_x, lift, num_trans)

In [46]:
print(assoc_rules_str)

Freq. Itemset: {'HAND WARMER UNION JACK', 'HAND WARMER RED POLKA DOT'}
    Rule: ['HAND WARMER RED POLKA DOT'] -> ['HAND WARMER UNION JACK'] 
    Conf: 1.000     Supp: 0.200     Lift: 4.545 
Freq. Itemset: {'HAND WARMER UNION JACK', 'HAND WARMER RED POLKA DOT'}
    Rule: ['HAND WARMER UNION JACK'] -> ['HAND WARMER RED POLKA DOT'] 
    Conf: 0.909     Supp: 0.200     Lift: 4.545 



In [73]:
with open('apriori_result.txt', 'w') as f:
    f.write(assoc_rules_str)