Association Rules

In [1]:
import ast
import csv
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.patches import Patch

# Mlxtend library
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

# pyECLAT library
from pyECLAT import ECLAT

# Load the raw baskets; the `list_of_goods` column carries one basket per row.
customer_basket = pd.read_csv('customer_basket.csv')

# SECURITY: this previously used eval(), which executes whatever expression a
# CSV cell contains. ast.literal_eval only accepts Python literals (lists,
# tuples, strings, numbers, ...) and raises on anything else, so a crafted
# data file can no longer run code in the kernel.
# String cells are presumably serialised lists of item names -- confirm against
# the data file. Non-string cells are passed through unchanged.
reader = [
    ast.literal_eval(item) if isinstance(item, str) else item
    for item in customer_basket['list_of_goods']
]

In [2]:
# One-hot encode the baskets: one row per basket, one column per distinct item.
te = TransactionEncoder()
te.fit(reader)  # learns the item vocabulary (exposed afterwards as te.columns_)
te_fit = te.transform(reader)
transactions_items = pd.DataFrame(data=te_fit, columns=te.columns_)

In [3]:
# First pass: keep only itemsets whose support is at least 5%.
frequent_itemsets_grocery = apriori(
    transactions_items,
    min_support=0.05,
    use_colnames=True,
)

In [4]:
# Show the frequent itemsets, highest support first.
frequent_itemsets_grocery.sort_values('support', ascending=False)

Unnamed: 0,support,itemsets
43,0.34900,(oil)
17,0.25052,(cooking oil)
3,0.18067,(babies food)
1,0.15952,(asparagus)
8,0.15134,(cake)
...,...,...
56,0.05175,(strong cheese)
47,0.05157,(pokemon sword)
76,0.05071,"(chicken, oil)"
53,0.05018,(samsung galaxy 10)


In [5]:
# Derive rules from the 5%-support itemsets, keeping those with confidence >= 20%.
rules_grocery = association_rules(
    frequent_itemsets_grocery,
    metric="confidence",
    min_threshold=0.2,
)

In [7]:
# Second pass: lower the support floor from 5% to 2%.
frequent_itemsets_grocery_iter_2 = apriori(
    transactions_items,
    min_support=0.02,
    use_colnames=True,
)

# Keep the same 20% minimum confidence as in the first pass.
rules_grocery_iter_2 = association_rules(
    frequent_itemsets_grocery_iter_2,
    metric="confidence",
    min_threshold=0.2,
)

In [8]:
# Narrow the rule table to the columns inspected below.
rules = rules_grocery_iter_2[
    [
        'antecedents',
        'consequents',
        'support',
        'confidence',
        'lift',
    ]
]

In [9]:
# Preview the first ten rules.
rules.head(n=10)

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(airpods),(energy drink),0.02357,0.323986,3.789313
1,(energy drink),(airpods),0.02357,0.275673,3.789313
2,(carrots),(asparagus),0.05928,0.536179,3.361205
3,(asparagus),(carrots),0.05928,0.371615,3.361205
4,(cauliflower),(asparagus),0.02146,0.434413,2.723251
5,(frozen vegetables),(asparagus),0.024,0.375587,2.354481
6,(mashed potato),(asparagus),0.03567,0.438153,2.746694
7,(asparagus),(mashed potato),0.03567,0.223608,2.746694
8,(melons),(asparagus),0.02685,0.460075,2.884124
9,(asparagus),(oil),0.03792,0.237713,0.681126


ECLAT METHOD

In [10]:
# ECLAT is fed the parsed baskets wrapped in a DataFrame
# (presumably one row per basket -- confirm the shape of `reader`).
eclat_groceries = ECLAT(
    data=pd.DataFrame(data=reader),
)

In [11]:
# Restrict the search to item pairs (min and max combination size are both 2)
# with support of at least 2%.
groceries_rules_indexes, groceries_rules_supports = eclat_groceries.fit(
    min_support=0.02,
    min_combination=2,
    max_combination=2,
)

Combination 2 by 2


13203it [00:37, 355.90it/s]


In [12]:
# Turn the {combination: support} mapping into a one-column frame indexed by
# the combination label.
rules_eclat_groceries = pd.DataFrame(
    data=list(groceries_rules_supports.values()),
    index=list(groceries_rules_supports),
    columns=['support'],
)

# Ten best-supported combinations.
rules_eclat_groceries.sort_values('support', ascending=False).head(n=10)

Unnamed: 0,support
oil & cooking oil,0.14461
oil & cake,0.10709
oil & candy bars,0.08261
ratchet & clank & babies food,0.08237
tomatoes & asparagus,0.07989
oil & tea,0.07785
oil & barbecue sauce,0.07766
cake & cooking oil,0.07009
oil & cologne,0.06737
babies food & cooking oil,0.06728


In [15]:
#APRIORI METHOD 

def apriori_rules(path, min_support=0.02, min_confidence=0.2):
    """Mine association rules from a basket CSV with the Apriori algorithm.

    Parameters
    ----------
    path : str or path-like
        CSV file readable by ``pd.read_csv`` that has a ``list_of_goods``
        column. String cells are parsed as Python literals (presumably a list
        of item names per basket -- confirm against the data file); non-string
        cells are passed through unchanged.
    min_support : float, default 0.02
        Minimum support handed to ``apriori``.
    min_confidence : float, default 0.2
        Minimum confidence handed to ``association_rules`` as ``min_threshold``.

    Returns
    -------
    pandas.DataFrame
        The ``antecedents``, ``consequents``, ``support``, ``confidence`` and
        ``lift`` columns of the mined rules.

    Raises
    ------
    ValueError, SyntaxError
        If a string cell of ``list_of_goods`` is not a valid Python literal.
    """
    customer_basket = pd.read_csv(path)

    # SECURITY: eval() was replaced by ast.literal_eval so that a crafted CSV
    # cell cannot execute code; only plain Python literals are accepted.
    reader = [
        ast.literal_eval(item) if isinstance(item, str) else item
        for item in customer_basket['list_of_goods']
    ]

    # One-hot encode the baskets (one column per distinct item).
    te = TransactionEncoder()
    te_fit = te.fit(reader).transform(reader)
    transactions_items = pd.DataFrame(te_fit, columns=te.columns_)

    frequent_itemsets = apriori(
        transactions_items, min_support=min_support, use_colnames=True
    )

    rules = association_rules(
        frequent_itemsets, metric="confidence", min_threshold=min_confidence
    )

    return rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']]

In [14]:
#ECLAT METHOD

def eclat_method(path, min_support=0.02, top_n=10):
    """Return the best-supported item pairs of a basket CSV using ECLAT.

    Parameters
    ----------
    path : str or path-like
        CSV file readable by ``pd.read_csv`` that has a ``list_of_goods``
        column. String cells are parsed as Python literals (presumably a list
        of item names per basket -- confirm against the data file); non-string
        cells are passed through unchanged.
    min_support : float, default 0.02
        Minimum support handed to ``ECLAT.fit``.
    top_n : int, default 10
        Number of rows to keep after sorting by support, descending.

    Returns
    -------
    pandas.DataFrame
        A single ``support`` column indexed by the combination labels returned
        by ``ECLAT.fit``, sorted by support descending and cut to ``top_n``
        rows.

    Raises
    ------
    ValueError, SyntaxError
        If a string cell of ``list_of_goods`` is not a valid Python literal.
    """
    customer_basket = pd.read_csv(path)

    # SECURITY: eval() was replaced by ast.literal_eval so that a crafted CSV
    # cell cannot execute code; only plain Python literals are accepted.
    reader = [
        ast.literal_eval(item) if isinstance(item, str) else item
        for item in customer_basket['list_of_goods']
    ]

    eclat_groceries = ECLAT(data=pd.DataFrame(reader))

    # fit() returns (indexes, supports); only the supports mapping is used.
    # Combination size is pinned to 2, i.e. item pairs only.
    _, groceries_rules_supports = eclat_groceries.fit(
        min_support=min_support,
        min_combination=2,
        max_combination=2,
    )

    rules_eclat_groceries = pd.DataFrame(
        list(groceries_rules_supports.values()),
        index=list(groceries_rules_supports.keys()),
        columns=['support'],
    )

    return rules_eclat_groceries.sort_values(by='support', ascending=False).head(top_n)
    