In [1]:
import pandas as pd
import numpy as np
from itertools import combinations, chain
import csv

## Rule Class

In [2]:
class Rule:
    def __init__(
        self,
        S: list,
        X_S: list,
        items: list,
        sup,
        conf, 
        lift
    ):
        self.S = S
        self.X_S = X_S
        self.items = items
        self.sup = sup
        self.conf = conf
        self.lift = lift

    
    def __str__(self):
        s1 = []
        s2 = []
        for it1 in self.S:
            s1.append(self.items[it1])
        for it2 in self.X_S:
            s2.append(self.items[it2])
        return "Rule: {} -> {}   Conf:{}, Sup:{}, Lift:{}".format(list(s1), list(s2), round(self.conf, 2), round(self.sup, 2),
                                                                  round(self.lift, 2))
    
    def __lt__(self, other):
        if len(self.S)+len(self.X_S) == len(other.S)+len(other.X_S):
            if self.lift == other.lift:
                return self.conf > other.conf
            else:
                return self.lift > other.lift
        else:
            return len(self.S)+len(self.X_S) < len(other.S)+len(other.X_S)

## Get Transaction Matrix

In [3]:
def get_transaction_matrix(filename):
    
    items=[]
    
    with open(filename, 'r', encoding="UTF-8") as read_obj:

        csv_reader = csv.reader(read_obj)
        k=0
        
        for row in csv_reader:
            row = list(filter(None, row))
            for item in row:
                if item not in items:
                    items.append(item)
            k+=1
        read_obj.close()
        
    
    a1 = np.zeros([k,len(items)], dtype=int) 
    with open(filename,'r', encoding="UTF-8") as read_obj:
    
        csv_reader = csv.reader(read_obj)
        k=0
    
        for row in csv_reader:
            row = list(filter(None, row))
            for item in row:
                a1[k][items.index(item)] = 1
            k += 1
    
        return a1, items

## Get Count 

In [4]:
def get_count(itemset, t_matrix):
    
        for item in itemset:
            
            t_matrix = t_matrix[t_matrix[:,item]==1]
            
        return len(t_matrix)

## Size 1 Frequent Set Generation

In [5]:
def get_1_frequent_sets(k_itemset, n, t_matrix, min_sup, counts):
    
    freq_set = set()
    
    for x in range(len(k_itemset)):
        
        x = frozenset([x])
        c = get_count(x, t_matrix)
        
        if c/n >= min_sup:
            
            counts[x] = c
            freq_set.add(x)
    
    return freq_set, counts

## Get Frequent Sets

In [6]:
def get_frequent_sets(k_itemset, n, k, min_sup, t_matrix, counts):
    
    item_set = set()
    
    for its1 in k_itemset:
        for its2 in k_itemset:
            if len(its1.union(its2)) == k:
                item_set.add(frozenset(its1.union(its2)))
    
    freq_set = set()
    
    for its in item_set:
        
        c = get_count(its, t_matrix)
        
        if (c)/n >= min_sup:
            
            counts[its] = c
            freq_set.add(its)
    
    return freq_set, counts

## Get Rules 

In [7]:
def get_rules(k_itemset, n, t_matrix, items, index, min_sup, min_conf, min_lift, counts):
    
    rules=[]

    for its in k_itemset:
        
        count = counts[its]
        r = list(chain.from_iterable(combinations(its, r) for r in range(1, len(its)+1)))
        r.pop()
  
        for s in r:
                    
            S = set(s)
            X = set(its)
            X_S = set(X-S)

            support_x_s = counts[frozenset(X_S)]
            confidence = count/(counts[frozenset(S)])
            
            if confidence >= min_conf:

                lift = confidence/ (support_x_s/n)
                
                if lift >= min_lift:
                    
                    rs = Rule(list(S), list(X_S), items, support_x_s, confidence, lift)
                    rules.append(rs)
    
    return rules

## Main function

In [8]:
def main(filename, min_sup, min_conf, min_lift):
    
    t_matrix, items = get_transaction_matrix(filename)
    
    index = np.array([k for k in range(len(items))])
    k_itemset = index.copy()
    n = len(t_matrix)
    counts = dict()
    rules = []
    
    k_itemset, counts = get_1_frequent_sets(k_itemset, n, t_matrix, min_sup, counts)
    
    k=2
    
    while len(k_itemset) != 0:
        
        k_itemset, counts = get_frequent_sets(k_itemset, n, k, min_sup, t_matrix, counts)
        r = get_rules(k_itemset, n, t_matrix, items, index, min_sup, min_conf, min_lift, counts)
        k += 1
        rules = rules + r
    
    rules.sort()
    
    for rule in rules:
        
        print(rule)

## Caller

In [9]:
filename = "supermarket.csv"
min_sup = 0.25
min_conf = 0.8
min_lift = 1

main(filename, min_sup, min_conf, min_lift)

Rule: ['total = high'] -> ['bread and cake']   Conf:0.84, Sup:3330, Lift:1.17
Rule: ['margarine'] -> ['bread and cake']   Conf:0.8, Sup:3330, Lift:1.11
Rule: ['margarine', 'fruit'] -> ['bread and cake']   Conf:0.85, Sup:3330, Lift:1.18
Rule: ['biscuits', 'vegetables'] -> ['bread and cake']   Conf:0.84, Sup:3330, Lift:1.17
Rule: ['milk-cream', 'tissues-paper prd'] -> ['bread and cake']   Conf:0.84, Sup:3330, Lift:1.17
Rule: ['milk-cream', 'biscuits'] -> ['bread and cake']   Conf:0.84, Sup:3330, Lift:1.17
Rule: ['biscuits', 'fruit'] -> ['bread and cake']   Conf:0.84, Sup:3330, Lift:1.17
Rule: ['milk-cream', 'margarine'] -> ['bread and cake']   Conf:0.84, Sup:3330, Lift:1.17
Rule: ['margarine', 'biscuits'] -> ['bread and cake']   Conf:0.84, Sup:3330, Lift:1.16
Rule: ['frozen foods', 'biscuits'] -> ['bread and cake']   Conf:0.83, Sup:3330, Lift:1.16
Rule: ['margarine', 'vegetables'] -> ['bread and cake']   Conf:0.83, Sup:3330, Lift:1.16
Rule: ['biscuits', 'tissues-paper prd'] -> ['bread an