# Supermarket recommendation system Code

In [1]:
import csv
import ast
import numpy as np
from itertools import combinations
from operator import itemgetter

### Supermarket data Preprocessing

In [2]:
trans = [] #all transactions
with open("supermarket.csv") as csv_file:
    read_file = csv.reader(csv_file, delimiter = ',')
    for row in read_file:
        row.pop(-1)
        trans.append(row)
all_items = [j for i in trans for j in i] #all items in all transactions
np_items = np.array(all_items)
unique = list(np.unique(np_items))
max_row = max(len(x) for x in trans)
#print(unique)

### To get Frequency for k=1 itemset 

In [3]:
def itemfreq(data):
    unique, counts = np.unique(data, return_counts=True)
    freq = dict(zip(unique, counts))
    return freq

### Apriori function to get frequent itemsets

In [4]:
def apriori_func(all_items, trans, minsup):

    can_set = []
    pruned_set = []
    item_dict = {}
    ini_freq = itemfreq(all_items)
    sup_count = {}
    sup_count[str(1)]= {k:v for k,v in ini_freq.items()}
    super_dict = {}
    super_dict[str(1)] = {k:v / len(trans) for k,v in ini_freq.items()}
    sub_a = [[b] for b in super_dict['1'].keys()]
    item_set = [sub_a]
    sup_check = {}
    sup_counter = {}

    for k in range(1, 48):
        for count, i in enumerate(sub_a):
            if count >= len(sub_a):
                    break
            for count_b, j in enumerate(sub_a, start=count+1):
                if count_b >= len(sub_a):
                    break
                p = sub_a[count]
                p_copy = p.copy()
                q = sub_a[count_b]
                if p[0:-1] == q[0:-1]:
                    p_copy.append(q[-1])
                    can_set.append(p_copy)

        for subset in can_set:
            check = combinations(subset, k)
            lst = [list(s) for s in check]
            for indx, l in enumerate(lst, start=0):
                if l in item_set[k-1]:
                    if indx == k:
                        pruned_set.append(subset) ### creates candidate set

        for m in pruned_set:
            for n in trans:
                s = str(m)
                if all(stuff in n for stuff in m):
                    item_dict[s] = item_dict.get(s, 0) + 1  ###does support count for itemset
                else:
                    item_dict[s] = item_dict.get(s, 0)
                    
        #sup_count[str(k+1)].append(item_dict)
        
        for key, val in item_dict.items():
            support = val / len(trans) ###calculates support and applies constraint
            if support >= minsup:
                sup_check[key] = round(support, 3)
                sup_counter[key] = val
                
        if bool(sup_check) == False:
            break
        sup_count[str(k+1)] = sup_counter
        sup_set = [ast.literal_eval(v) for v in sup_check.keys()]
        super_dict[str(k+1)] = sup_check
        item_dict = {}
        item_set.insert(k, sup_set)
        sub_a = item_set[k]
        can_set = []
        pruned_set = []
        sup_check = {}
        sup_counter = {}
    return super_dict, sup_count ###returns support dict and support count dict

### Function to generate rules and calculate confidence/lift as well as sort the output

In [5]:
def association_func(frequent_sets, freq_sup, minconf, minlift):
    denom_lst = []
    conf_dict = {}
    lift_dict = {}
    final_dict = {}
    for k in range (2, len(frequent_sets) + 1):
        for keyy, value in frequent_sets[str(k)].items():
            key = ast.literal_eval(keyy)
            for i in range(1, len(key)):
                denom = combinations(key, i)
                denom_lst = [list(x) for x in denom]
                for j in denom_lst:
                    numer_lst = [y for y in key if y not in j]
                    conf_val = (value / frequent_sets.get(str(len(numer_lst))).get(str(numer_lst)))
                    lift_val = (conf_val / freq_sup.get(str(len(j))).get(str(j)))
                    b_val = freq_sup.get(str(len(j))).get(str(j))
                    if (conf_val >=  minconf) and (lift_val >= minlift):
                        val_lst = [conf_val, round(lift_val, 3)]
                        final_dict[str(numer_lst) + '-->' + str(j)] = { "rule": str(numer_lst) + '-->' + str(j), "length": len(key), 'lift': round(lift_val, 3), 'confidence': round(conf_val, 3), 'support': freq_sup.get(str(len(key))).get(keyy)}     
    final_lst = list(final_dict.values())
    ###sort by length, lift, conf, sup respectively then descending order
    sorted_final = sorted(final_lst, key=itemgetter('length', 'lift', 'confidence', 'support'), reverse=True) 
    return sorted_final

### Generate frequent itemsets (by running Apriori Function on datasets)

In [6]:
def apriori_run(all_items, trans, minsup):
    sup, sup_count = apriori_func(all_items, trans, minsup)
    sup['1'] = {k.replace(k, str([k])): v for k, v in sup.get('1').items()} 
    sup_count['1'] = {k.replace(k, str([k])): v for k, v in sup_count.get('1').items()} 
    
    return sup, sup_count

In [7]:
sup, sup_count = apriori_run(all_items, trans, 0.15) 
###Takes three parameters all_items (which is a list of every item in dataset), 
###trans(which is each row in dataset) and minsup
###returns sup (dict of support of each itemset) and sup_count (dict of support count of each itemsset)

### Generate Rules, calculations and sort

In [8]:
def rules_run(sup, sup_count, minconf, minlift):
    total_sets = sum([len(t) for t in sup.values()]) #number of frequent itemsets
    rules = association_func(sup_count, sup, minconf, minlift) #Takes four parameters support count, support, minconf, minlift respectively
    rule_count = 0
    for r in rules:
        rule_count += 1
    return total_sets, rules, rule_count

In [9]:
#Takes four parameters support, support count, minconf, minlift respectively
total_sets, rules, rule_count = rules_run(sup, sup_count, 0.80, 0)

In [10]:
print('number of frequents itemset:', total_sets)
print('number of rules:', rule_count)
print('Itemsets:')   
for s in sup.values():
    for x, y in s.items():
        print(x, y)  ###prints itemsets with their support
print(' ')
print('Rules:')
for r in rules:
    print(r)

number of frequents itemset: 1838
number of rules: 737
Itemsets:
['750ml red imp'] 0.020963907499459693
['750ml red nz'] 0.019667170953101363
['750ml white imp'] 0.02139615301491247
['750ml white nz'] 0.060730494921115194
['baby needs'] 0.13377998703263452
['bake off products'] 0.12146098984223039
['baking needs'] 0.6040631078452561
['beef'] 0.37583747568618975
['beverages hot'] 0.09833585476550681
['biscuits'] 0.5629997838772423
['bread and cake'] 0.719688783228874
['breakfast food'] 0.40242057488653554
['brushware'] 0.023557380592176356
['canned fish-meat'] 0.20337151502053166
['canned fruit'] 0.27728549816295656
['canned vegetables'] 0.3408255889345148
['casks red wine'] 0.011022260644045817
['casks white wine'] 0.03760535984439162
['cheese'] 0.40609466176788417
['chickens'] 0.0045385779122541605
['cigarette cartons'] 0.007996542035876377
['cigs-tobacco pkts'] 0.15106980765074562
['cleaners-polishers'] 0.2727469202507024
['coffee'] 0.2364382969526691
['cold-meats'] 0.145234493192133