In [11]:
import numpy as np
import pandas as pd
import json

In [2]:
class Recommender:
    """
        This is the class to make recommendations.
        The class must not require any mandatory arguments for initialization.
    """

    def train(self, prices, database) -> "Recommender":
        """
            allows the recommender to learn which items exist, which prices they have, and which items have been purchased together in the past

            NOTE: this is still a placeholder; nothing is learned or stored yet.

            :param prices: a list of prices in USD for the items (the item ids are from 0 to the length of this list - 1)
            :param database: a list of lists of item ids that have been purchased together. Every entry corresponds to one transaction
            :return: the object itself (this is actually important, callers rely on it!)
        """

        # do something

        # return this object again
        return self

    def get_recommendations(self, cart: list, max_recommendations: int) -> list:
        """
            makes a recommendation to a specific user

            NOTE: placeholder logic; the cart is ignored and the same item is always suggested.

            :param cart: a list with the items in the cart
            :param max_recommendations: maximum number of items that may be recommended
            :return: list of at most `max_recommendations` items to be recommended
        """
        # always recommends the same item (requires that there are at least 43 items)
        baseline = [42]
        # Honour the documented upper bound: a limit of zero (or less) yields no recommendation.
        return baseline[:max(0, max_recommendations)]

In [3]:
def eclat(db, minsup):
    """
    Mine frequent itemsets by intersecting per-item transaction-id sets.

    :param db: list of transactions; each transaction is an iterable of hashable, mutually comparable items
    :param minsup: minimum number of transactions an itemset must occur in (absolute count)
    :return: list of `(itemset, support)` tuples, where `itemset` is a sorted list of items and
             `support` its transaction count; entries appear in depth-first order
    """
    def _extend(candidates, threshold, found):
        # Every candidate is frequent: record it, then try to grow it with each later sibling.
        for pos, (items, tids) in enumerate(candidates):
            prefix = items if isinstance(items, list) else [items]
            found.append((prefix, len(tids)))

            children = []
            for sibling_items, sibling_tids in candidates[pos + 1:]:
                sibling = sibling_items if isinstance(sibling_items, list) else [sibling_items]
                merged = sorted(set(prefix) | set(sibling))
                shared = tids & sibling_tids
                if len(shared) >= threshold:
                    children.append((merged, shared))

            if children:
                _extend(children, threshold, found)

    # Vertical layout: item -> set of indices of the transactions containing it.
    tidsets = {}
    for tid, transaction in enumerate(db):
        for item in transaction:
            if item not in tidsets:
                tidsets[item] = set()
            tidsets[item].add(tid)

    # Keep only the frequent single items, ordered by item.
    frequent_items = sorted(
        ((item, tids) for item, tids in tidsets.items() if len(tids) >= minsup),
        key=lambda pair: pair[0],
    )

    result = []
    _extend(frequent_items, minsup, result)

    return [(itemset, support) for itemset, support in result]


In [36]:
from itertools import chain, combinations

def getStrongRulesFromFrequentSets(fsets, minconf):
    """
    Derive strong association rules from frequent itemsets (UNFINISHED).

    NOTE(review): this function is incomplete. The `while` loop below has no body, so
    Python rejects the definition as written. `minconf` is never read and nothing is
    ever appended to `strong_rules`.

    NOTE(review): a confidence check would normally need support counts, but the caller
    `getStrongRulesForDatabase` passes the itemsets only. Confirm how supports are meant
    to reach this function before implementing the loop.

    :param fsets: iterable of frequent itemsets; only itemsets with more than one element are processed
    :param minconf: minimum confidence threshold (not used yet)
    :return: the list `strong_rules` (nothing is added to it yet)
    """
    strong_rules = []
    for frequentSet in fsets:
        # Itemsets with a single element are skipped.
        if len(frequentSet) > 1:
            # All subsets of the itemset, grouped by size r = 0 .. len(frequentSet).
            A = list(chain.from_iterable(combinations(frequentSet, r) for r in range(len(frequentSet) + 1)))
            # Drop the empty subset (first entry) and the full itemset (last entry),
            # leaving only the non-empty proper subsets.
            A.pop(0)
            A.pop(-1)
            # TODO(review): loop body is missing. Whatever goes here must also shrink A,
            # otherwise this loop can never terminate.
            while len(A) != 0:
                # TODO(review): process the subsets in A and remove them as they are handled.


    return strong_rules

In [34]:
def getStrongRulesForDatabase(db, minsup, minconf):
    """
    Mine the frequent itemsets of `db` and hand them to the rule generator.

    :param db: transaction database, forwarded to `eclat`
    :param minsup: minimum support, forwarded to `eclat`
    :param minconf: minimum confidence, forwarded to `getStrongRulesFromFrequentSets`
    :return: whatever `getStrongRulesFromFrequentSets` returns for the mined itemsets
    """
    # Only the itemsets are passed on; the support counts from eclat are dropped here.
    frequent_itemsets = [mined[0] for mined in eclat(db, minsup)]
    return getStrongRulesFromFrequentSets(frequent_itemsets, minconf)

In [10]:
# Read both JSON inputs with context managers so each file handle is closed
# even if json.load raises (e.g. on malformed JSON).
with open('prices.json') as file_prices:
    prices = json.load(file_prices)

with open('training_data.json') as file_data:
    data = json.load(file_data)

In [13]:
def set_pandas_display_options():
    """Raise pandas' display limits (columns, rows, column width, line width) for this session."""
    overrides = {
        "display.max_columns": 1000,
        "display.max_rows": 2240,
        "display.max_colwidth": 199,
        "display.width": 1000,
    }
    for option_name, option_value in overrides.items():
        pd.set_option(option_name, option_value)

set_pandas_display_options()

In [15]:
# Minimum support as an absolute transaction count (eclat compares it against the
# number of transactions containing an itemset).
minsup = 50

# Mine the frequent itemsets of the loaded training transactions and tabulate
# each itemset next to its support count.
F_shop = eclat(data, minsup)
df = pd.DataFrame(F_shop, columns=['Frequent Itemset', 'Support'])
df

Unnamed: 0,Frequent Itemset,Support
0,[0],54
1,[1],287
2,"[1, 10]",56
3,"[1, 31]",55
4,"[1, 61]",52
...,...,...
2469,"[95, 96]",128
2470,[96],932
2471,[97],250
2472,[98],617


In [37]:
# Run rule generation on the training data with the same minsup as above.
# minconf=1 presumably demands 100% confidence; TODO confirm once rule generation is implemented.
test = getStrongRulesForDatabase(data, minsup=minsup, minconf=1)


[(1,), (10,)]
[(1,), (31,)]
[(1,), (61,)]
[(1,), (77,)]
[(2,), (3,)]
[(2,), (5,)]
[(2,), (9,)]
[(2,), (10,)]
[(2,), (12,)]
[(2,), (14,)]
[(2,), (20,)]
[(2,), (23,)]
[(2,), (24,)]
[(2,), (28,)]
[(2,), (29,)]
[(2,), (31,)]
[(2,), (32,)]
[(2,), (33,)]
[(2,), (34,)]
[(2,), (35,)]
[(2,), (40,)]
[(2,), (42,)]
[(2,), (43,)]
[(2,), (49,)]
[(2,), (57,)]
[(2,), (58,)]
[(2,), (60,)]
[(2,), (61,)]
[(2,), (65,)]
[(2,), (66,)]
[(2,), (69,)]
[(2,), (70,)]
[(2,), (75,)]
[(2,), (77,)]
[(2,), (94,)]
[(2,), (95,)]
[(2,), (96,)]
[(3,), (5,)]
[(3,), (9,)]
[(3,), (10,)]
[(3,), (12,)]
[(3,), (13,)]
[(3,), (14,)]
[(3,), (15,)]
[(3,), (16,)]
[(3,), (20,)]
[(3,), (23,)]
[(3,), (24,)]
[(3,), (26,)]
[(3,), (27,)]
[(3,), (28,)]
[(3,), (29,)]
[(3,), (31,)]
[(3,), (32,)]
[(3,), (33,)]
[(3,), (34,)]
[(3,), (35,)]
[(3,), (36,)]
[(3,), (40,)]
[(3,), (41,)]
[(3,), (42,)]
[(3,), (43,)]
[(3,), (45,)]
[(3,), (46,)]
[(3,), (47,)]
[(3,), (48,)]
[(3,), (49,)]
[(3,), (52,)]
[(3,), (53,)]
[(3,), (57,)]
[(3,), (58,)]
[(3,), (60,