In [176]:
import numpy as np
import pandas as pd
import scipy
from scipy.optimize import minimize


In [177]:
def eclat(db, minsup):
    def generate_frequent_itemsets(P, minsup, F):
        for i, p_i in enumerate(P):
            Xa, t_Xa = p_i
            if not isinstance(Xa, list):
                Xa = [Xa]
            F.append((Xa, len(t_Xa)))
            Pa = []
            for j in range(i + 1, len(P)):
                Xb, t_Xb = P[j]
                if not isinstance(Xb, list):
                    Xb = [Xb]
                if j > i:
                    Xab = list(set(Xa).union(set(Xb)))
                    Xab.sort()
                    t_Xab = t_Xa.intersection(t_Xb)
                    if len(t_Xab) >= minsup:
                        Pa.append((Xab, t_Xab))
            if len(Pa) != 0:
                generate_frequent_itemsets(Pa, minsup, F)

    P = {}
    for i in range(len(db)):
        for item in db[i]:
            if item in P:
                P[item].add(i)
            else:
                P[item] = {i}
    P = list(P.items())
    
    condition = lambda x: len(x[1]) >= minsup
    P = [item for item in P if condition(item)]

    P = sorted(P, key=lambda x: x[0])
    F = []
    
    generate_frequent_itemsets(P, minsup, F)

    return [(F[i][0], F[i][1]) for i in range(len(F))]

In [178]:
def getConfidence(sup_xy, sup_x):
    return sup_xy / sup_x

In [179]:
def getLift(conf, sup_y, len_database):
    rsup_y = sup_y / len_database
    lift = conf / rsup_y
    return lift

In [180]:
def getLeverage(sup_xy, sup_x, sup_y, len_database):
    rsup_xy = sup_xy / len_database
    rsup_x = sup_x / len_database
    rsup_y = sup_y / len_database

    leverage = rsup_xy - (rsup_x * rsup_y)
    return leverage

In [181]:
def getJaccard(sup_xy, sup_x, sup_y):
    jaccard_denominator = sup_x + sup_y - sup_xy
    jaccard = sup_xy / jaccard_denominator
    return jaccard

In [182]:
def getConviction(conf, sup_y, len_database):
    rsup_y = sup_y / len_database
    conviction_denominator = 1 - conf
    conviction = (1 - rsup_y) / conviction_denominator
    return conviction

In [183]:
def getOddsRatio(sup_xy, sup_x, sup_y, len_database):
    sup_nox_y = sup_y - sup_xy
    sup_x_noy = sup_x - sup_xy
    sup_nox_noy = len_database - sup_xy - sup_nox_y - sup_x_noy

    odds_denominator = sup_x_noy * sup_nox_y
    odds = (sup_xy * sup_nox_noy) / odds_denominator
    return odds

In [184]:
def getStrongRulesFromFrequentSets(fsets, minconf, len_database):
    strong_rules = []
    fsets_sets = [item[0] for item in fsets]
    fsets_supp = [item[1] for item in fsets]
    for i, frequentSet in enumerate(fsets_sets):
        if len(frequentSet) >= 2:
            A = getSubsets(set=frequentSet)
            while len(A) != 0:
                X = A[-1]
                A.remove(X)

                sup_xy = fsets_supp[i]
                
                index_x = fsets_sets.index(X)
                sup_x = fsets_supp[index_x]

                conf = getConfidence(sup_xy, sup_x)
                if conf >= minconf:
                    #Y is the complement of X in the set frequentSet
                    Y = list(frequentSet)
                    for item in X:
                        Y.remove(item)

                    sup_y = fsets_supp[fsets_sets.index(Y)]

                    rsup_xy = sup_xy / len_database
                    lift = getLift(conf, sup_y, len_database)
                    lev = getLeverage(sup_xy, sup_x, sup_y, len_database)
                    jacc = getJaccard(sup_xy, sup_x, sup_y)
                    conv = getConviction(conf, sup_y, len_database)
                    odds = getOddsRatio(sup_xy, sup_x, sup_y, len_database)

                    strong_rules.append((X, Y, (conf, lift, lev, conv, odds)))
                else:
                    if len(X) >= 2:
                        W_sets = getSubsets(X)
                        for W in W_sets:
                            if W in A:
                                A.remove(W)

    return strong_rules

def getSubsets(set):
    subsets = []
    x = len(set)
    for i in range(1 << x):
       subsets.append([set[j] for j in range(x) if (i & (1 << j))])

    subsets.pop(-1)
    subsets.pop(0)

    return subsets

In [185]:
def getStrongRulesForDatabase(db, minsup, minconf):
    fsets = eclat(db, minsup)

    len_data = len(db)

    strong_rules = getStrongRulesFromFrequentSets(fsets, minconf, len_data)
    return strong_rules

In [186]:
def calculate_weighted_score(conf, lift, lev, conv, odds, weights):
    total_score = np.average([conf, lift, lev, conv, odds], weights=weights, axis=0)
    return total_score

In [187]:
def objective_function(weights, conf, lift, lev, conv, odds):
    total_score = 0
    for i in range(len(lift)):
        score = calculate_weighted_score(conf[i], lift[i], lev[i], conv[i], odds[i], weights) #sup[i], conf[i], lift[i], lev[i], jacc[i], conv[i], odds[i], weights
        total_score = total_score + score

    total_score = total_score/len(lift)
    return 0-total_score

def optimize_hyperparameters(metrics):
    bounds = [(0, 1)] * len(metrics)
    initial_weights = [1/len(metrics)] * len(metrics)
    
    result = minimize(objective_function, initial_weights, args=metrics, bounds=bounds)

    optimal_weights = result.x
    return optimal_weights

In [191]:
class Recommender:
    """
        This is the class to make recommendations.
        The class must not require any mandatory arguments for initialization.
    """
    def __init__(self):
        self.rules = {}
        self.prices = {}
        self.weights = []


    def train(self, prices, database) -> None:
        """
            allows the recommender to learn which items exist, which prices they have, and which items have been purchased together in the past
            :param prices: a list of prices in USD for the items (the item ids are from 0 to the length of this list - 1)
            :param database: a list of lists of item ids that have been purchased together. Every entry corresponds to one transaction
            :return: the object should return itself here (this is actually important!)
        """
        
        rules_db = getStrongRulesForDatabase(db=database, minsup=0.002*len(database), minconf=0.1)
        premises, conclusions, metrics = [], [], []

        for rule in rules_db:
            premises.append(tuple(rule[0]))
            conclusions.append(tuple(rule[1]))
            metrics.append(rule[2])
        
        print(len(metrics))
        print(len(metrics[0]))
 
        normalized_metrics = []
        grouped_metrics = ()
        for i in range(len(metrics[0])):
            metric = [x[i] for x in metrics]
            grouped_metrics = grouped_metrics + (metric,)
            min_metric = min(metric)
            max_metric = max(metric)
                
            normalized_metric = []
            for meassure in metric:
                normalized_meassure = (meassure - min_metric) / (max_metric - min_metric)
                normalized_metric.append(normalized_meassure)
            normalized_metrics.append(normalized_metric)

        metrics = list(zip(*normalized_metrics))
        
        print(len(metrics))
        print(len(metrics[0]))
        
        temp_rules = list(zip(premises,conclusions))
        for i, rule in enumerate(temp_rules):
            self.rules[rule] = metrics[i]

        for i, price in enumerate(prices):
            self.prices[i] = price
        self.weights = optimize_hyperparameters(grouped_metrics)

        return self

    def get_recommendations(self, cart:list, max_recommendations:int) -> list:
        """
            makes a recommendation to a specific user
            
            :param cart: a list with the items in the cart
            :param max_recommendations: maximum number of items that may be recommended
            :return: list of at most `max_recommendations` items to be recommended
        """

        rules = list(self.rules.keys())
        premises, conclussions = [],[]
        for rule in rules:
            premises.append(list(rule[0]))
            conclussions.append(list(rule[1]))

        #Gets only the conclusions in which the cart is a subset or equal to the premise
        possible_recommendations = []
        for i, premise in enumerate(premises):
            if (all(x in cart for x in premise)):
                rule = (tuple(premise), tuple(conclussions[i]))
                metrics = self.rules[rule]
                
                total_score = calculate_weighted_score(metrics[0], metrics[1], metrics[2],metrics[3],metrics[4],self.weights)
                
                possible_recommendations.append((conclussions[i], total_score))
        possible_recommendations = sorted(possible_recommendations, key=lambda x:x[1])

        #Gets the (at least) 10 best items according to our evaluation and sorts them by price
        best_recommendations = []
        best_recommendations_prices = []

        for i in range(len(possible_recommendations)):
            if len(best_recommendations) >= max_recommendations + 30:
                break
            
            #Add the items in the best rule
            for item in possible_recommendations[-1][0]:
                if item not in best_recommendations:
                    best_recommendations.append(item)
                    best_recommendations_prices.append(self.prices[item])
                possible_recommendations[-1][0].remove(item)
                
            possible_recommendations.pop(-1)
        
        best_recommendations = [x for _, x in sorted(zip(best_recommendations_prices, best_recommendations), key=lambda pair: pair[0])]

        recommendations = []
        i=0
        while i < max_recommendations:
            if len(best_recommendations) == 0:
                break

            if best_recommendations[-1] not in recommendations:
                recommendations.append(best_recommendations.pop(-1))
                i = i + 1
            else:
                best_recommendations.pop(-1)

        if len(recommendations) > 0:
            return recommendations
        else:
            return[0]

In [189]:
with open('prices.json', 'r') as file:
    prices = eval(file.read())

with open('training_data.json', 'r') as file:
    data = eval(file.read())

In [192]:
import random

recommender = Recommender()
recommender = recommender.train(prices=prices, database=data)

carrito = [random.randint(0,99),random.randint(0,99),random.randint(0,99),random.randint(0,99),random.randint(0,99)]
carrito = list(set(carrito))

a = recommender.get_recommendations(cart=carrito, max_recommendations=3)
print(a)
print(list(zip(range(99), prices)))

16036
5
16036
5
[57, 31, 18]
[(0, 45.75), (1, 12.85), (2, 14.42), (3, 4.0), (4, 11.97), (5, 16.12), (6, 8.88), (7, 11.73), (8, 16.04), (9, 1.86), (10, 2.09), (11, 4.35), (12, 1.3), (13, 6.57), (14, 8.69), (15, 3.75), (16, 9.88), (17, 5.64), (18, 26.39), (19, 3.57), (20, 24.07), (21, 13.11), (22, 11.11), (23, 0.5), (24, 5.68), (25, 3.84), (26, 2.66), (27, 5.3), (28, 4.09), (29, 10.46), (30, 3.88), (31, 29.68), (32, 4.24), (33, 3.81), (34, 2.91), (35, 1.52), (36, 2.11), (37, 35.01), (38, 3.44), (39, 42.85), (40, 28.3), (41, 4.82), (42, 1.38), (43, 1.65), (44, 4.95), (45, 1.8), (46, 4.01), (47, 6.15), (48, 1.97), (49, 1.93), (50, 6.25), (51, 5.82), (52, 1.92), (53, 2.72), (54, 5.04), (55, 10.7), (56, 15.91), (57, 32.52), (58, 4.13), (59, 5.72), (60, 8.34), (61, 4.78), (62, 12.57), (63, 2.64), (64, 27.54), (65, 3.37), (66, 1.41), (67, 15.75), (68, 1.25), (69, 3.09), (70, 2.32), (71, 12.18), (72, 8.35), (73, 4.7), (74, 6.78), (75, 0.78), (76, 6.18), (77, 7.88), (78, 34.75), (79, 2.44), (80,