In [208]:
import numpy as np
import pandas as pd
import scipy
from scipy.optimize import minimize


In [209]:
def eclat(db, minsup):
    def generate_frequent_itemsets(P, minsup, F):
        for i, p_i in enumerate(P):
            Xa, t_Xa = p_i
            if not isinstance(Xa, list):
                Xa = [Xa]
            F.append((Xa, len(t_Xa)))
            Pa = []
            for j in range(i + 1, len(P)):
                Xb, t_Xb = P[j]
                if not isinstance(Xb, list):
                    Xb = [Xb]
                if j > i:
                    Xab = list(set(Xa).union(set(Xb)))
                    Xab.sort()
                    t_Xab = t_Xa.intersection(t_Xb)
                    if len(t_Xab) >= minsup:
                        Pa.append((Xab, t_Xab))
            if len(Pa) != 0:
                generate_frequent_itemsets(Pa, minsup, F)

    P = {}
    for i in range(len(db)):
        for item in db[i]:
            if item in P:
                P[item].add(i)
            else:
                P[item] = {i}
    P = list(P.items())
    
    condition = lambda x: len(x[1]) >= minsup
    P = [item for item in P if condition(item)]

    P = sorted(P, key=lambda x: x[0])
    F = []
    
    generate_frequent_itemsets(P, minsup, F)

    return [(F[i][0], F[i][1]) for i in range(len(F))]

In [210]:
def getConfidence(sup_xy, sup_x):
    return sup_xy / sup_x

In [211]:
def getLift(conf, sup_y, len_database):
    rsup_y = sup_y / len_database
    lift = conf / rsup_y
    return lift

In [212]:
def getLeverage(sup_xy, sup_x, sup_y, len_database):
    rsup_xy = sup_xy / len_database
    rsup_x = sup_x / len_database
    rsup_y = sup_y / len_database

    leverage = rsup_xy - (rsup_x * rsup_y)
    return leverage

In [213]:
def getJaccard(sup_xy, sup_x, sup_y):
    jaccard_denominator = sup_x + sup_y - sup_xy
    jaccard = sup_xy / jaccard_denominator
    return jaccard

In [214]:
def getConviction(conf, sup_y, len_database):
    rsup_y = sup_y / len_database
    conviction_denominator = 1 - conf
    conviction = (1 - rsup_y) / conviction_denominator
    return conviction

In [215]:
def getOddsRatio(sup_xy, sup_x, sup_y, len_database):
    sup_nox_y = sup_y - sup_xy
    sup_x_noy = sup_x - sup_xy
    sup_nox_noy = len_database - sup_xy - sup_nox_y - sup_x_noy

    odds_denominator = sup_x_noy * sup_nox_y
    odds = (sup_xy * sup_nox_noy) / odds_denominator
    return odds

In [216]:
def getStrongRulesFromFrequentSets(fsets, minconf, len_database):
    strong_rules = []
    fsets_sets = [item[0] for item in fsets]
    fsets_supp = [item[1] for item in fsets]
    for i, frequentSet in enumerate(fsets_sets):
        if len(frequentSet) >= 2:
            A = getSubsets(set=frequentSet)
            while len(A) != 0:
                X = A[-1]
                A.remove(X)

                sup_xy = fsets_supp[i]
                
                index_x = fsets_sets.index(X)
                sup_x = fsets_supp[index_x]

                conf = getConfidence(sup_xy, sup_x)
                if conf >= minconf:
                    #Y is the complement of X in the set frequentSet
                    Y = list(frequentSet)
                    for item in X:
                        Y.remove(item)

                    sup_y = fsets_supp[fsets_sets.index(Y)]

                    rsup_xy = sup_xy / len_database
                    lift = getLift(conf, sup_y, len_database)
                    lev = getLeverage(sup_xy, sup_x, sup_y, len_database)
                    jacc = getJaccard(sup_xy, sup_x, sup_y)
                    conv = getConviction(conf, sup_y, len_database)
                    odds = getOddsRatio(sup_xy, sup_x, sup_y, len_database)

                    strong_rules.append((X, Y, (conf, lift, lev, jacc, conv, odds)))
                else:
                    if len(X) >= 2:
                        W_sets = getSubsets(X)
                        for W in W_sets:
                            if W in A:
                                A.remove(W)

    return strong_rules

def getSubsets(set):
    subsets = []
    x = len(set)
    for i in range(1 << x):
       subsets.append([set[j] for j in range(x) if (i & (1 << j))])

    subsets.pop(-1)
    subsets.pop(0)

    return subsets

In [217]:
def getStrongRulesForDatabase(db, minsup, minconf):
    fsets = eclat(db, minsup)

    len_data = len(db)

    strong_rules = getStrongRulesFromFrequentSets(fsets, minconf, len_data)
    return strong_rules

In [218]:
def calculate_weighted_score(conf, lift, lev, jacc, conv, odds, weights):
    total_score = np.average([conf, lift, lev, jacc, conv, odds,], weights=weights, axis=0)
    return total_score

In [219]:
def objective_function(weights, conf, lift, lev, conv, odds):
    total_score = 0
    for i in range(len(lift)):
        score = calculate_weighted_score(conf[i], lift[i], lev[i], conv[i], odds[i], weights) #sup[i], conf[i], lift[i], lev[i], jacc[i], conv[i], odds[i], weights
        total_score = total_score + score

    total_score = total_score/len(lift)
    return 0-total_score

def optimize_hyperparameters(metrics):
    bounds = [(0, 1)] * len(metrics)
    initial_weights = [1/len(metrics)] * len(metrics)
    
    result = minimize(objective_function, initial_weights, args=metrics, bounds=bounds)

    optimal_weights = result.x
    return optimal_weights

In [220]:
#RECOMMENDER
class Recommender:
    """
        This is the class to make recommendations.
        The class must not require any mandatory arguments for initialization.
    """
    def __init__(self):
        self.rules = {}
        self.prices = {}
        self.weights = [0.1,0.2,0.2,0.05,0.15,0.25]


    def train(self, prices, database) -> None:
        """
            allows the recommender to learn which items exist, which prices they have, and which items have been purchased together in the past
            :param prices: a list of prices in USD for the items (the item ids are from 0 to the length of this list - 1)
            :param database: a list of lists of item ids that have been purchased together. Every entry corresponds to one transaction
            :return: the object should return itself here (this is actually important!)
        """
        
        rules_db = getStrongRulesForDatabase(db=database, minsup=0.002*len(database), minconf=0.1)
        premises, conclusions, metrics = [], [], []

        for rule in rules_db:
            premises.append(tuple(rule[0]))
            conclusions.append(tuple(rule[1]))
            metrics.append(rule[2])

        print(metrics[0:10])

        normalized_metrics = []
        grouped_metrics = ()
        for i in range(len(metrics[0])):
            metric = [x[i] for x in metrics]
            grouped_metrics = grouped_metrics + (metric,)
            min_metric = min(metric)
            max_metric = max(metric)
                
            normalized_metric = []
            for meassure in metric:
                normalized_meassure = (meassure - min_metric) / (max_metric - min_metric)
                normalized_metric.append(normalized_meassure)
            normalized_metrics.append(normalized_metric)

        metrics = list(zip(*normalized_metrics))

        print(metrics[0:10])
        temp_rules = list(zip(premises,conclusions))
        for i, rule in enumerate(temp_rules):
            self.rules[rule] = metrics[i]

        for i, price in enumerate(prices):
            self.prices[i] = price

        return self

    def get_recommendations(self, cart:list, max_recommendations:int) -> list:
        """
            makes a recommendation to a specific user
            
            :param cart: a list with the items in the cart
            :param max_recommendations: maximum number of items that may be recommended
            :return: list of at most `max_recommendations` items to be recommended
        """

        rules = list(self.rules.keys())
        premises, conclussions = [],[]
        for rule in rules:
            premises.append(list(rule[0]))
            conclussions.append(list(rule[1]))

        #Gets only the conclusions in which the cart is a subset or equal to the premise
        possible_recommendations = []
        for i, premise in enumerate(premises):
            if (all(x in cart for x in premise)):
                rule = (tuple(premise), tuple(conclussions[i]))
                metrics = self.rules[rule]
                
                total_score = calculate_weighted_score(metrics[0], metrics[1], metrics[2],metrics[3],metrics[4],metrics[5],self.weights)

                possible_recommendations.append((conclussions[i], total_score))
        possible_recommendations = sorted(possible_recommendations, key=lambda x:x[1])

        #Gets 0.4 of the total best items according to our evaluation and sorts them by price
        best_recommendations = []
        best_recommendations_prices = []

        for i in range(len(possible_recommendations)):
            if len(best_recommendations) >= max_recommendations + 0.4 * len(self.prices):
                break
            
            #Add the items in the best rule
            for item in possible_recommendations[-1][0]:
                if item not in best_recommendations:
                    best_recommendations.append(item)
                    best_recommendations_prices.append(self.prices[item])
                possible_recommendations[-1][0].remove(item)
                
            possible_recommendations.pop(-1)
        
        best_recommendations = [x for _, x in sorted(zip(best_recommendations_prices, best_recommendations), key=lambda pair: pair[0])]

        recommendations = []
        i=0
        while i < max_recommendations:
            if len(best_recommendations) == 0:
                break

            if best_recommendations[-1] not in recommendations:
                recommendations.append(best_recommendations.pop(-1))
                i = i + 1
            else:
                best_recommendations.pop(-1)

        if len(recommendations) > 0:
            return recommendations
        else:
            return[0]

In [221]:
with open('prices.json', 'r') as file:
    prices = eval(file.read())

with open('training_data.json', 'r') as file:
    data = eval(file.read())

In [222]:
import random

recommender = Recommender()
recommender = recommender.train(prices=prices, database=data)

carrito = [random.randint(0,99),random.randint(0,99),random.randint(0,99),random.randint(0,99),random.randint(0,99)]
carrito = list(set(carrito))

a = recommender.get_recommendations(cart=carrito, max_recommendations=3)
print(a)
print(list(zip(range(99), prices)))

[(0.156794425087108, 2.8907990196420714, 0.003199279300567108, 1.1216254940711463, 3.4646575891069284), (0.13937282229965156, 1.7613048971933987, 0.0018793005671077505, 1.0699982397465235, 1.9360229733546748), (0.10199556541019955, 3.269544257051693, 0.003470734877126654, 1.078841384863124, 4.0097126171814965), (0.1602787456445993, 3.269544257051693, 0.003470734877126654, 1.132492783691142, 4.0097126171814965), (0.1951219512195122, 2.1999043519846966, 0.003320037807183365, 1.1322266139657442, 2.6006379585326953), (0.11498257839721254, 2.1241761470970992, 0.001898322306238185, 1.068758130777131, 2.3603759207518413), (0.11149825783972125, 1.399432431276174, 0.000992781190926276, 1.035817988064791, 1.4700791586249335), (0.1218274111675127, 3.9052689294115566, 0.0038814035916824204, 1.1032049509927118, 4.972742883401456), (0.1672473867595819, 3.905268929411557, 0.0038814035916824204, 1.149409678006185, 4.972742883401456), (0.12195121951219512, 1.839264294282287, 0.0017359404536862002, 1.06

IndexError: tuple index out of range