In [18]:
import numpy as np
import pandas as pd
import scipy

In [19]:
def eclat(db, minsup):
    def generate_frequent_itemsets(P, minsup, F):
        for i, p_i in enumerate(P):
            Xa, t_Xa = p_i
            if not isinstance(Xa, list):
                Xa = [Xa]
            F.append((Xa, len(t_Xa)))
            Pa = []
            for j in range(i + 1, len(P)):
                Xb, t_Xb = P[j]
                if not isinstance(Xb, list):
                    Xb = [Xb]
                if j > i:
                    Xab = list(set(Xa).union(set(Xb)))
                    Xab.sort()
                    t_Xab = t_Xa.intersection(t_Xb)
                    if len(t_Xab) >= minsup:
                        Pa.append((Xab, t_Xab))
            if len(Pa) != 0:
                generate_frequent_itemsets(Pa, minsup, F)

    P = {}
    for i in range(len(db)):
        for item in db[i]:
            if item in P:
                P[item].add(i)
            else:
                P[item] = {i}
    P = list(P.items())
    
    condition = lambda x: len(x[1]) >= minsup
    P = [item for item in P if condition(item)]

    P = sorted(P, key=lambda x: x[0])
    F = []
    
    generate_frequent_itemsets(P, minsup, F)

    return [(F[i][0], F[i][1]) for i in range(len(F))]


In [20]:
def getConfidence(sup_xy, sup_x):
    return sup_xy / sup_x

In [21]:
def getLift(conf, sup_y, len_database):
    rsup_y = sup_y / len_database
    lift = conf / rsup_y
    return lift

In [22]:
def getLeverage(sup_xy, sup_x, sup_y, len_database):
    rsup_xy = sup_xy / len_database
    rsup_x = sup_x / len_database
    rsup_y = sup_y / len_database

    leverage = rsup_xy - (rsup_x * rsup_y)
    return leverage

In [23]:
def getJaccard(sup_xy, sup_x, sup_y):
    jaccard_denominator = sup_x + sup_y - sup_xy
    jaccard = sup_xy / jaccard_denominator
    return jaccard

In [24]:
def getConviction(conf, sup_y, len_database):
    rsup_y = sup_y / len_database
    conviction_denominator = 1 - conf
    conviction = (1 - rsup_y) / conviction_denominator
    return conviction

In [25]:
def getOddsRatio(sup_xy, sup_x, sup_y, len_database):
    sup_nox_y = sup_y - sup_xy
    sup_x_noy = sup_x - sup_xy
    sup_nox_noy = len_database - sup_xy - sup_nox_y - sup_x_noy

    odds_denominator = sup_x_noy * sup_nox_y
    odds = (sup_xy * sup_nox_noy) / odds_denominator
    return odds

In [26]:
#Estas serian las ideas de lo que podriamos integrar como conjunto de las metricas
#En caso de que copie y pegue no olvide corregir la parte de la normalizacion con una resta en el denominador

#Promedio Ponderado
def calculate_weighted_score(lift, lev, jacc, conv, odds, weights):
    total_score = np.average([lift, lev, jacc, conv, odds], weights=weights)#numpy porque si jajaja
    return total_score

#Score Agregado
def calculate_combined_score(lift, lev, jacc, conv, odds, alpha, beta, gamma, delta, epsilon):
    total_score = alpha * lift + beta * lev + gamma * jacc + delta * conv + epsilon * odds
    return total_score

#Umbral de Métricas
def get_filtered_recommendations(recommendations, min_lift_threshold, min_lev_threshold, min_jacc_threshold, min_conv_threshold, min_odds_threshold):
    filtered_recommendations = []

    for recommendation in recommendations:
        lift, lev, jacc, conv, odds = get_metrics(recommendation) # Aca seria ver como traer todas las metricas
        if lift >= min_lift_threshold and lev >= min_lev_threshold and jacc >= min_jacc_threshold and conv >= min_conv_threshold and odds >= min_odds_threshold:
            filtered_recommendations.append(recommendation)

    return filtered_recommendations

#Optimización de Hiperparámetros, esta salio de chatgpt y seria el que se usaria con scipy
#Agrego un bloque mas abajo de este de como se podria adaptar potencialmente esta parte a todo el resto del codigo
def objective_function(weights, lift, lev, jacc, conv, odds):
    total_score = calculate_weighted_score(lift, lev, jacc, conv, odds, weights)
    return -total_score  # Negamos para convertir la maximización en minimización

#Ejemplo de uso Optimización de Hiperparámetros
from scipy.optimize import minimize
def optimize_hyperparameters(lift, lev, jacc, conv, odds):
    bounds = [(0, 1)] * 5  # Cinco métricas y cinco pesos correspondientes
    initial_weights = [0.2, 0.2, 0.2, 0.2, 0.2]  # Valores iniciales para los pesos

    result = minimize(objective_function, initial_weights, args=(lift, lev, jacc, conv, odds), bounds=bounds)

    optimal_weights = result.x
    return optimal_weights

# Parámetros de ejemplo
lift = 1.2
lev = 0.8
jacc = 0.5
conv = 0.7
odds = 0.9

# Aca iria las sugerencias como se usaria, esto nos tocaria definirlo manual
#Promedio Ponderado
weights_1 = [0.2, 0.2, 0.2, 0.2, 0.2]
total_score_1 = calculate_weighted_score(lift, lev, jacc, conv, odds, weights_1)

#Score Agregado
alpha, beta, gamma, delta, epsilon = 0.2, 0.2, 0.2, 0.2, 0.2
total_score_2 = calculate_combined_score(lift, lev, jacc, conv, odds, alpha, beta, gamma, delta, epsilon)

#Umbral de Métricas
min_lift_threshold = 1.0
min_lev_threshold = 0.5
filtered_recommendations = get_filtered_recommendations(recommendations, min_lift_threshold, min_lev_threshold)

#Optimización de Hiperparámetros
optimal_weights = optimize_hyperparameters(lift, lev, jacc, conv, odds)

print("Promedio Ponderado:", total_score_1)
print("Score Agregado:", total_score_2)
print("Umbral de Métricas:", filtered_recommendations)
print("Optimización de Hiperparámetros - Optimal Weights:", optimal_weights)

NameError: name 'recommendations' is not defined

In [None]:
"""

Este es el bloque que le comento arriba de como se podria adaptar potencialmente esta parte a todo el resto del codigo


"""
class Recommender:
    def __init__(self):
        self.rules = {}
        self.prices = {}
        self.weights = None  # Añade un atributo para almacenar los pesos optimizados

    # ... (otras funciones)

    def train(self, prices, database) -> None:
        # ... (código existente)

        # Optimiza los hiperparámetros al final del entrenamiento
        self.optimize_hyperparameters()

        return self

    def optimize_hyperparameters(self):
        if not self.rules:
            print("No rules to optimize.")
            return

        # Obtén las métricas (por ejemplo, confidence, lift, jaccard) de tus reglas
        metrics = get_metrics_from_rules(self.rules)

        # Optimiza los pesos de acuerdo a las métricas
        optimal_weights = self.optimize_weights(*metrics)

        self.weights = optimal_weights

    def get_recommendations(self, cart: list, max_recommendations: int) -> list:
        if not self.weights:
            print("Hiperparámetros no optimizados. Realiza el entrenamiento primero.")
            return []

        rules = list(self.rules.keys())
        premises, conclusions = [], []

        for rule in rules:
            premises.append(list(rule[0]))
            conclusions.append(list(rule[1]))

        # ... (resto del código)

        # Usa los pesos optimizados en el cálculo del puntaje total
        total_score = sum(self.weights[i] * metrics[i] for i in range(len(metrics))) / len(metrics)

        # ... (resto del código)


In [None]:
def getStrongRulesFromFrequentSets(fsets, minconf, len_database):
    strong_rules = []
    fsets_sets = [item[0] for item in fsets]
    fsets_supp = [item[1] for item in fsets]
    for i, frequentSet in enumerate(fsets_sets):
        if len(frequentSet) >= 2:
            A = getSubsets(set=frequentSet)
            while len(A) != 0:
                X = A[-1]
                A.remove(X)

                sup_xy = fsets_supp[i]
                
                index_x = fsets_sets.index(X)
                sup_x = fsets_supp[index_x]

                conf = getConfidence(sup_xy, sup_x)
                if conf >= minconf:
                    #Y is the complement of X in the set frequentSet
                    Y = list(frequentSet)
                    for item in X:
                        Y.remove(item)

                    sup_y = fsets_supp[fsets_sets.index(Y)]

                    rsup_xy = sup_xy / len_database
                    lift = getLift(conf, sup_y, len_database)
                    lev = getLeverage(sup_xy, sup_x, sup_y, len_database)
                    jacc = getJaccard(sup_xy, sup_x, sup_y)
                    conv = getConviction(conf, sup_y, len_database)
                    odds = getOddsRatio(sup_xy, sup_x, sup_y, len_database)

                    strong_rules.append((X, Y, (rsup_xy, conf, lift, lev, jacc, conv, odds)))
                else:
                    if len(X) >= 2:
                        W_sets = getSubsets(X)
                        for W in W_sets:
                            if W in A:
                                A.remove(W)

    return strong_rules

def getSubsets(set):
    subsets = []
    x = len(set)
    for i in range(1 << x):
       subsets.append([set[j] for j in range(x) if (i & (1 << j))])

    subsets.pop(-1)
    subsets.pop(0)

    return subsets

In [None]:
def getStrongRulesForDatabase(db, minsup, minconf):
    fsets = eclat(db, minsup)

    len_data = len(db)

    strong_rules = getStrongRulesFromFrequentSets(fsets, minconf, len_data)
    return strong_rules

In [None]:
a= [[3,4],[6,7],[8,9]]
b =list(zip(*a))
b

[(3, 6, 8), (4, 7, 9)]

In [None]:
class Recommender:
    """
        This is the class to make recommendations.
        The class must not require any mandatory arguments for initialization.
    """
    def __init__(self):
        self.rules = {}
        self.prices = {}


    def train(self, prices, database) -> None:
        """
            allows the recommender to learn which items exist, which prices they have, and which items have been purchased together in the past
            :param prices: a list of prices in USD for the items (the item ids are from 0 to the length of this list - 1)
            :param database: a list of lists of item ids that have been purchased together. Every entry corresponds to one transaction
            :return: the object should return itself here (this is actually important!)
        """
        
        rules_db = getStrongRulesForDatabase(db=database, minsup=0.002*len(database), minconf=0.1)
        premises, conclusions, metrics = [], [], []

        for rule in rules_db:
            premises.append(tuple(rule[0]))
            conclusions.append(tuple(rule[1]))
            metrics.append(rule[2])
 
        normalized_metrics = []
        for i in range(len(metrics[0])):
            metric = [x[i] for x in metrics]
            min_metric = min(metric)
            max_metric = max(metric)

            if i == 3:
                print(min_metric)
                print(max_metric)
                
            normalized_metric = []
            for meassure in metric:
                if i == 3:
                    print(normalized_meassure)
                normalized_meassure = (meassure - min_metric) / (max_metric - min_metric)
                normalized_metric.append(normalized_meassure)
            normalized_metrics.append(normalized_metric)

        metrics = list(zip(*normalized_metrics))

        print(metrics[0:5])
        
        temp_rules = list(zip(premises,conclusions))
        for i, rule in enumerate(temp_rules):
            self.rules[rule] = metrics[i]

        for i, price in enumerate(prices):
            self.prices[i] = price

        return self

    def get_recommendations(self, cart:list, max_recommendations:int) -> list:
        """
            makes a recommendation to a specific user
            
            :param cart: a list with the items in the cart
            :param max_recommendations: maximum number of items that may be recommended
            :return: list of at most `max_recommendations` items to be recommended
        """

        rules = list(self.rules.keys())
        premises, conclussions = [],[]
        for rule in rules:
            premises.append(list(rule[0]))
            conclussions.append(list(rule[1]))

        #Gets only the conclusions in which the cart is a subset or equal to the premise
        possible_recommendations = []
        for i, premise in enumerate(premises):
            if (all(x in cart for x in premise)):
                rule = (tuple(premise), tuple(conclussions[i]))
                metrics = self.rules[rule]
                
                total_score = sum(metrics) / len(metrics)

                possible_recommendations.append((conclussions[i], total_score))
        possible_recommendations = sorted(possible_recommendations, key=lambda x:x[1])

        #Gets the (at least) 10 best items according to our evaluation and sorts them by price
        best_recommendations = []
        best_recommendations_prices = []

        for i in range(len(possible_recommendations)):
            if len(best_recommendations) >= 10:
                break
            
            #Add the items in the best rule
            for item in possible_recommendations[-1][0]:
                if item not in best_recommendations:
                    best_recommendations.append(item)
                    best_recommendations_prices.append(self.prices[item])
                possible_recommendations[-1][0].remove(item)
                
            possible_recommendations.pop(-1)
        
        best_recommendations = [x for _, x in sorted(zip(best_recommendations_prices, best_recommendations), key=lambda pair: pair[0])]

        recommendations = []
        i=0
        while i < max_recommendations:
            if len(best_recommendations) == 0:
                break

            if best_recommendations[-1] not in recommendations:
                recommendations.append(best_recommendations.pop(-1))
                i = i + 1
            else:
                best_recommendations.pop(-1)

        if len(recommendations) > 0:
            return recommendations
        else:
            return[0]

In [None]:
with open('prices.json', 'r') as file:
    prices = eval(file.read())

with open('training_data.json', 'r') as file:
    data = eval(file.read())

In [None]:
import random

recommender = Recommender()
recommender = recommender.train(prices=prices, database=data)

carrito = [random.randint(0,99),random.randint(0,99),random.randint(0,99),random.randint(0,99),random.randint(0,99)]
carrito = list(set(carrito))

a = recommender.get_recommendations(cart=carrito, max_recommendations=3)
print(a)
print(list(zip(range(99), prices)))

-0.0002812854442344054
0.023087488185255203
0.003199279300567108
0.0018793005671077505
0.003470734877126654
0.003470734877126654
0.003320037807183365
0.001898322306238185
0.000992781190926276
0.0038814035916824204
0.0038814035916824204
0.0017359404536862002
0.0010639886578449908
0.004123475897920605
0.004123475897920605
0.003023712192816635
0.002520167769376181
0.0027264413988657846
0.0008848416824196598
0.000742048676748582
0.0037759097353497165
0.0037759097353497165
0.0013838610586011336
0.00253429820415879
0.001335633270321361
0.0016072778827977319
0.0032379017013232517
0.0017468690926275995
0.0017815335538752365
0.001278934310018904
0.001177587429111531
0.0028443761814744796
0.0009933482986767485
0.001184180056710775
0.0013467509451795846
0.0025218572778827977
0.0025218572778827977
0.004077599243856331
0.004077599243856331
0.0016315217391304347
0.0015374291115311908
0.0015998700378071834
0.004841103497164461
0.004841103497164461
0.0017494328922495273
0.0017796077504725898
0.0016595