In [2]:
import pandas as pd
from mlxtend.frequent_patterns import fpgrowth, association_rules
import random

# Load the basket matrix: one row per transaction and one column per product.
# The first CSV column is used as the row index; comma separator and
# first-row header are the pandas defaults, so they are not spelled out.
df = pd.read_csv('basket_analysis.csv', index_col=0)

df

Unnamed: 0,Apple,Bread,Butter,Cheese,Corn,Dill,Eggs,Ice cream,Kidney Beans,Milk,Nutmeg,Onion,Sugar,Unicorn,Yogurt,chocolate
0,False,True,False,False,True,True,False,True,False,False,False,False,True,False,True,True
1,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False
2,True,False,True,False,False,True,False,True,False,True,False,False,False,False,True,True
3,False,False,True,True,False,True,False,False,False,True,True,True,False,False,False,False
4,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
994,False,True,False,False,False,False,True,False,False,False,False,False,False,True,False,True
995,True,False,False,False,True,False,False,False,True,True,True,False,False,False,True,False
996,True,False,False,False,True,True,False,False,False,False,False,False,True,False,False,True
997,False,False,True,True,True,False,True,True,True,False,True,False,True,False,True,True


In [27]:
def apply_csp(df, must_have_items, must_not_have_items):
    """Filter a one-hot transaction frame by inclusion/exclusion constraints.

    Args:
        df: DataFrame with one row per transaction and one column per item.
        must_have_items: Column names; a row is kept only if the sum of
            these columns is greater than zero (it has at least one of them).
        must_not_have_items: Column names; a row is discarded if the sum of
            these columns is non-zero. These columns are also removed from
            the result.

    Returns:
        A new DataFrame with the surviving rows and without the
        ``must_not_have_items`` columns. The input frame is not modified.
    """
    # Row-wise masks computed once on the incoming frame.
    has_required = df[must_have_items].sum(axis=1).gt(0)
    has_forbidden = df[must_not_have_items].sum(axis=1).ne(0)

    kept_rows = df.loc[has_required & ~has_forbidden]
    return kept_rows.drop(columns=must_not_have_items)

In [50]:
# Report how many item columns the basket matrix has, then list them.
print(f'Numero de columnas: {df.shape[1]}')
print(df.columns)

Numero de columnas: 16
Index(['Apple', 'Bread', 'Butter', 'Cheese', 'Corn', 'Dill', 'Eggs',
       'Ice cream', 'Kidney Beans', 'Milk', 'Nutmeg', 'Onion', 'Sugar',
       'Unicorn', 'Yogurt', 'chocolate'],
      dtype='object')


In [29]:
# Items of which a transaction must contain at least one in order to be kept.
must_have_items = [
    'Apple', 'Bread', 'Butter', 'Cheese',
    'Corn', 'Dill', 'Eggs', 'Ice cream',
    'Kidney Beans', 'Milk', 'Nutmeg', 'Onion',
    'Sugar', 'Unicorn', 'Yogurt', 'chocolate',
]

# Items that disqualify a transaction; their columns are dropped as well.
must_not_have_items = ['Dill']

df_filtered = apply_csp(
    df,
    must_have_items,
    must_not_have_items,
)

In [30]:
# Display the columns that remain after applying the constraints.
df_filtered.columns

Index(['Apple', 'Bread', 'Butter', 'Cheese', 'Corn', 'Eggs', 'Ice cream',
       'Kidney Beans', 'Milk', 'Nutmeg', 'Onion', 'Sugar', 'Unicorn', 'Yogurt',
       'chocolate'],
      dtype='object')

In [42]:
# Item columns that remain after the constraint filter.
unique_items_filtered = df_filtered.columns

# Number of transactions that remain after the constraint filter.
total_transactions_filtered = len(df_filtered)

print(f'Número total de transacciones: {total_transactions_filtered}')
print(f'Número de items únicos: {len(unique_items_filtered)}')
print(f'Items únicos: {unique_items_filtered}')

Número total de transacciones: 601
Número de items únicos: 15
Items únicos: Index(['Apple', 'Bread', 'Butter', 'Cheese', 'Corn', 'Eggs', 'Ice cream',
       'Kidney Beans', 'Milk', 'Nutmeg', 'Onion', 'Sugar', 'Unicorn', 'Yogurt',
       'chocolate'],
      dtype='object')


In [43]:
# Mine frequent itemsets with FP-Growth (itemsets reported by column name).
# NOTE(review): this runs on the unfiltered `df`, not on `df_filtered`, so
# items listed in `must_not_have_items` can still appear in the itemsets and
# rules -- confirm whether mining on `df_filtered` was intended.
frequent_itemsets = fpgrowth(
    df,
    min_support=0.1,
    use_colnames=True,
)

# Derive association rules, keeping those with confidence of at least 0.15.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.15,
)

print(frequent_itemsets)
print(rules)

      support           itemsets
0    0.421421        (chocolate)
1    0.420420           (Yogurt)
2    0.410410        (Ice cream)
3    0.409409            (Sugar)
4    0.407407             (Corn)
..        ...                ...
164  0.169169     (Eggs, Cheese)
165  0.182182  (chocolate, Eggs)
166  0.180180       (Eggs, Corn)
167  0.168168    (Unicorn, Eggs)
168  0.157157       (Eggs, Dill)

[169 rows x 2 columns]
     antecedents  consequents  antecedent support  consequent support  \
0       (Yogurt)  (chocolate)            0.420420            0.421421   
1    (chocolate)     (Yogurt)            0.421421            0.420420   
2    (chocolate)  (Ice cream)            0.421421            0.410410   
3    (Ice cream)  (chocolate)            0.410410            0.421421   
4       (Yogurt)  (Ice cream)            0.420420            0.410410   
..           ...          ...                 ...                 ...   
433       (Corn)       (Eggs)            0.407407            0.384384

In [46]:
def recommend_products_with_constraints(purchased_items, must_have_items, must_not_have_items, rules_df=None):
    """Recommend the consequent of the most confident applicable rule.

    A rule is applicable when its antecedent contains at least one purchased
    item and its consequent contains neither a forbidden item nor an item
    that is already in the basket.

    Args:
        purchased_items: Iterable of item names currently in the basket.
        must_have_items: Accepted for interface compatibility; it is not
            used when selecting a recommendation.
        must_not_have_items: Iterable of item names that must never be
            recommended.
        rules_df: Optional DataFrame with ``antecedents``, ``consequents``
            (collections of item names) and ``confidence`` columns. When
            omitted, the notebook-level ``rules`` table is used.

    Returns:
        A list with the items of the best rule's consequent, or an empty
        list when there is no basket, no rules, or no applicable rule.
    """
    if rules_df is None:
        rules_df = rules  # notebook-level table built by association_rules

    basket = set(purchased_items)
    # Guard first: an empty basket or an empty (possibly column-less) rules
    # table cannot produce a recommendation and must not raise.
    if not basket or rules_df.empty:
        return []

    # Never suggest something forbidden or something already purchased.
    excluded = set(must_not_have_items) | basket

    # One vectorised pass instead of growing a frame with concat per item.
    is_triggered = rules_df['antecedents'].apply(
        lambda antecedent: not basket.isdisjoint(antecedent)
    )
    is_allowed = rules_df['consequents'].apply(
        lambda consequent: excluded.isdisjoint(consequent)
    )
    candidates = rules_df[is_triggered & is_allowed]
    if candidates.empty:
        return []

    # Stable sort so ties in confidence resolve to the earliest rule.
    best_rule = candidates.sort_values(
        by='confidence', ascending=False, kind='mergesort'
    ).iloc[0]
    return list(best_rule['consequents'])

In [48]:
# Use a dedicated, seeded generator so the sampled baskets (and therefore the
# printed recommendations) are identical on every Restart & Run All.
rng = random.Random(42)

for _ in range(10):
    # Simulate a basket of 1 to 3 distinct items drawn from the filtered columns.
    random_items = rng.sample(list(unique_items_filtered), k=rng.randint(1, 3))

    recommended_products = recommend_products_with_constraints(random_items, must_have_items, must_not_have_items)

    print(f'Items seleccionados: {random_items}')
    print(f'Productos recomendados: {recommended_products}\n')

Items seleccionados: ['Onion']
Productos recomendados: ['Nutmeg']

Items seleccionados: ['chocolate', 'Kidney Beans']
Productos recomendados: ['Milk']

Items seleccionados: ['Sugar', 'Eggs', 'Yogurt']
Productos recomendados: ['Butter']

Items seleccionados: ['chocolate', 'Yogurt']
Productos recomendados: ['Milk']

Items seleccionados: ['Cheese']
Productos recomendados: ['Onion']

Items seleccionados: ['Eggs', 'Corn']
Productos recomendados: ['Kidney Beans']

Items seleccionados: ['Unicorn', 'Ice cream', 'Milk']
Productos recomendados: ['chocolate']

Items seleccionados: ['Cheese', 'Kidney Beans']
Productos recomendados: ['Onion']

Items seleccionados: ['chocolate']
Productos recomendados: ['Milk']

Items seleccionados: ['Sugar', 'Butter']
Productos recomendados: ['Ice cream']

