# Reglas de asociación

In [None]:
! pip install mlxtend

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [None]:
# Load the raw grocery baskets. The file has no header row, so every line is
# read as data into a single column called "products".
df = pd.read_csv(
    "/content/GroceryStoreDataSet.csv",
    header=None,
    names=["products"],
)
df.head(n=10)

Unnamed: 0,products
0,"MILK,BREAD,BISCUIT"
1,"BREAD,MILK,BISCUIT,CORNFLAKES"
2,"BREAD,TEA,BOURNVITA"
3,"JAM,MAGGI,BREAD,MILK"
4,"MAGGI,TEA,BISCUIT"
5,"BREAD,TEA,BOURNVITA"
6,"MAGGI,TEA,CORNFLAKES"
7,"MAGGI,BREAD,TEA,BISCUIT"
8,"JAM,MAGGI,BREAD,TEA"
9,"BREAD,MILK"


In [None]:
# Turn each comma-separated basket string into a list of product names.
transacciones = df["products"].str.split(pat=",")
transacciones

0                  [MILK, BREAD, BISCUIT]
1      [BREAD, MILK, BISCUIT, CORNFLAKES]
2                 [BREAD, TEA, BOURNVITA]
3               [JAM, MAGGI, BREAD, MILK]
4                   [MAGGI, TEA, BISCUIT]
5                 [BREAD, TEA, BOURNVITA]
6                [MAGGI, TEA, CORNFLAKES]
7            [MAGGI, BREAD, TEA, BISCUIT]
8                [JAM, MAGGI, BREAD, TEA]
9                           [BREAD, MILK]
10    [COFFEE, COCK, BISCUIT, CORNFLAKES]
11    [COFFEE, COCK, BISCUIT, CORNFLAKES]
12             [COFFEE, SUGAR, BOURNVITA]
13                  [BREAD, COFFEE, COCK]
14                [BREAD, SUGAR, BISCUIT]
15            [COFFEE, SUGAR, CORNFLAKES]
16              [BREAD, SUGAR, BOURNVITA]
17                 [BREAD, COFFEE, SUGAR]
18                 [BREAD, COFFEE, SUGAR]
19        [TEA, MILK, COFFEE, CORNFLAKES]
Name: products, dtype: object

In [None]:
# One-hot encode the transactions: one column per product, one row per basket.
# The encoder's boolean output is kept as-is because mlxtend's apriori expects
# a boolean DataFrame; integer 0/1 input triggers a DeprecationWarning and a
# slower code path.
te = TransactionEncoder()
te_fit = te.fit_transform(transacciones)
df_rules = pd.DataFrame(te_fit, columns=te.columns_)
# Display the matrix as 0/1 for readability; df_rules itself stays boolean.
df_rules.astype("int")

Unnamed: 0,BISCUIT,BOURNVITA,BREAD,COCK,COFFEE,CORNFLAKES,JAM,MAGGI,MILK,SUGAR,TEA
0,1,0,1,0,0,0,0,0,1,0,0
1,1,0,1,0,0,1,0,0,1,0,0
2,0,1,1,0,0,0,0,0,0,0,1
3,0,0,1,0,0,0,1,1,1,0,0
4,1,0,0,0,0,0,0,1,0,0,1
5,0,1,1,0,0,0,0,0,0,0,1
6,0,0,0,0,0,1,0,1,0,0,1
7,1,0,1,0,0,0,0,1,0,0,1
8,0,0,1,0,0,0,1,1,0,0,1
9,0,0,1,0,0,0,0,0,1,0,0


In [None]:
# Mine the frequent itemsets, keeping those with a support of at least 0.2,
# and label them with product names rather than column indices.
df_freq_itemsets = apriori(df_rules, min_support=0.2, use_colnames=True)
df_freq_itemsets.sort_values("support", ascending=False)

Unnamed: 0,support,itemsets
2,0.65,(BREAD)
3,0.4,(COFFEE)
0,0.35,(BISCUIT)
8,0.35,(TEA)
4,0.3,(CORNFLAKES)
7,0.3,(SUGAR)
5,0.25,(MAGGI)
6,0.25,(MILK)
1,0.2,(BOURNVITA)
9,0.2,"(BISCUIT, BREAD)"


In [None]:
# Derive association rules from the frequent itemsets, keeping only rules
# whose confidence is at least 0.6, and list the most confident rules first.
df_ar = association_rules(
    df_freq_itemsets,
    min_threshold=0.6,
    metric="confidence",
)
df_ar.sort_values("confidence", ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(MILK),(BREAD),0.25,0.65,0.2,0.8,1.230769,0.0375,1.75
4,(MAGGI),(TEA),0.25,0.35,0.2,0.8,2.285714,0.1125,3.25
1,(SUGAR),(BREAD),0.3,0.65,0.2,0.666667,1.025641,0.005,1.05
2,(CORNFLAKES),(COFFEE),0.3,0.4,0.2,0.666667,1.666667,0.08,1.8
3,(SUGAR),(COFFEE),0.3,0.4,0.2,0.666667,1.666667,0.08,1.8
