# Reglas de asociación

### Docente: M.Sc. Richard Fernández 

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from google.colab import files
from mlxtend.frequent_patterns import apriori, association_rules
import io
from mlxtend.preprocessing import TransactionEncoder

In [None]:
# Open the Colab upload prompt and keep what it returns. The next cell indexes
# this object by file name and wraps the value in io.BytesIO, so it is
# presumably a mapping of file name -> raw file bytes (confirm in Colab docs).
uploaded = files.upload()

Saving GroceryStoreDataSet.csv to GroceryStoreDataSet.csv


In [None]:
# Load the uploaded CSV into a DataFrame with a single "products" column.
# Each row ends up holding one whole basket as a comma-joined string
# (see the printed frame), so the item lists are split out in a later step.
csv_bytes = uploaded['GroceryStoreDataSet.csv']
csv_buffer = io.BytesIO(csv_bytes)
data = pd.read_csv(csv_buffer, sep=',', names=['products'])
print(data)

                          products
0               MILK,BREAD,BISCUIT
1    BREAD,MILK,BISCUIT,CORNFLAKES
2              BREAD,TEA,BOURNVITA
3             JAM,MAGGI,BREAD,MILK
4                MAGGI,TEA,BISCUIT
5              BREAD,TEA,BOURNVITA
6             MAGGI,TEA,CORNFLAKES
7          MAGGI,BREAD,TEA,BISCUIT
8              JAM,MAGGI,BREAD,TEA
9                       BREAD,MILK
10  COFFEE,COCK,BISCUIT,CORNFLAKES
11  COFFEE,COCK,BISCUIT,CORNFLAKES
12          COFFEE,SUGER,BOURNVITA
13               BREAD,COFFEE,COCK
14             BREAD,SUGER,BISCUIT
15         COFFEE,SUGER,CORNFLAKES
16           BREAD,SUGER,BOURNVITA
17              BREAD,COFFEE,SUGER
18              BREAD,COFFEE,SUGER
19      TEA,MILK,COFFEE,CORNFLAKES


In [None]:
# Inspect the shape of the dataset as (rows, columns)
data.shape

(20, 1)

In [None]:
# Split every comma-joined basket string into a list of item names;
# dataf is the resulting list of transactions (one inner list per row).
dataf = [basket.split(",") for basket in data["products"]]
dataf

[['MILK', 'BREAD', 'BISCUIT'],
 ['BREAD', 'MILK', 'BISCUIT', 'CORNFLAKES'],
 ['BREAD', 'TEA', 'BOURNVITA'],
 ['JAM', 'MAGGI', 'BREAD', 'MILK'],
 ['MAGGI', 'TEA', 'BISCUIT'],
 ['BREAD', 'TEA', 'BOURNVITA'],
 ['MAGGI', 'TEA', 'CORNFLAKES'],
 ['MAGGI', 'BREAD', 'TEA', 'BISCUIT'],
 ['JAM', 'MAGGI', 'BREAD', 'TEA'],
 ['BREAD', 'MILK'],
 ['COFFEE', 'COCK', 'BISCUIT', 'CORNFLAKES'],
 ['COFFEE', 'COCK', 'BISCUIT', 'CORNFLAKES'],
 ['COFFEE', 'SUGER', 'BOURNVITA'],
 ['BREAD', 'COFFEE', 'COCK'],
 ['BREAD', 'SUGER', 'BISCUIT'],
 ['COFFEE', 'SUGER', 'CORNFLAKES'],
 ['BREAD', 'SUGER', 'BOURNVITA'],
 ['BREAD', 'COFFEE', 'SUGER'],
 ['BREAD', 'COFFEE', 'SUGER'],
 ['TEA', 'MILK', 'COFFEE', 'CORNFLAKES']]

In [None]:
# Turn the list of transactions into a one-hot encoded DataFrame:
# one row per transaction, one column per item.
a = TransactionEncoder()
a_dataf = a.fit(dataf).transform(dataf)
# Keep the encoder's boolean output as-is (True/False is equivalent to 1/0).
# The previous `data.replace(False, 0)` converted only the False cells, which
# made the resulting dtype depend on the pandas version, and mlxtend's
# apriori expects a boolean frame (non-bool input is deprecated there).
# Column sums and supports computed from this frame are unchanged.
data = pd.DataFrame(a_dataf, columns=a.columns_)
data

Unnamed: 0,BISCUIT,BOURNVITA,BREAD,COCK,COFFEE,CORNFLAKES,JAM,MAGGI,MILK,SUGER,TEA
0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
2,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
5,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
6,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0
7,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
8,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0
9,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [None]:
# Absolute frequency of each product/item: column-wise sum of the encoded frame
data.sum()

BISCUIT        7.0
BOURNVITA      4.0
BREAD         13.0
COCK           3.0
COFFEE         8.0
CORNFLAKES     6.0
JAM            2.0
MAGGI          5.0
MILK           5.0
SUGER          6.0
TEA            7.0
dtype: float64

In [None]:
# Relative frequency (support) of each product/item: its count divided by
# the number of transactions, shown from most to least frequent.
n_transactions = data.shape[0]
item_support = data.sum() / n_transactions
item_support.to_frame(name="Support").sort_values(by="Support", ascending=False)

Unnamed: 0,Support
BREAD,0.65
COFFEE,0.4
BISCUIT,0.35
TEA,0.35
CORNFLAKES,0.3
SUGER,0.3
MAGGI,0.25
MILK,0.25
BOURNVITA,0.2
COCK,0.15


In [None]:
# Run the Apriori algorithm with a minimum support of 20%.
# use_colnames=True makes the itemsets list item names (as in the output)
# rather than column positions.
MIN_SUPPORT = 0.2
df = apriori(data, use_colnames=True, min_support=MIN_SUPPORT)
df

Unnamed: 0,support,itemsets
0,0.35,(BISCUIT)
1,0.2,(BOURNVITA)
2,0.65,(BREAD)
3,0.4,(COFFEE)
4,0.3,(CORNFLAKES)
5,0.25,(MAGGI)
6,0.25,(MILK)
7,0.3,(SUGER)
8,0.35,(TEA)
9,0.2,"(BREAD, BISCUIT)"


In [None]:
# Build association rules from the frequent itemsets, filtering on the
# "confidence" metric with a minimum threshold of 60%.
MIN_CONFIDENCE = 0.6
df_ar = association_rules(df, min_threshold=MIN_CONFIDENCE, metric="confidence")
df_ar

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(MILK),(BREAD),0.25,0.65,0.2,0.8,1.230769,0.0375,1.75
1,(SUGER),(BREAD),0.3,0.65,0.2,0.666667,1.025641,0.005,1.05
2,(CORNFLAKES),(COFFEE),0.3,0.4,0.2,0.666667,1.666667,0.08,1.8
3,(SUGER),(COFFEE),0.3,0.4,0.2,0.666667,1.666667,0.08,1.8
4,(MAGGI),(TEA),0.25,0.35,0.2,0.8,2.285714,0.1125,3.25


In [None]:
#A. Comentarios
#1.La probabilidad de ver ventas de azúcar se estima en un 30%.

#2.La probabilidad de ver ventas de pan se estima en un 65%.

#3.Podemos decir que el soporte conjunto de ambos (azúcar y pan) es de un 20%, es decir, el 20% de las transacciones contiene los dos productos.

#4.El 67% de los que compran azúcar, también compra pan.

#5.El lift de 1.03 indica que quienes compran azúcar tienen aproximadamente un 3% más de probabilidad de comprar pan que un cliente cualquiera (67% frente al 65% general).

#6.La convicción de la regla es 1.05; al estar tan cerca de 1, indica que la compra de azúcar y la de pan son casi independientes.

#B. Sugerencias
#1. La venta cruzada se puede mejorar combinando productos - artículos.
#El diseño del local se puede cambiar para que las ventas se puedan mejorar cuando ciertos artículos se mantienen juntos.

#2. Realizar actividades promocionales para incrementar las ventas de los productos que los clientes no compran.

#3. Ofrecer descuentos colectivos en estos productos si el cliente compra ambos.
