# Sistemas de Recomendação com Apriori

## Carregando Base de Dados

In [1]:
!rm -r MLUD
!git clone https://github.com/Crissky/MLUD.git

Cloning into 'MLUD'...
remote: Enumerating objects: 20, done.[K
remote: Counting objects: 100% (20/20), done.[K
remote: Compressing objects: 100% (16/16), done.[K
remote: Total 395 (delta 13), reused 7 (delta 3), pack-reused 375[K
Receiving objects: 100% (395/395), 7.27 MiB | 18.29 MiB/s, done.
Resolving deltas: 100% (171/171), done.


## Instalando MLXTEND

In [2]:
!pip install mlxtend



## Imports

In [3]:
import pandas as pd
import numpy as np

from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

## Criando DataFrame e Substituindo NaN por Zeros 

In [4]:
# Read the purchases file with no header row (header=None), so the first line
# is kept as data, and replace every NaN cell with 0.
df = (
    pd.read_csv('MLUD/Aula23/compras.csv', header=None)
    .replace(np.nan, 0)
)
df.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
0,citrus fruit,semi-finished bread,margarine,ready soups,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,tropical fruit,yogurt,coffee,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,whole milk,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,pip fruit,yogurt,cream cheese,meat spreads,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,other vegetables,whole milk,condensed milk,long life bakery product,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,whole milk,butter,yogurt,rice,abrasive cleaner,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,rolls/buns,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,other vegetables,UHT-milk,rolls/buns,bottled beer,liquor (appetizer),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,potted plants,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,whole milk,cereals,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


## Criando uma lista de Listas de Transações (Removendo os Zeros)

In [5]:
# Build one list of items per row of `df`, leaving out the 0 cells so that
# each transaction contains only the values that are not 0.
list_all_transactions = [
    [item for item in row_items if item != 0]
    for row_items in df.values.tolist()
]

print(len(list_all_transactions))

9835


## Transformando Listas de Transações em One-hot encoding

In [6]:
# Encode the transactions as a table with one column per item name
# (te.columns_) and one row per transaction.
# NOTE: `df` is rebound here. From this point on it holds the encoded table,
# no longer the raw purchases table that was loaded from the CSV.
te = TransactionEncoder()
te.fit(list_all_transactions)
te_array = te.transform(list_all_transactions)
df = pd.DataFrame(data=te_array, columns=te.columns_)

df.head()

Unnamed: 0,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,berries,beverages,bottled beer,bottled water,brandy,brown bread,butter,butter milk,cake bar,candles,candy,canned beer,canned fish,canned fruit,canned vegetables,cat food,cereals,chewing gum,chicken,chocolate,chocolate marshmallow,citrus fruit,cleaner,cling film/bags,cocoa drinks,coffee,condensed milk,cooking chocolate,cookware,cream,...,salty snack,sauces,sausage,seasonal products,semi-finished bread,shopping bags,skin care,sliced cheese,snack products,soap,soda,soft cheese,softener,sound storage medium,soups,sparkling wine,specialty bar,specialty cheese,specialty chocolate,specialty fat,specialty vegetables,spices,spread cheese,sugar,sweet spreads,syrup,tea,tidbits,toilet cleaner,tropical fruit,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False


## Construindo DataFrame de Frequências com Apriori

In [7]:
# Mine the frequent itemsets with a minimum support of 0.01, reporting item
# names (use_colnames=True) rather than column positions.
frequent_itemsets = apriori(df, use_colnames=True, min_support=0.01)

# Display the itemsets from highest to lowest support.
frequent_itemsets.sort_values('support', ascending=False)

Unnamed: 0,support,itemsets
86,0.255516,(whole milk)
55,0.193493,(other vegetables)
66,0.183935,(rolls/buns)
75,0.174377,(soda)
87,0.139502,(yogurt)
...,...,...
178,0.010066,"(frankfurter, sausage)"
306,0.010066,"(yogurt, curd, whole milk)"
160,0.010066,"(rolls/buns, curd)"
212,0.010066,"(tropical fruit, napkins)"


## Aplicando Regras de Associação (DataFrame)

In [8]:
# Derive association rules from the frequent itemsets, using confidence as
# the selection metric with a threshold of 0.5.
rules = association_rules(
    frequent_itemsets,
    metric='confidence',
    min_threshold=0.5,
)

rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,"(butter, other vegetables)",(whole milk),0.020031,0.255516,0.01149,0.573604,2.244885,0.006371,1.745992
1,"(root vegetables, citrus fruit)",(other vegetables),0.017692,0.193493,0.010371,0.586207,3.029608,0.006948,1.949059
2,"(curd, yogurt)",(whole milk),0.017285,0.255516,0.010066,0.582353,2.279125,0.005649,1.782567
3,"(other vegetables, domestic eggs)",(whole milk),0.022267,0.255516,0.012303,0.552511,2.162336,0.006613,1.663694
4,"(other vegetables, pip fruit)",(whole milk),0.026131,0.255516,0.013523,0.51751,2.025351,0.006846,1.543003
5,"(root vegetables, rolls/buns)",(other vegetables),0.024301,0.193493,0.012201,0.502092,2.59489,0.007499,1.619792
6,"(root vegetables, tropical fruit)",(other vegetables),0.021047,0.193493,0.012303,0.584541,3.020999,0.008231,1.941244
7,"(root vegetables, yogurt)",(other vegetables),0.025826,0.193493,0.012913,0.5,2.584078,0.007916,1.613015
8,"(whipped/sour cream, other vegetables)",(whole milk),0.028876,0.255516,0.014642,0.507042,1.984385,0.007263,1.510239
9,"(other vegetables, yogurt)",(whole milk),0.043416,0.255516,0.022267,0.512881,2.007235,0.011174,1.52834


## Removendo colunas do DataFrame de Regras para melhorar a visualização

In [9]:
# Show a compact view of the rules, highest lift first.
# The wanted columns are selected by name instead of dropping four specific
# ones, so the view stays limited to these five columns even if `rules`
# carries additional metric columns.
rules_view_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
rules.sort_values(by='lift', ascending=False)[rules_view_columns]

Unnamed: 0,antecedents,consequents,support,confidence,lift
1,"(root vegetables, citrus fruit)",(other vegetables),0.010371,0.586207,3.029608
6,"(root vegetables, tropical fruit)",(other vegetables),0.012303,0.584541,3.020999
5,"(root vegetables, rolls/buns)",(other vegetables),0.012201,0.502092,2.59489
7,"(root vegetables, yogurt)",(other vegetables),0.012913,0.5,2.584078
2,"(curd, yogurt)",(whole milk),0.010066,0.582353,2.279125
0,"(butter, other vegetables)",(whole milk),0.01149,0.573604,2.244885
11,"(root vegetables, tropical fruit)",(whole milk),0.011998,0.570048,2.230969
12,"(root vegetables, yogurt)",(whole milk),0.01454,0.562992,2.203354
3,"(other vegetables, domestic eggs)",(whole milk),0.012303,0.552511,2.162336
14,"(whipped/sour cream, yogurt)",(whole milk),0.01088,0.52451,2.052747
