# **Sistemas de Recomendação com Apriori (Prática com Python) - Machine Learning 23.2**

### Bibliotecas

In [1]:
# Libs
import numpy as np
import pandas as pd

### Base de dados

In [2]:
# Load the groceries CSV, discard the 'Item(s)' column and replace every NaN with 0
df = (
    pd.read_csv('../data/groceries - groceries.csv')
    .drop(columns='Item(s)')
    .replace(np.nan, 0)
)
df.head()

Unnamed: 0,Item 1,Item 2,Item 3,Item 4,Item 5,Item 6,Item 7,Item 8,Item 9,Item 10,...,Item 23,Item 24,Item 25,Item 26,Item 27,Item 28,Item 29,Item 30,Item 31,Item 32
0,citrus fruit,semi-finished bread,margarine,ready soups,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,tropical fruit,yogurt,coffee,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,whole milk,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,pip fruit,yogurt,cream cheese,meat spreads,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,other vegetables,whole milk,condensed milk,long life bakery product,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Transformando a base de dados

In [3]:
def removendoOsZeros(l):
    """Return a new list with the elements of ``l`` that are not equal to 0.

    The relative order of the remaining elements is preserved and the input
    list is left untouched.
    """
    return [item for item in l if item != 0]

# Accumulator: one entry per row of the frame
lista_todas_transacao = []

# Iterate over the data set row by row, stripping the zero entries
# from each row before storing it
for idx, row in df.iterrows():
    lista_de_transacao = removendoOsZeros(row.values.tolist())
    lista_todas_transacao += [lista_de_transacao]

# Preview the first ten entries
lista_todas_transacao[:10]

[['citrus fruit', 'semi-finished bread', 'margarine', 'ready soups'],
 ['tropical fruit', 'yogurt', 'coffee'],
 ['whole milk'],
 ['pip fruit', 'yogurt', 'cream cheese', 'meat spreads'],
 ['other vegetables',
  'whole milk',
  'condensed milk',
  'long life bakery product'],
 ['whole milk', 'butter', 'yogurt', 'rice', 'abrasive cleaner'],
 ['rolls/buns'],
 ['other vegetables',
  'UHT-milk',
  'rolls/buns',
  'bottled beer',
  'liquor (appetizer)'],
 ['potted plants'],
 ['whole milk', 'cereals']]

### Transformação dos Dados

#### Biblioteca

In [4]:
# Libs
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [5]:
# Encode the transaction lists with TransactionEncoder: fit on the
# transactions first, then transform the same transactions into an array
te = TransactionEncoder()
te.fit(lista_todas_transacao)
te_ary = te.transform(lista_todas_transacao)

# Wrap the encoded array in a DataFrame labelled with the encoder's columns
df = pd.DataFrame(data=te_ary, columns=te.columns_)

df.head()

Unnamed: 0,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False


### Implementando o Algoritmo Apriori

In [6]:
# Frequent itemsets: run apriori on the encoded frame with min_support=0.01,
# reporting itemsets by column name
frequent_itemsets = apriori(df, use_colnames=True, min_support=0.01)

# Show the ten itemsets with the highest support
(
    frequent_itemsets
    .sort_values(by='support', ascending=False)
    .head(10)
)

Unnamed: 0,support,itemsets
86,0.255516,(whole milk)
55,0.193493,(other vegetables)
66,0.183935,(rolls/buns)
75,0.174377,(soda)
87,0.139502,(yogurt)
6,0.110524,(bottled water)
67,0.108998,(root vegetables)
81,0.104931,(tropical fruit)
73,0.098526,(shopping bags)
70,0.09395,(sausage)


In [7]:
# Build the association rules from the frequent itemsets, using confidence
# as the metric with a threshold of 0.3
rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.3)

# Display the rules ordered by lift, highest first.
# The columns to show are selected explicitly rather than dropping a fixed
# list of metric columns: DataFrame.drop raises KeyError when a listed column
# (e.g. 'zhangs_metric') is missing, and any metric column not in the drop
# list would otherwise appear in the table. The set of metric columns is
# understood to differ between mlxtend releases - confirm against the
# installed version.
rules.sort_values(by='lift', ascending=False)[
    ['antecedents', 'consequents', 'support', 'confidence', 'lift']
]

Unnamed: 0,antecedents,consequents,support,confidence,lift
74,"(other vegetables, citrus fruit)",(root vegetables),0.010371,0.359155,3.295045
96,"(other vegetables, tropical fruit)",(root vegetables),0.012303,0.342776,3.144780
1,(beef),(root vegetables),0.017387,0.331395,3.040367
73,"(root vegetables, citrus fruit)",(other vegetables),0.010371,0.586207,3.029608
95,"(root vegetables, tropical fruit)",(other vegetables),0.012303,0.584541,3.020999
...,...,...,...,...,...
65,(waffles),(whole milk),0.012710,0.330688,1.294196
17,(coffee),(whole milk),0.018709,0.322242,1.261141
61,(sausage),(whole milk),0.029893,0.318182,1.245252
6,(bottled water),(whole milk),0.034367,0.310948,1.216940
