# Bibliotecas

In [88]:
import pandas as pd
import matplotlib.pyplot

## Carregamento dos dados

In [33]:
# Load the bakery sales records from the CSV file into a DataFrame.
dados = pd.read_csv('dados-padaria.csv')
# Show the loaded frame as the cell output.
dados

Unnamed: 0,TransactionNo,Items,DateTime,Daypart,DayType
0,1,Bread,2016-10-30 09:58:11,Morning,Weekend
1,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend
2,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend
3,3,Hot chocolate,2016-10-30 10:07:57,Morning,Weekend
4,3,Jam,2016-10-30 10:07:57,Morning,Weekend
...,...,...,...,...,...
20502,9682,Coffee,2017-09-04 14:32:58,Afternoon,Weekend
20503,9682,Tea,2017-09-04 14:32:58,Afternoon,Weekend
20504,9683,Coffee,2017-09-04 14:57:06,Afternoon,Weekend
20505,9683,Pastry,2017-09-04 14:57:06,Afternoon,Weekend


## Análise exploratória e tratamento dos dados

In [34]:
# Summarise the frame: in the recorded run it has 5 columns and 20,507 rows
# (roughly 21 thousand, not 21), with no missing values in any column.
dados.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20507 entries, 0 to 20506
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   TransactionNo  20507 non-null  int64 
 1   Items          20507 non-null  object
 2   DateTime       20507 non-null  object
 3   Daypart        20507 non-null  object
 4   DayType        20507 non-null  object
dtypes: int64(1), object(4)
memory usage: 801.2+ KB


In [35]:
# Build one "basket" per transaction: the list of distinct items sold in it.
# A transaction spans several rows (one row per item sold) and the same item can
# appear more than once inside a transaction, so duplicates are dropped per basket.
#
# A single groupby pass is used instead of filtering the whole frame once per
# transaction id, which scanned every row for each of the unique ids.
# sort=False keeps the transactions in order of first appearance, and
# SeriesGroupBy.unique() keeps items in order of first appearance inside each
# transaction, so the result is deterministic across kernel restarts
# (unlike list(set(...)), whose ordering depends on string hashing).
transacao = (
    dados
    .groupby('TransactionNo', sort=False)['Items']
    .unique()      # one array of distinct items per transaction
    .map(list)     # plain Python lists, as expected by TransactionEncoder
    .tolist()
)

In [36]:
# Peek at the first five baskets.
transacao[:5]

[['Bread'],
 ['Scandinavian'],
 ['Jam', 'Cookies', 'Hot chocolate'],
 ['Muffin'],
 ['Pastry', 'Coffee', 'Bread']]

In [37]:
#instalei e chamei a extensão mlxtend para fazer essa regra associativa
# MLXTEND é uma biblioteca que cria extensões de aplicações de machine learning

!pip install mlxtend



- ### Um tratamento necessário é converter as listas de itens em um dataframe de transações que o algoritmo de ML possa processar, pois ele só reconhece valores binários
- ### TransactionEncoder() utiliza one-hot encoding para transformar os dados categóricos em valores booleanos

In [38]:
#carregamento da libs
from mlxtend.preprocessing import TransactionEncoder

In [39]:
# Instantiate the encoder that will one-hot encode the transaction lists.
te= TransactionEncoder()

In [40]:
# Fit the encoder on the baskets and encode them in a single call,
# keeping the encoded result in its own variable.
transacao_te = te.fit_transform(transacao)

In [41]:
# Inspect the encoded array returned by the encoder.
transacao_te

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [43]:
# Wrap the encoded array in a DataFrame, labelling the columns with the
# names stored on the fitted encoder (te.columns_).
transacao_transformada = pd.DataFrame(data=transacao_te, columns=te.columns_)
transacao_transformada

Unnamed: 0,Adjustment,Afternoon with the baker,Alfajores,Argentina Night,Art Tray,Bacon,Baguette,Bakewell,Bare Popcorn,Basket,...,The BART,The Nomad,Tiffin,Toast,Truffles,Tshirt,Valentine's card,Vegan Feast,Vegan mincepie,Victorian Sponge
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9460,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
9461,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,False
9462,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
9463,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


# Algoritmo APRIORI

In [46]:
#carregamento da libs para suporte
from mlxtend.frequent_patterns import apriori

In [52]:
# Extract the frequent itemsets with the Apriori algorithm.
# The minimum support is 0.02: an itemset must appear in at least 2% of the
# transactions to be kept. Keeping it in a named constant makes the threshold
# explicit and easy to tune.
SUPORTE_MINIMO = 0.02

items_frequentes_apriori = apriori(
    transacao_transformada,
    min_support=SUPORTE_MINIMO,
    use_colnames=True,  # report item names instead of column indices
)
# Show the itemsets in DESCENDING order of support (most frequent first).
items_frequentes_apriori.sort_values(by='support', ascending=False)

Unnamed: 0,support,itemsets
4,0.478394,(Coffee)
1,0.327205,(Bread)
16,0.142631,(Tea)
3,0.103856,(Cake)
20,0.090016,"(Coffee, Bread)"
11,0.086107,(Pastry)
12,0.071844,(Sandwich)
9,0.061807,(Medialuna)
7,0.05832,(Hot chocolate)
23,0.054728,"(Cake, Coffee)"


In [54]:
#carregamento da libs para regras de associacao
from mlxtend.frequent_patterns import association_rules

In [58]:
# Derive association rules from the frequent itemsets, using confidence as the
# selection metric: only rules with confidence of at least 0.5 are kept.
# Each rule lists the antecedents (what the customer buys) and the consequents
# (what is bought along with them), together with the support metrics.
regras_apriori = association_rules(
    items_frequentes_apriori,
    min_threshold=0.5,
    metric='confidence',
)
regras_apriori

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Cake),(Coffee),0.103856,0.478394,0.054728,0.526958,1.101515,0.005044,1.102664
1,(Cookies),(Coffee),0.054411,0.478394,0.028209,0.518447,1.083723,0.002179,1.083174
2,(Hot chocolate),(Coffee),0.05832,0.478394,0.029583,0.507246,1.060311,0.001683,1.058553
3,(Juice),(Coffee),0.038563,0.478394,0.020602,0.534247,1.11675,0.002154,1.119919
4,(Medialuna),(Coffee),0.061807,0.478394,0.035182,0.569231,1.189878,0.005614,1.210871
5,(Pastry),(Coffee),0.086107,0.478394,0.047544,0.552147,1.154168,0.006351,1.164682
6,(Sandwich),(Coffee),0.071844,0.478394,0.038246,0.532353,1.112792,0.003877,1.115384
7,(Toast),(Coffee),0.033597,0.478394,0.023666,0.704403,1.472431,0.007593,1.764582
