<a href="https://colab.research.google.com/github/Eduardo-Mati/Projetos-integrador---Desafio-unifacisa---Modelos-de-Machine-Learning/blob/main/Q7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [24]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')


In [25]:
# Load the raw grocery purchase records into a DataFrame
df = pd.read_csv(filepath_or_buffer='Groceries_dataset.csv')

In [26]:
# Preview the first ten rows of the freshly loaded data
df.head(n=10)

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,tropical fruit
1,2552,05-01-2015,whole milk
2,2300,19-09-2015,pip fruit
3,1187,12-12-2015,other vegetables
4,3037,01-02-2015,whole milk
5,4941,14-02-2015,rolls/buns
6,4501,08-05-2015,other vegetables
7,3803,23-12-2015,pot plants
8,2762,20-03-2015,whole milk
9,4119,12-02-2015,tropical fruit


In [27]:
# Remove the purchase date; the following cells only use the member ID and the item.
# Reassigning (instead of inplace=True) together with errors="ignore" keeps this
# cell idempotent: re-running it after "Date" is already gone no longer raises KeyError.
df = df.drop(columns="Date", errors="ignore")

In [28]:
# Preview the first ten rows now that the "Date" column has been removed
df.head(n=10)

Unnamed: 0,Member_number,itemDescription
0,1808,tropical fruit
1,2552,whole milk
2,2300,pip fruit
3,1187,other vegetables
4,3037,whole milk
5,4941,rolls/buns
6,4501,other vegetables
7,3803,pot plants
8,2762,whole milk
9,4119,tropical fruit


In [31]:
# Give the columns the shorter names used by the rest of the notebook
df.rename(
    mapper={'Member_number': 'ID', 'itemDescription': 'Itens'},
    axis='columns',
    inplace=True,
)

In [32]:
# Preview the first ten rows with the renamed columns
df.head(n=10)

Unnamed: 0,ID,Itens
0,1808,tropical fruit
1,2552,whole milk
2,2300,pip fruit
3,1187,other vegetables
4,3037,whole milk
5,4941,rolls/buns
6,4501,other vegetables
7,3803,pot plants
8,2762,whole milk
9,4119,tropical fruit


In [33]:
# Collapse the purchase log into one row per customer: every item bought by an ID
# is concatenated into a single comma-separated string (duplicates are kept).
# The column was already renamed to 'Itens' in an earlier cell, so no rename is
# repeated here; the aggregated column is named 'Itens Comprados' for clarity.
df_grouped = (
    df.groupby('ID')['Itens']
    .agg(','.join)
    .rename('Itens Comprados')
    .reset_index()
)

# Show the first rows of the grouped DataFrame
display(df_grouped.head())

Unnamed: 0,ID,Itens Comprados
0,1000,"soda,canned beer,sausage,sausage,whole milk,wh..."
1,1001,"frankfurter,frankfurter,beef,sausage,whole mil..."
2,1002,"tropical fruit,butter milk,butter,frozen veget..."
3,1003,"sausage,root vegetables,rolls/buns,detergent,f..."
4,1004,"other vegetables,pip fruit,root vegetables,can..."


In [34]:
# Thresholds for the mining step.
MIN_SUPPORT = 0.02  # an itemset must appear in at least 2% of the baskets
MIN_LIFT = 1.0      # keep only rules whose lift is at least 1.0

# Build the boolean basket matrix that apriori consumes: one row per customer ID,
# one column per item, True when that customer bought the item at least once.
# `df_onehot` was previously used without being defined anywhere in the notebook,
# so this cell raised NameError on a fresh kernel (Restart & Run All).
# NOTE(review): baskets are per customer (all purchases of an ID), not per store
# visit, because "Date" was dropped earlier - confirm this is the intended granularity.
df_onehot = pd.crosstab(df["ID"], df["Itens"]).gt(0)

# Mine the frequent itemsets
frequent_itemsets = apriori(df_onehot, min_support=MIN_SUPPORT, use_colnames=True)

# Derive association rules from the frequent itemsets
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=MIN_LIFT)

# Strongest rules first: highest lift, ties broken by highest confidence
rules = rules.sort_values(["lift", "confidence"], ascending=[False, False])
rules.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
2933,"(whole milk, sausage)","(rolls/buns, yogurt)",0.106978,0.111339,0.022832,0.213429,1.916929,1.0,0.010921,1.129791,0.535633,0.116798,0.114881,0.209249
2936,"(rolls/buns, yogurt)","(whole milk, sausage)",0.111339,0.106978,0.022832,0.205069,1.916929,1.0,0.010921,1.123396,0.538262,0.116798,0.109842,0.209249
2934,"(whole milk, yogurt)","(rolls/buns, sausage)",0.15059,0.08235,0.022832,0.151618,1.841148,1.0,0.010431,1.081648,0.537856,0.108669,0.075485,0.214438
2935,"(rolls/buns, sausage)","(whole milk, yogurt)",0.08235,0.15059,0.022832,0.277259,1.841148,1.0,0.010431,1.175261,0.497859,0.108669,0.149125,0.214438
2879,"(whole milk, sausage)","(other vegetables, yogurt)",0.106978,0.120318,0.023089,0.215827,1.793806,1.0,0.010217,1.121796,0.495538,0.113065,0.108572,0.203862
2878,"(other vegetables, yogurt)","(whole milk, sausage)",0.120318,0.106978,0.023089,0.191898,1.793806,1.0,0.010217,1.105085,0.503052,0.113065,0.095093,0.203862
2740,"(bottled water, yogurt)","(whole milk, other vegetables)",0.066444,0.19138,0.022063,0.332046,1.735009,1.0,0.009346,1.210593,0.453786,0.09358,0.173958,0.223664
2737,"(whole milk, other vegetables)","(bottled water, yogurt)",0.19138,0.066444,0.022063,0.115282,1.735009,1.0,0.009346,1.055201,0.523898,0.09358,0.052313,0.223664
2932,"(whole milk, rolls/buns)","(sausage, yogurt)",0.178553,0.075423,0.022832,0.127874,1.695412,1.0,0.009365,1.060141,0.49933,0.098779,0.056729,0.215297
2937,"(sausage, yogurt)","(whole milk, rolls/buns)",0.075423,0.178553,0.022832,0.302721,1.695412,1.0,0.009365,1.178075,0.443633,0.098779,0.151158,0.215297


Pergunta: Quais foram as regras de associação mais relevantes? Como elas podem ser aplicadas para aumentar as vendas?

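Na tabela acima estão o suporte (support), a confiança (confidence) e o lift de cada regra, além dos antecedentes (antecedents) e dos consequentes (consequents), isto é, os itens recomendados. As regras mais relevantes são as de maior lift: (whole milk, sausage) → (rolls/buns, yogurt) e a sua inversa, com lift ≈ 1,92, seguidas por (whole milk, yogurt) ↔ (rolls/buns, sausage), com lift ≈ 1,84.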

A estratégia para a melhoria das vendas depende do dono do mercado que receberá essas informações, mas o melhor seria aproveitar a previsão e aproximar as prateleiras de itens relacionados, para que os clientes tenham a tentação de comprar mais.