In [1]:
from   mlxtend.frequent_patterns import apriori
from   mlxtend.frequent_patterns import association_rules
import pandas                    as     pd

# **Algorithme Apriori pour l’analyse du panier de consommation**

In [2]:
# Toy market-basket dataset: five transactions, each listing the items bought together.
data = {
    'Transaction ID': list(range(1, 6)),
    'Items': [
        ['pain', 'lait'],
        ['pain', 'couche', 'bière', 'œufs'],
        ['lait', 'couche', 'bière', 'coca'],
        ['pain', 'lait', 'couche', 'bière'],
        ['pain', 'lait', 'couche', 'coca'],
    ],
}
# Load the raw transactions into a pandas DataFrame (one row per transaction).
df = pd.DataFrame(data=data)
df

Unnamed: 0,Transaction ID,Items
0,1,"[pain, lait]"
1,2,"[pain, couche, bière, œufs]"
2,3,"[lait, couche, bière, coca]"
3,4,"[pain, lait, couche, bière]"
4,5,"[pain, lait, couche, coca]"


In [3]:
# Turn each transaction's list of items into a single comma-separated string.
# The frame is rebuilt from the untouched `data` dict rather than overwriting
# df['Items'] with a function of itself: re-running the old form would call
# ','.join on an already-joined string and split it into single characters.
df = pd.DataFrame(data).assign(Items=lambda frame: frame['Items'].str.join(','))
df

Unnamed: 0,Transaction ID,Items
0,1,"pain,lait"
1,2,"pain,couche,bière,œufs"
2,3,"lait,couche,bière,coca"
3,4,"pain,lait,couche,bière"
4,5,"pain,lait,couche,coca"


In [4]:
# One-hot encode the transactions: split each comma-separated string and emit one
# 0/1 indicator column per distinct item (item names must not contain a comma).
onehot = df.loc[:, 'Items'].str.get_dummies(sep=',')
onehot

Unnamed: 0,bière,coca,couche,lait,pain,œufs
0,0,0,0,1,1,0
1,1,0,1,0,1,1
2,1,1,1,1,0,0
3,1,0,1,1,1,0
4,0,1,1,1,1,0


## Quels sont les modèles d’achat les plus fréquents et leur score ?

In [5]:
# Mine frequent itemsets with Apriori, keeping those whose support is at least 0.4
# (i.e. present in at least 2 of the 5 transactions).
# mlxtend expects a boolean DataFrame: 0/1 integer input triggers a DeprecationWarning
# about non-bool types, so cast at the call site. `onehot` itself is left unchanged
# and the resulting supports are identical.
frequent_itemsets = apriori(onehot.astype(bool), min_support=0.4, use_colnames=True)
frequent_itemsets



Unnamed: 0,support,itemsets
0,0.6,(bière)
1,0.4,(coca)
2,0.8,(couche)
3,0.8,(lait)
4,0.8,(pain)
5,0.6,"(bière, couche)"
6,0.4,"(lait, bière)"
7,0.4,"(pain, bière)"
8,0.4,"(coca, couche)"
9,0.4,"(lait, coca)"


In [6]:
# Derive association rules from the frequent itemsets, keeping only rules whose
# confidence is at least 0.7.
rules = association_rules(
    frequent_itemsets,
    metric='confidence',
    min_threshold=0.7,
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(bière),(couche),0.6,0.8,0.6,1.0,1.25,1.0,0.12,inf,0.5,0.75,1.0,0.875
1,(couche),(bière),0.8,0.6,0.6,0.75,1.25,1.0,0.12,1.6,1.0,0.75,0.375,0.875
2,(coca),(couche),0.4,0.8,0.4,1.0,1.25,1.0,0.08,inf,0.333333,0.5,1.0,0.75
3,(coca),(lait),0.4,0.8,0.4,1.0,1.25,1.0,0.08,inf,0.333333,0.5,1.0,0.75
4,(lait),(couche),0.8,0.8,0.6,0.75,0.9375,1.0,-0.04,0.8,-0.25,0.6,-0.25,0.75
5,(couche),(lait),0.8,0.8,0.6,0.75,0.9375,1.0,-0.04,0.8,-0.25,0.6,-0.25,0.75
6,(pain),(couche),0.8,0.8,0.6,0.75,0.9375,1.0,-0.04,0.8,-0.25,0.6,-0.25,0.75
7,(couche),(pain),0.8,0.8,0.6,0.75,0.9375,1.0,-0.04,0.8,-0.25,0.6,-0.25,0.75
8,(pain),(lait),0.8,0.8,0.6,0.75,0.9375,1.0,-0.04,0.8,-0.25,0.6,-0.25,0.75
9,(lait),(pain),0.8,0.8,0.6,0.75,0.9375,1.0,-0.04,0.8,-0.25,0.6,-0.25,0.75
