In [19]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt

# Load the Adult dataset.
data = pd.read_csv("adult.csv")

# Data preprocessing, if needed (none is applied here).

# Columns used for association-rule mining.
selected_columns = [
    "age", "workclass", "fnlwgt", "education", "education.num",
    "marital.status", "occupation", "relationship", "race", "sex",
    "capital.gain", "capital.loss", "hours.per.week", "native.country",
    "income",
]
data_selected = data[selected_columns]

# One-hot encode every selected column: each distinct value of a column becomes
# one item named "<column>_<value>" (e.g. "workclass_Private", "capital.gain_0").
# The dense-output keyword was renamed from `sparse` to `sparse_output` in
# scikit-learn 1.2 and the old spelling was removed in 1.4, so try the new
# keyword first and fall back for older installations.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:  # scikit-learn < 1.2 only knows `sparse`
    encoder = OneHotEncoder(sparse=False)

# apriori expects a boolean (True/False) item matrix; cast the 0.0/1.0 floats
# to bool. Keeping the source index keeps each encoded row aligned with
# `data_selected`.
data_encoded = pd.DataFrame(
    encoder.fit_transform(data_selected).astype(bool),
    columns=encoder.get_feature_names_out(selected_columns),
    index=data_selected.index,
)

# TODO: plot the histogram of item frequencies (not implemented yet).

# Mining thresholds.
MIN_SUPPORT = 0.1      # minimum fraction of rows an itemset must appear in
MIN_CONFIDENCE = 0.8   # minimum confidence a rule must reach to be kept

# Frequent itemsets, labelled with the encoded column names.
frequent_itemsets = apriori(
    data_encoded,
    use_colnames=True,
    min_support=MIN_SUPPORT,
)

# Association rules whose confidence is at least MIN_CONFIDENCE (0.8).
rules = association_rules(
    frequent_itemsets,
    min_threshold=MIN_CONFIDENCE,
    metric="confidence",
)

# Show the first ten rules.
print(rules.head(10))

# Keep only the rules that predict the low-income class with a high lift.
#
# The one-hot encoding names items "<column>_<value>" (e.g. "race_White"), so
# the previous literal "income=small" could never appear in a consequent and
# the filter always produced an empty frame.
# Assumes the income column holds the value "<=50K" for the low-income class;
# the check below fails loudly if that item does not exist.
TARGET_ITEM = "income_<=50K"
MIN_LIFT = 2

income_items = [col for col in data_encoded.columns if col.startswith("income_")]
if TARGET_ITEM not in income_items:
    raise KeyError(
        f"{TARGET_ITEM!r} is not an encoded item; "
        f"available income items: {income_items}"
    )

# NOTE(review): lift = confidence / support(consequent), hence
# lift <= 1 / support(consequent). If the target item occurs in more than half
# of the rows, no rule can exceed MIN_LIFT = 2 — confirm the threshold.
predicts_target = (
    rules["consequents"].apply(lambda items: TARGET_ITEM in items).astype(bool)
)
filtered_rules = rules[predicts_target & (rules["lift"] > MIN_LIFT)]

# Show the filtered rules, strongest lift first.
rules_sorted = filtered_rules.sort_values(by="lift", ascending=False)
print(rules_sorted)




             antecedents                     consequents  antecedent support  \
0    (workclass_Private)                    (race_White)            0.697030   
1    (workclass_Private)                (capital.gain_0)            0.697030   
2    (workclass_Private)                (capital.loss_0)            0.697030   
3    (workclass_Private)  (native.country_United-States)            0.697030   
4     (education.num_13)           (education_Bachelors)            0.164461   
5  (education_Bachelors)              (education.num_13)            0.164461   
6  (education_Bachelors)                    (race_White)            0.164461   
7  (education_Bachelors)                (capital.gain_0)            0.164461   
8  (education_Bachelors)                (capital.loss_0)            0.164461   
9  (education_Bachelors)  (native.country_United-States)            0.164461   

   consequent support   support  confidence      lift  leverage  conviction  
0            0.854274  0.595928    0.8549