In [1]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules
from sklearn.datasets import load_iris

# Load Iris dataset
data = load_iris()
iris_df = pd.DataFrame(data.data, columns=data.feature_names)

# Convert continuous features to categorical bins
bins = {
    'sepal length (cm)': [4, 5, 6, 7, 8],
    'sepal width (cm)': [2, 3, 4],
    'petal length (cm)': [1, 2, 3, 4],
    'petal width (cm)': [0, 1, 2, 3]
}

# Convert continuous variables to categorical variables
for column, bin_edges in bins.items():
    iris_df[column] = pd.cut(iris_df[column], bins=bin_edges, labels=[f"{bin_edges[i]}-{bin_edges[i+1]}" for i in range(len(bin_edges)-1)])

# Add species to the dataset (categorical as well)
iris_df['species'] = pd.Categorical.from_codes(data.target, data.target_names)

# For Apriori, we need binary encoding of the dataset
# Create binary indicator columns for each feature-value pair
def encode(df):
    df_encoded = pd.get_dummies(df)
    return df_encoded

# Encoding the dataframe
encoded_df = encode(iris_df)

# Apply the Apriori algorithm with a minimum support of 8% (0.08) and confidence of 50% (0.5)
min_support = 0.08
min_confidence = 0.5

# Generate frequent itemsets with the Apriori algorithm
frequent_itemsets = apriori(encoded_df, min_support=min_support, use_colnames=True)

# Display the frequent itemsets (support for each itemset)
print("Frequent Itemsets:")
print(frequent_itemsets)


Frequent Itemsets:
     support                                           itemsets
0   0.213333                            (sepal length (cm)_4-5)
1   0.380000                            (sepal length (cm)_5-6)
2   0.326667                            (sepal length (cm)_6-7)
3   0.080000                            (sepal length (cm)_7-8)
4   0.546667                             (sepal width (cm)_2-3)
..       ...                                                ...
88  0.146667  (species_setosa, petal length (cm)_1-2, petal ...
89  0.093333  (sepal width (cm)_2-3, petal width (cm)_1-2, s...
90  0.253333  (petal length (cm)_1-2, sepal width (cm)_3-4, ...
91  0.126667  (petal length (cm)_1-2, petal width (cm)_0-1, ...
92  0.126667  (sepal length (cm)_5-6, petal length (cm)_1-2,...

[93 rows x 2 columns]


In [2]:

# Generate the association rules
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_confidence)

# Display the association rules
print("\nAssociation Rules:")
print(rules)





Association Rules:
                                        antecedents  \
0                           (sepal length (cm)_4-5)   
1                           (sepal length (cm)_4-5)   
2                           (petal length (cm)_1-2)   
3                           (sepal length (cm)_4-5)   
4                            (petal width (cm)_0-1)   
..                                              ...   
269  (petal length (cm)_1-2, sepal length (cm)_5-6)   
270   (petal width (cm)_0-1, sepal length (cm)_5-6)   
271         (species_setosa, sepal length (cm)_5-6)   
272   (sepal width (cm)_3-4, sepal length (cm)_5-6)   
273   (petal length (cm)_1-2, sepal width (cm)_3-4)   

                                           consequents  antecedent support  \
0                               (sepal width (cm)_3-4)            0.213333   
1                              (petal length (cm)_1-2)            0.213333   
2                              (sepal length (cm)_4-5)            0.326667   
3      

  and should_run_async(code)


In [3]:
# Find the rules with confidence greater than 50%
high_confidence_rules = rules[rules['confidence'] > 0.5]
print("\nHigh Confidence Rules (> 50% confidence):")
print(high_confidence_rules)


High Confidence Rules (> 50% confidence):
                                           antecedents  \
0                              (sepal length (cm)_4-5)   
1                              (sepal length (cm)_4-5)   
2                              (petal length (cm)_1-2)   
3                              (sepal length (cm)_4-5)   
4                               (petal width (cm)_0-1)   
..                                                 ...   
266  (species_setosa, sepal width (cm)_3-4, sepal l...   
269     (petal length (cm)_1-2, sepal length (cm)_5-6)   
270      (petal width (cm)_0-1, sepal length (cm)_5-6)   
271            (species_setosa, sepal length (cm)_5-6)   
272      (sepal width (cm)_3-4, sepal length (cm)_5-6)   

                                           consequents  antecedent support  \
0                               (sepal width (cm)_3-4)            0.213333   
1                              (petal length (cm)_1-2)            0.213333   
2                         

  and should_run_async(code)
