# Simulate Transaction Data

In [14]:
# Load necessary libraries
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Ten hand-written market-basket transactions, each holding 2-5 items.
# Target from the exercise: items drawn from a pool of at least 8 unique items.
# NOTE(review): only 5 distinct items are used below (Bread, Butter, Cheese,
# Eggs, Milk), so the "at least 8 unique items" target is not met. Adding
# items would change every result computed from `data` further down.
data = [
    ['Milk', 'Bread', 'Eggs', 'Butter'],
    ['Bread', 'Butter', 'Cheese', 'Eggs', 'Milk'],
    ['Milk', 'Eggs', 'Cheese'],
    ['Bread', 'Cheese'],
    ['Milk', 'Butter', 'Eggs'],
    ['Eggs', 'Bread', 'Cheese'],
    ['Cheese', 'Butter'],
    ['Milk', 'Eggs', 'Cheese', 'Bread'],
    ['Bread', 'Cheese', 'Butter'],
    ['Milk', 'Eggs'],
]

# Analyze with Apriori

In [15]:
# One-hot encode the transactions with pandas: one row per transaction and one
# boolean column per item, True when the item appears in that transaction.
all_items = sorted({item for transaction in data for item in transaction})
encoded_data = [
    {item: item in transaction for item in all_items}
    for transaction in data
]
df = pd.DataFrame(encoded_data)

# Use the Apriori algorithm to find frequent itemsets.
# Minimum support is 0.3 (30%); use_colnames=True reports itemsets by item
# name instead of by column index.
frequent_itemsets = apriori(df, min_support=0.3, use_colnames=True)

# Display the frequent itemsets.
# sep="" keeps print() from inserting a space between the heading and the
# DataFrame, which would shift the table's header row one character to the
# right of its data rows.
print("Frequent Itemsets:\n", frequent_itemsets, sep="")

Frequent Itemsets:
     support               itemsets
0       0.6                (Bread)
1       0.5               (Butter)
2       0.7               (Cheese)
3       0.7                 (Eggs)
4       0.6                 (Milk)
5       0.3        (Bread, Butter)
6       0.5        (Bread, Cheese)
7       0.4          (Eggs, Bread)
8       0.3          (Milk, Bread)
9       0.3       (Butter, Cheese)
10      0.3         (Eggs, Butter)
11      0.3         (Milk, Butter)
12      0.4         (Eggs, Cheese)
13      0.3         (Milk, Cheese)
14      0.6           (Milk, Eggs)
15      0.3  (Eggs, Bread, Cheese)
16      0.3    (Milk, Eggs, Bread)
17      0.3   (Milk, Eggs, Butter)
18      0.3   (Milk, Eggs, Cheese)


# Generate Rules

In [19]:
# Generate association rules from the frequent itemsets, using confidence as
# the metric with a minimum threshold of 0.7.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)

# Show the rules (at least 2) and briefly explain what one rule means.
# Example reading, for the rule (Milk) -> (Eggs) on this notebook's transactions:
#   support    0.6  - 6 of the 10 transactions contain both Milk and Eggs.
#   confidence 1.0  - every transaction that contains Milk also contains Eggs.
#   leverage   0.18 - Milk and Eggs co-occur more often than they would if
#                     independent (0.6 observed vs 0.6 * 0.7 = 0.42 expected).
# sep="" keeps print() from inserting a space before the DataFrame, which
# would misalign the table's header row with its data rows.
print("\nAssociation Rules:\n", rules[['antecedents', 'consequents', 'support', 'confidence', 'leverage']], sep="")


Association Rules:
        antecedents consequents  support  confidence  leverage
0          (Bread)    (Cheese)      0.5    0.833333      0.08
1         (Cheese)     (Bread)      0.5    0.714286      0.08
2           (Milk)      (Eggs)      0.6    1.000000      0.18
3           (Eggs)      (Milk)      0.6    0.857143      0.18
4    (Eggs, Bread)    (Cheese)      0.3    0.750000      0.02
5   (Eggs, Cheese)     (Bread)      0.3    0.750000      0.06
6    (Milk, Bread)      (Eggs)      0.3    1.000000      0.09
7    (Eggs, Bread)      (Milk)      0.3    0.750000      0.06
8   (Milk, Butter)      (Eggs)      0.3    1.000000      0.09
9   (Eggs, Butter)      (Milk)      0.3    1.000000      0.12
10  (Milk, Cheese)      (Eggs)      0.3    1.000000      0.09
11  (Eggs, Cheese)      (Milk)      0.3    0.750000      0.06
