In [3]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [4]:
data = {
    'Transaction_ID': ['T100', 'T200', 'T300', 'T400', 'T500'],
    'Items_bought': [
        {'M', 'O', 'N', 'K', 'E', 'Y'},
        {'D', 'O', 'N', 'K', 'E', 'Y'},
        {'M', 'A', 'K', 'E'},
        {'M', 'U', 'C', 'K', 'Y'},
        {'C', 'O', 'O', 'K', 'I', 'E'}
    ]
}

df = pd.DataFrame(data)

In [5]:
df['Items_bought'] = df['Items_bought'].apply(list)

In [6]:
oht = pd.get_dummies(df['Items_bought'].explode()).groupby(level=0).max()

In [7]:
step = 1

In [8]:
print(f"Step {step}: Count of each item (C1):")
c1 = oht.sum()
print(c1)

Step 1: Count of each item (C1):
A    1
C    2
D    1
E    4
I    1
K    5
M    3
N    2
O    3
U    1
Y    3
dtype: int64


In [9]:
step += 1

In [10]:
min_support = 0.6
print(f"\nStep {step}: Frequent one-itemset (L1) with support >= {min_support}:")
l1 = c1[c1 >= min_support]
print(l1)


Step 2: Frequent one-itemset (L1) with support >= 0.6:
A    1
C    2
D    1
E    4
I    1
K    5
M    3
N    2
O    3
U    1
Y    3
dtype: int64


In [11]:
step += 1

In [12]:
print(f"\nStep {step}: Generating two-itemsets (C2):")
c2 = apriori(oht, min_support=min_support, use_colnames=True)
print(c2)


Step 3: Generating two-itemsets (C2):
    support   itemsets
0       0.8        (E)
1       1.0        (K)
2       0.6        (M)
3       0.6        (O)
4       0.6        (Y)
5       0.8     (E, K)
6       0.6     (O, E)
7       0.6     (M, K)
8       0.6     (O, K)
9       0.6     (Y, K)
10      0.6  (O, E, K)


In [13]:
step += 1

In [15]:
print(f"\nStep {step}: Frequent two-itemsets (L2) with support >= {min_support}:")
l2 = c2[c2['support'] >= min_support]
print(l2)


Step 4: Frequent two-itemsets (L2) with support >= 0.6:
    support   itemsets
0       0.8        (E)
1       1.0        (K)
2       0.6        (M)
3       0.6        (O)
4       0.6        (Y)
5       0.8     (E, K)
6       0.6     (O, E)
7       0.6     (M, K)
8       0.6     (O, K)
9       0.6     (Y, K)
10      0.6  (O, E, K)


In [16]:
step += 1

In [17]:
print(f"\nStep {step}: Generating candidates for three-itemsets (C3):")
c3 = apriori(oht, min_support=min_support, use_colnames=True)
print(c3)


Step 5: Generating candidates for three-itemsets (C3):
    support   itemsets
0       0.8        (E)
1       1.0        (K)
2       0.6        (M)
3       0.6        (O)
4       0.6        (Y)
5       0.8     (E, K)
6       0.6     (O, E)
7       0.6     (M, K)
8       0.6     (O, K)
9       0.6     (Y, K)
10      0.6  (O, E, K)


In [18]:
step += 1

In [19]:
print(f"\nStep {step}: Stopping as there are no candidates for three-itemsets.")


Step 6: Stopping as there are no candidates for three-itemsets.


In [20]:
print("\nFinal Frequent Itemsets (L2):")
print(l2)


Final Frequent Itemsets (L2):
    support   itemsets
0       0.8        (E)
1       1.0        (K)
2       0.6        (M)
3       0.6        (O)
4       0.6        (Y)
5       0.8     (E, K)
6       0.6     (O, E)
7       0.6     (M, K)
8       0.6     (O, K)
9       0.6     (Y, K)
10      0.6  (O, E, K)


In [21]:
min_confidence = 0.8

In [22]:
association_rules_df = association_rules(l2, metric="confidence", min_threshold=min_confidence)

In [23]:
print("\nStrong Association Rules with Confidence >= 80%:")
print(association_rules_df)


Strong Association Rules with Confidence >= 80%:
  antecedents consequents  antecedent support  consequent support  support  \
0         (E)         (K)                 0.8                 1.0      0.8   
1         (K)         (E)                 1.0                 0.8      0.8   
2         (O)         (E)                 0.6                 0.8      0.6   
3         (M)         (K)                 0.6                 1.0      0.6   
4         (O)         (K)                 0.6                 1.0      0.6   
5         (Y)         (K)                 0.6                 1.0      0.6   
6      (O, E)         (K)                 0.6                 1.0      0.6   
7      (O, K)         (E)                 0.6                 0.8      0.6   
8         (O)      (E, K)                 0.6                 0.8      0.6   

   confidence  lift  leverage  conviction  zhangs_metric  
0         1.0  1.00      0.00         inf            0.0  
1         0.8  1.00      0.00         1.0          