In [2]:
!pip install mlxtend

Collecting mlxtend
  Downloading mlxtend-0.23.1-py3-none-any.whl.metadata (7.3 kB)
Downloading mlxtend-0.23.1-py3-none-any.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
    --------------------------------------- 0.0/1.4 MB 435.7 kB/s eta 0:00:04
   - -------------------------------------- 0.1/1.4 MB 544.7 kB/s eta 0:00:03
   -- ------------------------------------- 0.1/1.4 MB 573.4 kB/s eta 0:00:03
   -- ------------------------------------- 0.1/1.4 MB 581.0 kB/s eta 0:00:03
   --- ------------------------------------ 0.1/1.4 MB 504.4 kB/s eta 0:00:03
   --- ------------------------------------ 0.1/1.4 MB 450.6 kB/s eta 0:00:03
   --- ------------------------------------ 0.1/1.4 MB 448.2 kB/s eta 0:00:03
   ---- ----------------------------------- 0.2/1.4 MB 446.5 kB/s eta 0:00:03
   ---- ----------------------------------- 0.2/1.4 MB 446.5 kB/s eta 0:00:03
   ----- ---------------

In [3]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# 1. Read the data
# Each row holds one basket, stored as a comma-separated string of item labels.
# NOTE(review): the Transaction ids repeat (every id appears twice) and are not
# used by the encoding below, so each row is treated as its own basket.
# Confirm that rows sharing an id were not meant to be merged.
data = {
    'Transaction': [1, 1, 2, 2, 3, 3, 4, 4],
    'Items': ['A,B,C', 'A,C,D', 'B,D', 'A,B,C,D', 'A,B', 'C,D', 'A,C,D', 'B,C']
}
df = pd.DataFrame(data)
# One-hot encode the baskets: one column per item, one row per basket.
# str.get_dummies produces 0/1 integers; cast to bool because mlxtend's
# frequent-pattern functions expect a boolean DataFrame and deprecate
# non-bool input.
transactions = df['Items'].str.get_dummies(',').astype(bool)

In [4]:
# 2. Mine the frequent itemsets: keep every itemset whose support is at least 8%
frequent_itemsets = apriori(
    transactions,
    use_colnames=True,  # label itemsets with the item names, not column indices
    min_support=0.08,
)
# Heading first, then the table on the following lines.
print("Frequent Itemsets:", frequent_itemsets, sep="\n")

Frequent Itemsets:
    support      itemsets
0     0.625           (A)
1     0.625           (B)
2     0.750           (C)
3     0.625           (D)
4     0.375        (B, A)
5     0.500        (A, C)
6     0.375        (D, A)
7     0.375        (B, C)
8     0.250        (D, B)
9     0.500        (D, C)
10    0.250     (B, A, C)
11    0.125     (D, B, A)
12    0.375     (D, A, C)
13    0.125     (D, B, C)
14    0.125  (D, B, A, C)




In [5]:
# 3. Display the association rules whose confidence is at least 50%
# 4. Find all rules derived from these itemsets whose confidence reaches that threshold
rules = association_rules(
    frequent_itemsets,
    min_threshold=0.5,
    metric="confidence",
)
# Show only the columns needed to read each rule and judge its strength.
print(
    "\nAssociation Rules:",
    rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']],
    sep="\n",
)


Association Rules:
   antecedents consequents  support  confidence      lift
0          (B)         (A)    0.375    0.600000  0.960000
1          (A)         (B)    0.375    0.600000  0.960000
2          (A)         (C)    0.500    0.800000  1.066667
3          (C)         (A)    0.500    0.666667  1.066667
4          (D)         (A)    0.375    0.600000  0.960000
5          (A)         (D)    0.375    0.600000  0.960000
6          (B)         (C)    0.375    0.600000  0.800000
7          (C)         (B)    0.375    0.500000  0.800000
8          (D)         (C)    0.500    0.800000  1.066667
9          (C)         (D)    0.500    0.666667  1.066667
10      (B, A)         (C)    0.250    0.666667  0.888889
11      (B, C)         (A)    0.250    0.666667  1.066667
12      (A, C)         (B)    0.250    0.500000  0.800000
13      (D, B)         (A)    0.125    0.500000  0.800000
14      (D, A)         (C)    0.375    1.000000  1.333333
15      (D, C)         (A)    0.375    0.750000  1.2