In [28]:
import pandas as pd
import random
from mlxtend.frequent_patterns import apriori, association_rules

# Pool of items
items = ['Toyota', 'BMW', 'AUDI', 'Merc', 'Land Rover', 'Suzuki', 'Isuzu', 'Volkswagen']

# Generate 10 transactions with 2–5 items each
random.seed(34)
transactions = [[random.choice(items) for _ in range(random.randint(2, 5))] for _ in range(10)]

# Remove duplicates in each transaction
transactions = [list(set(t)) for t in transactions]

# Display transactions
for i, t in enumerate(transactions, 1):
    print(f'Transaction {i}: {t}')


Transaction 1: ['Merc', 'Toyota', 'Isuzu']
Transaction 2: ['Land Rover', 'Isuzu', 'BMW', 'Suzuki']
Transaction 3: ['AUDI', 'BMW']
Transaction 4: ['AUDI', 'Toyota', 'BMW', 'Suzuki']
Transaction 5: ['Land Rover', 'Suzuki']
Transaction 6: ['AUDI', 'Isuzu']
Transaction 7: ['Land Rover', 'Merc', 'Isuzu', 'Suzuki']
Transaction 8: ['AUDI', 'Volkswagen']
Transaction 9: ['BMW', 'Merc', 'Isuzu']
Transaction 10: ['Merc', 'Land Rover', 'BMW', 'Isuzu']


In [29]:
from mlxtend.preprocessing import TransactionEncoder

# One-hot encoding
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_) # converting it into a dateframe

print("\nOne-hot encoded transaction data:")
print(df)



One-hot encoded transaction data:
    AUDI    BMW  Isuzu  Land Rover   Merc  Suzuki  Toyota  Volkswagen
0  False  False   True       False   True   False    True       False
1  False   True   True        True  False    True   False       False
2   True   True  False       False  False   False   False       False
3   True   True  False       False  False    True    True       False
4  False  False  False        True  False    True   False       False
5   True  False   True       False  False   False   False       False
6  False  False   True        True   True    True   False       False
7   True  False  False       False  False   False   False        True
8  False   True   True       False   True   False   False       False
9  False   True   True        True   True   False   False       False


In [30]:
# Find frequent itemsets with support >= 0.3
frequent_itemsets = apriori(df, min_support=0.3, use_colnames=True)
print("\nFrequent itemsets:")
print(frequent_itemsets)



Frequent itemsets:
   support              itemsets
0      0.4                (AUDI)
1      0.5                 (BMW)
2      0.6               (Isuzu)
3      0.4          (Land Rover)
4      0.4                (Merc)
5      0.4              (Suzuki)
6      0.3          (Isuzu, BMW)
7      0.3   (Land Rover, Isuzu)
8      0.4         (Merc, Isuzu)
9      0.3  (Land Rover, Suzuki)


In [31]:
# Generate rules with confidence >= 0.7
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)
print("\nAssociation Rules:")
print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])



Association Rules:
    antecedents   consequents  support  confidence      lift
0  (Land Rover)       (Isuzu)      0.3        0.75  1.250000
1        (Merc)       (Isuzu)      0.4        1.00  1.666667
2  (Land Rover)      (Suzuki)      0.3        0.75  1.875000
3      (Suzuki)  (Land Rover)      0.3        0.75  1.875000


In [32]:
# Filter for Merc -> Isuzu rule 
Merc_Isuzu_rule = rules[(rules['antecedents'] == {'Merc'}) & (rules['consequents'] == {'Isuzu'})]

if not Merc_Isuzu_rule.empty:
    print("\nRule Explanation:")
    rule = Merc_Isuzu_rule.iloc[0]
    print(f"If someone buys {list(rule['antecedents'])[0]}, they are also likely to buy {list(rule['consequents'])[0]} with {rule['confidence']:.2f} confidence.")
else:
    print("\nNo Merc → Isuzu rule found with ≥ 70% confidence.")


Rule Explanation:
If someone buys Merc, they are also likely to buy Isuzu with 1.00 confidence.
