In [3]:
import pandas as pd
import ast
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Read the product-relation records into a DataFrame
df = pd.read_csv("product_relation_dataset.csv")

# Each AdditionalProducts cell holds the text of a Python literal; parse it
# into the real object so it can be concatenated onto a list below
df['AdditionalProducts'] = df['AdditionalProducts'].map(ast.literal_eval)


def _row_to_transaction(row):
    """Return the row's MainProduct followed by its AdditionalProducts."""
    return [row['MainProduct']] + row['AdditionalProducts']


# One transaction per row: the main product plus everything bought with it
df['Products'] = df.apply(_row_to_transaction, axis=1)

# Turn the transaction lists into the item-per-column table apriori consumes
te = TransactionEncoder()
te.fit(df['Products'])
te_ary = te.transform(df['Products'])
df_trans = pd.DataFrame(data=te_ary, columns=te.columns_)

# Mine frequent itemsets (min_support=0.01), labelled by item name
frequent_itemsets = apriori(df_trans, min_support=0.01, use_colnames=True)

# Derive association rules, thresholding on confidence at 0.1
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.1,
)

# Keep only rules with lift above 1.0 and positive leverage, ordered by lift
# and then confidence, both descending
lift_above_one = rules['lift'] > 1.0
leverage_positive = rules['leverage'] > 0
strong_rules = rules[lift_above_one & leverage_positive]
strong_rules = strong_rules.sort_values(
    by=['lift', 'confidence'],
    ascending=[False, False],
)

# Print the ten highest-confidence rules. NOTE: this reads from the unfiltered
# `rules` table, not from `strong_rules`.
rules_by_confidence = rules.sort_values(by="confidence", ascending=False)
print(rules_by_confidence.head(10))


                  antecedents         consequents  antecedent support  \
0            (Bike Insurance)     (Car Insurance)                 0.2   
1             (Car Insurance)    (Bike Insurance)                 0.2   
3            (Bike Insurance)       (Credit Card)                 0.2   
5            (Bike Insurance)              (Loan)                 0.2   
7             (Car Insurance)       (Credit Card)                 0.2   
17                     (Loan)       (Credit Card)                 0.8   
11            (Demat Account)       (Credit Card)                 0.4   
9             (Car Insurance)              (Loan)                 0.2   
32  (Personal Accident Cover)  (Health Insurance)                 0.2   
29         (Health Insurance)    (Life Insurance)                 0.4   

    consequent support  support  confidence  lift  representativity  leverage  \
0                  0.2      0.2         1.0  5.00               1.0      0.16   
1                  0.2      0.2   