In [5]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Load dataset
df = pd.read_csv("Groceries_dataset.csv", header=None)

# Convert transactions into a list of lists
transactions = []
for i in range(len(df)):
    transactions.append([str(df.values[i, j]) for j in range(len(df.columns)) if str(df.values[i, j]) != 'nan'])

# One-hot encode transactions
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

print("Encoded Transactions:")
print(df_encoded.head())

Encoded Transactions:
   01-01-2014  01-01-2015  01-02-2014  01-02-2015  01-03-2014  01-03-2015  \
0       False       False       False       False       False       False   
1       False       False       False       False       False       False   
2       False       False       False       False       False       False   
3       False       False       False       False       False       False   
4       False       False       False       False       False       False   

   01-04-2014  01-04-2015  01-05-2014  01-05-2015  ...  turkey  vinegar  \
0       False       False       False       False  ...   False    False   
1       False       False       False       False  ...   False    False   
2       False       False       False       False  ...   False    False   
3       False       False       False       False  ...   False    False   
4       False       False       False       False  ...   False    False   

   waffles  whipped/sour cream  whisky  white bread  white wine 

In [6]:
frequent_itemsets = apriori(df_encoded, min_support=0.01, use_colnames=True)

print("\nFrequent Itemsets:")
print(frequent_itemsets.head())


Frequent Itemsets:
    support         itemsets
0  0.013311           (beef)
1  0.017722   (bottled beer)
2  0.024067  (bottled water)
3  0.014729    (brown bread)
4  0.013775         (butter)


In [7]:
# Generate association rules with min_threshold=0.5 (50% confidence)
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.5)

print("\nAssociation Rules:")
print(rules.head())


Association Rules:
Empty DataFrame
Columns: [antecedents, consequents, antecedent support, consequent support, support, confidence, lift, representativity, leverage, conviction, zhangs_metric, jaccard, certainty, kulczynski]
Index: []


In [8]:
strong_rules = rules[(rules['lift'] > 1.5) & (rules['confidence'] > 0.6)]

print("\nStrong Association Rules:")
print(strong_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])


Strong Association Rules:
Empty DataFrame
Columns: [antecedents, consequents, support, confidence, lift]
Index: []
