In [19]:
# Association Rule Mining
# Association Rule Mining is a technique used to discover relationships between items in large datasets.
# It helps answer questions like: "Which products are frequently bought together?"
# It's widely used in:
# ✔ Market Basket Analysis (e.g., "People who buy bread often buy butter.")
# ✔ Recommender Systems (e.g., Amazon’s "Customers who bought this also bought…")
# ✔ Fraud Detection (finding unusual transaction patterns)

# Apriori Algorithm Overview
# The Apriori Algorithm is the most popular technique for association rule mining.
# It works in two main steps:
# 1️.) Find Frequent Itemsets
#    - Identify sets of items that appear together frequently in transactions.
#    - This is based on Support (minimum occurrence threshold).
# 2️.) Generate Association Rules
#    - Find relationships between these frequent itemsets.
#    - Rules are evaluated using Confidence and Lift.

# Key Metrics in Apriori:
# Support = (Transactions with A and B) / (Total Transactions) → Measures how often an itemset appears in transactions.
# Confidence = (Transactions with A and B) / (Transactions with A) → Measures the probability that B is bought given A.
# Lift = (Confidence of (A → B)) / (Expected Confidence if A and B were independent) → Measures the strength of the association.
# High Lift (>1) → A & B are positively correlated (strong relationship).
# Lift = 1 → No correlation.
# Lift <1 → A & B are negatively correlated.

In [20]:
pip install apriori



In [21]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
import seaborn as sns
import matplotlib.pyplot as plt

In [22]:
store_data = pd.read_csv('store_data.csv')

In [23]:
store_data.head()

Unnamed: 0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
0,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
1,chutney,,,,,,,,,,,,,,,,,,,
2,turkey,avocado,,,,,,,,,,,,,,,,,,
3,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,
4,low fat yogurt,,,,,,,,,,,,,,,,,,,


In [24]:
store_data = pd.read_csv('store_data.csv', header=None)

In [25]:
store_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,


In [26]:
# Data Proprocessing
records = []
for i in range(0, 7501):
    records.append([str(store_data.values[i,j]) for j in range(0, 20)])

In [27]:
!pip install mlxtend --quiet
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
df = pd.DataFrame(records)
df_encoded = pd.get_dummies(df.stack()).groupby(level=0).max()

# Apply Apriori Algorithm
frequent_itemsets = apriori(df_encoded, min_support=0.0045, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.2)
rules = rules[(rules["lift"] >= 3) & (rules["confidence"] >= 0.2)]
for index, row in rules.iterrows():
    antecedents = ', '.join(list(row['antecedents']))
    consequents = ', '.join(list(row['consequents']))
    print(f"Rule: {antecedents} -> {consequents}")
    print(f"Support: {row['support']}")
    print(f"Confidence: {row['confidence']}")
    print(f"Lift: {row['lift']}")
    print("="*40)


Rule: light cream -> chicken
Support: 0.004532728969470737
Confidence: 0.29059829059829057
Lift: 4.84395061728395
Rule: mushroom cream sauce -> escalope
Support: 0.005732568990801226
Confidence: 0.3006993006993007
Lift: 3.790832696715049
Rule: pasta -> escalope
Support: 0.005865884548726837
Confidence: 0.3728813559322034
Lift: 4.700811850163794
Rule: herb & pepper -> ground beef
Support: 0.015997866951073192
Confidence: 0.3234501347708895
Lift: 3.2919938411349285
Rule: tomato sauce -> ground beef
Support: 0.005332622317024397
Confidence: 0.3773584905660377
Lift: 3.840659481324083
Rule: whole wheat pasta -> olive oil
Support: 0.007998933475536596
Confidence: 0.2714932126696833
Lift: 4.122410097642296
Rule: pasta -> shrimp
Support: 0.005065991201173177
Confidence: 0.3220338983050847
Lift: 4.506672147735896
Rule: light cream, nan -> chicken
Support: 0.004532728969470737
Confidence: 0.29059829059829057
Lift: 4.84395061728395
Rule: light cream -> chicken, nan
Support: 0.004532728969470737
C