# 🛒 Market Basket Analysis using Apriori Algorithm
This notebook analyzes shopping transaction data using the Apriori algorithm to find associations between items.

## 🔹 Step 1: Load the Data

In [35]:
import pandas as pd

df = pd.read_csv("Dataset.csv")

df.head()

Unnamed: 0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
0,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
1,chutney,,,,,,,,,,,,,,,,,,,
2,turkey,avocado,,,,,,,,,,,,,,,,,,
3,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,
4,low fat yogurt,,,,,,,,,,,,,,,,,,,


## 🔹 Step 2: Convert Each Row to a List of Items

In [36]:
transactions = df.apply(lambda row: row.dropna().tolist(), axis=1).tolist()

transactions[:5]

[['burgers', 'meatballs', 'eggs'],
 ['chutney'],
 ['turkey', 'avocado'],
 ['mineral water', 'milk', 'energy bar', 'whole wheat rice', 'green tea'],
 ['low fat yogurt']]

## 🔹 Step 3: Split Data into Training and Testing Sets

In [37]:
from sklearn.model_selection import train_test_split

train, test = train_test_split(transactions, test_size=0.2, random_state=42)

## 🔹 Step 4: Convert Training Data to True/False Table

In [38]:
! pip install mlxtend

Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: C:\Users\tvs21\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [39]:
from mlxtend.preprocessing import TransactionEncoder

te = TransactionEncoder()
train_array = te.fit(train).transform(train)
train_df = pd.DataFrame(train_array, columns=te.columns_)
train_df.head()

Unnamed: 0,almonds,antioxydant juice,asparagus,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,body spray,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


## 🔹 Step 5: Find Frequent Itemsets

In [40]:
from mlxtend.frequent_patterns import apriori

frequent_itemsets = apriori(train_df, min_support=0.05, use_colnames=True)


frequent_itemsets

Unnamed: 0,support,itemsets
0,0.0875,(burgers)
1,0.078167,(cake)
2,0.0605,(chicken)
3,0.163,(chocolate)
4,0.078833,(cookies)
5,0.050667,(cooking oil)
6,0.176333,(eggs)
7,0.078667,(escalope)
8,0.1675,(french fries)
9,0.062833,(frozen smoothie)


## 🔹 Step 6: Create If-Then Rules

In [41]:
from mlxtend.frequent_patterns import association_rules

rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)
rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift


##  ظ🔹   Step 7: Check the Rules on Unseen Data

In [42]:
cleaned_test = [[item.strip().lower() for item in basket] for basket in test]

def rule_applies(rule, transaction):
    return rule.issubset(set(transaction))

evaluated_rules = []

for i, row in rules.iterrows():
    ant = set([item.strip().lower() for item in row['antecedents']])
    con = set([item.strip().lower() for item in row['consequents']])
    
    matched = [t for t in cleaned_test if rule_applies(ant, t)]
    success = [t for t in matched if rule_applies(con, t)]

    if matched:
        test_conf = len(success) / len(matched)
        evaluated_rules.append({
            'antecedents': ant,
            'consequents': con,
            'test_confidence': round(test_conf, 2)
        })

pd.DataFrame(evaluated_rules)
