<a href="https://colab.research.google.com/github/AUMANSH/Marwadi-University-Data-Science-Capstone-Projects/blob/main/DWDM_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Imports

In [None]:
import itertools

## Define Datasets

In [None]:
# Multiple datasets stored directly in the notebook.
# `datasets` maps a dataset name to a list of transactions; each transaction
# is a set of item-name strings. Each dataset below holds 15 transactions.
datasets = {
    "Grocery": [
        {"Milk","Bread","Butter"},
        {"Bread","Butter","Jam"},
        {"Milk","Bread","Butter","Jam"},
        {"Milk","Bread"},
        {"Milk","Butter"},
        {"Bread","Butter"},
        {"Milk","Bread","Jam"},
        {"Milk","Jam"},
        {"Milk","Bread","Butter","Eggs"},
        {"Bread","Jam","Eggs"},
        {"Milk","Butter","Eggs"},
        {"Bread","Butter","Jam","Eggs"},
        {"Milk","Bread","Butter"},
        {"Milk","Jam","Eggs"},
        {"Bread","Butter","Jam"},
    ],
    "Electronics": [
        {"Phone","Case","Charger"},
        {"Laptop","Mouse","Bag"},
        {"Phone","Earbuds"},
        {"Laptop","Mouse","Charger"},
        {"Camera","Tripod","SD Card"},
        {"Phone","Charger","Earbuds","Case"},
        {"Laptop","Bag"},
        {"Camera","SD Card"},
        {"Earbuds","Case"},
        {"Phone","Power Bank","Charger"},
        {"Laptop","Dock","Mouse"},
        {"Camera","Tripod"},
        {"Phone","Screen Protector","Case"},
        {"Mouse","Keyboard"},
        {"Laptop","Keyboard","Mouse"},
    ],
    "Pharmacy": [
        {"Paracetamol","Bandage","Antiseptic"},
        {"Vitamin C","Zinc","Paracetamol"},
        {"Cough Syrup","Lozenges"},
        {"Paracetamol","Antiseptic"},
        {"Vitamin C","Paracetamol"},
        {"Bandage","Gauze","Antiseptic"},
        {"Vitamin C","Zinc"},
        {"Cough Syrup","Thermometer"},
        {"Paracetamol","Lozenges"},
        {"Zinc","Paracetamol","Bandage"},
        {"Vitamin C","Paracetamol","Thermometer"},
        {"Antiseptic","Cotton"},
        {"Lozenges","Thermometer"},
        {"Zinc","Vitamin C","Paracetamol"},
        {"Bandage","Antiseptic","Cotton"},
    ]
}

## Helper Functions

In [None]:
# Support count function
def support_count(itemset, transactions):
    """Count the transactions that contain every item of ``itemset``.

    Args:
        itemset: Set-like object exposing ``issubset`` (e.g. a ``frozenset``).
        transactions: Iterable of item collections, one per transaction.

    Returns:
        int: Number of transactions of which ``itemset`` is a subset.
    """
    matches = 0
    for basket in transactions:
        if itemset.issubset(basket):
            matches += 1
    return matches

# Candidate generation
def generate_candidates(prev_frequents, k):
    """Join frequent (k-1)-itemsets into candidate k-itemsets.

    Two itemsets are joined when their first ``k - 2`` items (in sorted
    order) are equal; the union is kept only if it has exactly ``k`` items.

    Args:
        prev_frequents: Iterable of frozensets, the itemsets found frequent
            in the previous pass.
        k: Size of the candidate itemsets to produce.

    Returns:
        set: Candidate itemsets (frozensets) of size ``k``.
    """
    # Sort every itemset once up front; the pairwise loop below would
    # otherwise re-sort the same itemsets for every pair they take part in.
    ordered = [sorted(itemset) for itemset in prev_frequents]
    prefix_len = k - 2

    candidates = set()
    for a, b in itertools.combinations(ordered, 2):
        if a[:prefix_len] == b[:prefix_len]:
            c = frozenset(a) | frozenset(b)
            # Guards against duplicate inputs whose union is still size k-1.
            if len(c) == k:
                candidates.add(c)
    return candidates

# Candidate pruning
def prune_candidates(candidates, prev_frequents, k):
    """Keep only candidates whose every (k-1)-subset was frequent.

    Args:
        candidates: Iterable of candidate itemsets (frozensets) of size ``k``.
        prev_frequents: Iterable of frozensets found frequent in the
            previous pass.
        k: Size of the candidate itemsets.

    Returns:
        set: The candidates for which all ``k - 1`` sized subsets appear in
        ``prev_frequents``.
    """
    known_frequent = set(prev_frequents)

    def _all_subsets_frequent(candidate):
        # A single missing (k-1)-subset disqualifies the candidate.
        for combo in itertools.combinations(candidate, k - 1):
            if frozenset(combo) not in known_frequent:
                return False
        return True

    return {cand for cand in candidates if _all_subsets_frequent(cand)}

## Apriori Algorithm

In [None]:
def apriori(transactions, min_support):
    """Mine frequent itemsets level by level (Apriori).

    Args:
        transactions: Sized collection of transactions, each an iterable of
            hashable items. Repeated items inside one transaction are
            counted once.
        min_support: Minimum fraction of transactions an itemset must
            appear in to be considered frequent.

    Returns:
        dict: Maps each frequent itemset (frozenset) to its support, i.e.
        the fraction of transactions containing it. Empty when there are no
        transactions.
    """
    N = len(transactions)
    if N == 0:
        return {}

    # Normalise every transaction to a set once. This keeps the 1-itemset
    # counts consistent with the subset-based counts used for larger
    # itemsets (a list transaction with a repeated item must not be counted
    # twice) and makes the issubset checks below cheap.
    baskets = [set(t) for t in transactions]

    # 1-itemsets
    item_counts = {}
    for basket in baskets:
        for item in basket:
            key = frozenset([item])
            item_counts[key] = item_counts.get(key, 0) + 1

    # Supports are recorded as each level is counted, so no second scan of
    # the transactions is needed at the end.
    support = {s: c / N for s, c in item_counts.items() if c / N >= min_support}
    prev = list(support)
    k = 2

    # Stop as soon as a level yields no frequent itemsets.
    while prev:
        Ck = generate_candidates(prev, k)
        Ck = prune_candidates(Ck, prev, k)

        counts = {c: 0 for c in Ck}
        for basket in baskets:
            for c in Ck:
                if c.issubset(basket):
                    counts[c] += 1

        Lk = {c: cnt / N for c, cnt in counts.items() if cnt / N >= min_support}
        support.update(Lk)
        prev = list(Lk)
        k += 1

    return support

## Rule Generation

In [None]:
def generate_rules(frequents_support, min_confidence):
    """Derive association rules A -> B from frequent itemsets.

    Args:
        frequents_support: Mapping from itemset (frozenset) to its support.
        min_confidence: Minimum confidence a rule needs to be reported.

    Returns:
        list: Tuples ``(A, B, support, confidence, lift)`` where ``A`` and
        ``B`` partition an itemset of size >= 2. ``lift`` is ``None`` when
        the support of ``B`` is missing or not positive.
    """
    rules = []
    for itemset, sup_AB in frequents_support.items():
        # Rules need a non-empty antecedent and consequent.
        if len(itemset) < 2:
            continue

        items = list(itemset)
        for r_len in range(1, len(items)):
            for combo in itertools.combinations(items, r_len):
                antecedent = frozenset(combo)
                sup_A = frequents_support.get(antecedent)
                # Skip antecedents with unknown or non-positive support.
                if not (sup_A and sup_A > 0):
                    continue

                confidence = sup_AB / sup_A
                if not (confidence >= min_confidence):
                    continue

                consequent = itemset - antecedent
                sup_B = frequents_support.get(consequent, None)
                if sup_B and sup_B > 0:
                    lift = confidence / sup_B
                else:
                    lift = None
                rules.append((antecedent, consequent, sup_AB, confidence, lift))
    return rules

## Run on All Datasets

In [None]:
# Thresholds shared by every dataset run below.
min_support = 0.3
min_confidence = 0.6

for name, txns in datasets.items():
    print(f"\n=== Dataset: {name} ===")
    print(f"Transactions: {len(txns)}")

    freq_support = apriori(txns, min_support=min_support)
    print(f"Frequent Itemsets: {len(freq_support)}")

    rules = generate_rules(freq_support, min_confidence=min_confidence)
    print(f"Association Rules: {len(rules)}\n")

    if rules:   # if rules exist
        print("Top Rules:")
        # Sort by confidence, then support (both descending); show at most 10.
        top_rules = sorted(rules, key=lambda x: (-x[3], -x[2]))[:10]
        for i, (A, B, sup, conf, lift) in enumerate(top_rules, 1):
            # generate_rules reports lift as None when the consequent's
            # support is unavailable; applying ":.2f" to None would raise
            # TypeError, so fall back to a placeholder.
            lift_text = f"{lift:.2f}" if lift is not None else "n/a"
            print(f"{i}. {set(A)} -> {set(B)} | support={sup:.2f}, confidence={conf:.2f}, lift={lift_text}")
    else:
        print("⚠️ No association rules found with given thresholds.")
    print("-"*50)


=== Dataset: Grocery ===
Transactions: 15
Frequent Itemsets: 9
Association Rules: 6

Top Rules:
1. {'Butter'} -> {'Bread'} | support=0.53, confidence=0.80, lift=1.09
2. {'Jam'} -> {'Bread'} | support=0.40, confidence=0.75, lift=1.02
3. {'Bread'} -> {'Butter'} | support=0.53, confidence=0.73, lift=1.09
4. {'Butter'} -> {'Milk'} | support=0.40, confidence=0.60, lift=0.90
5. {'Milk'} -> {'Butter'} | support=0.40, confidence=0.60, lift=0.90
6. {'Milk'} -> {'Bread'} | support=0.40, confidence=0.60, lift=0.82
--------------------------------------------------

=== Dataset: Electronics ===
Transactions: 15
Frequent Itemsets: 3
Association Rules: 0

⚠️ No association rules found with given thresholds.
--------------------------------------------------

=== Dataset: Pharmacy ===
Transactions: 15
Frequent Itemsets: 3
Association Rules: 0

⚠️ No association rules found with given thresholds.
--------------------------------------------------
