<a href="https://colab.research.google.com/github/Arpitd07/DWM-Experiments/blob/main/DWM_EXP_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from itertools import combinations

# Function to get frequent itemsets based on minimum support
def get_frequent_itemsets(transactions, min_support):
    """Count single items across transactions and keep the frequent ones.

    Args:
        transactions: Iterable of transactions, each an iterable of hashable
            items.
        min_support: Minimum number of transactions an item must appear in
            (an absolute count, compared with ``>=``).

    Returns:
        Dict mapping each frequent item to its support count, ordered by the
        item's first appearance in ``transactions``.
    """
    itemsets = {}
    for transaction in transactions:
        # Support counts transactions, not occurrences: dict.fromkeys drops
        # repeats within a single transaction while keeping first-seen order
        # (a plain set would make the resulting key order unpredictable).
        for item in dict.fromkeys(transaction):
            itemsets[item] = itemsets.get(item, 0) + 1

    # Filter itemsets to only include those that meet or exceed the minimum support
    return {item: support for item, support in itemsets.items() if support >= min_support}

# Function to generate candidate itemsets of size k
def get_candidate_itemsets(frequent_itemsets, k):
    """Build size-``k`` candidate itemsets from the previous frequent level.

    Args:
        frequent_itemsets: Dict whose keys are the frequent itemsets of the
            previous level. Keys are either single items (level 1) or tuples
            of items (levels 2 and up).
        k: Size of the candidates to generate.

    Returns:
        List of ``k``-tuples of items, in ``itertools.combinations`` order
        over the items in first-seen order.
    """
    # For k > 2 the previous level is keyed by tuples of items, so the keys
    # must be flattened back to individual items; combining the tuples
    # themselves would produce tuples of tuples that match no transaction.
    # For k <= 2 every key is treated as one item, as before.
    items = {}        # insertion-ordered set of individual items
    previous = set()  # previous-level itemsets as frozensets, for pruning
    for key in frequent_itemsets:
        members = key if k > 2 and isinstance(key, tuple) else (key,)
        previous.add(frozenset(members))
        for item in members:
            items.setdefault(item, None)

    # Apriori pruning only applies when the input really is the (k-1) level;
    # otherwise every combination is returned unchanged.
    can_prune = k > 1 and all(len(itemset) == k - 1 for itemset in previous)

    candidates = []
    for combination in combinations(items, k):
        # A k-itemset can be frequent only if all its (k-1)-subsets are.
        if can_prune and not all(
            frozenset(subset) in previous
            for subset in combinations(combination, k - 1)
        ):
            continue
        candidates.append(combination)
    return candidates


# Apriori algorithm to find all frequent itemsets
def apriori(transactions, min_support):
    """Collect frequent itemsets level by level.

    Starts from the frequent single items, then repeatedly asks
    ``get_candidate_itemsets`` for candidates one item larger, counts how
    many transactions contain each candidate, and keeps those whose count is
    at least ``min_support``. Stops at the first level that yields nothing.

    Args:
        transactions: Sequence of transactions, each an iterable of hashable
            items. It is iterated more than once.
        min_support: Minimum number of transactions (absolute count) an
            itemset must appear in.

    Returns:
        List of dicts mapping itemset -> support count. The first dict is the
        single-item level (keyed by item, possibly empty); each later dict is
        keyed by candidate tuples and is only appended when non-empty.
    """
    k = 1
    # Initial set of frequent itemsets
    frequent_itemsets = get_frequent_itemsets(transactions, min_support)
    all_frequent_itemsets = [frequent_itemsets]

    # Convert each transaction to a set once, instead of once per
    # (transaction, candidate) pair inside the counting loop.
    transaction_sets = [set(transaction) for transaction in transactions]

    # Iterate to find larger itemsets
    while frequent_itemsets:
        k += 1
        # Generate candidate itemsets of size k
        candidates = get_candidate_itemsets(frequent_itemsets, k)
        candidate_supports = {candidate: 0 for candidate in candidates}

        # Calculate support for each candidate itemset
        for items in transaction_sets:
            for candidate in candidates:
                if items.issuperset(candidate):
                    candidate_supports[candidate] += 1

        # Filter candidate itemsets to only include those that meet or exceed the minimum support
        frequent_itemsets = {
            itemset: support
            for itemset, support in candidate_supports.items()
            if support >= min_support
        }
        if frequent_itemsets:
            all_frequent_itemsets.append(frequent_itemsets)

    return all_frequent_itemsets


# Sample market-basket data: one list of purchased items per transaction
transactions = [
    ['Milk', 'Bread', 'Nutella'],
    ['Bread', 'Nutella'],
    ['Milk', 'Bread', 'Sugar', 'Nutella'],
    ['Milk', 'Bread'],
    ['Milk', 'Sugar'],
    ['Bread', 'Sugar', 'Nutella'],
    ['Milk', 'Nutella'],
]

# Absolute threshold: an itemset must occur in at least this many transactions
min_support = 2

# Run the hand-written Apriori and show one dict of supports per itemset size
frequent_itemsets = apriori(transactions, min_support)
print(frequent_itemsets)

[{'Milk': 5, 'Bread': 5, 'Nutella': 5, 'Sugar': 3}, {('Milk', 'Bread'): 3, ('Milk', 'Nutella'): 3, ('Milk', 'Sugar'): 2, ('Bread', 'Nutella'): 4, ('Bread', 'Sugar'): 2, ('Nutella', 'Sugar'): 2}]


In [None]:
# Import required libraries
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

data = {'Transaction': [
    ['Milk', 'Bread', 'Nutella'],
    ['Bread', 'Nutella'],
    ['Milk', 'Bread', 'Sugar', 'Nutella'],
    ['Milk', 'Bread'],
    ['Milk', 'Sugar'],
    ['Bread', 'Sugar', 'Nutella'],
    ['Milk', 'Nutella']
]}

# Convert dataset into DataFrame: one row per transaction, one column per
# item position (shorter transactions are padded with missing values)
df = pd.DataFrame(data['Transaction'])

# Convert transaction data into one-hot encoded format:
# stack() gives one (transaction, position) -> item entry per row,
# get_dummies() makes one indicator column per distinct item, and the
# groupby collapses the entries back to a single row per transaction.
# The cast to bool gives mlxtend the boolean frame it expects; summed
# integer columns trigger its non-bool warning.
basket = pd.get_dummies(df.stack()).groupby(level=0).sum().astype(bool)

# Set minimum support and confidence (support here is a fraction of
# transactions, not an absolute count)
min_support = 0.3
min_confidence = 0.6

# Apply Apriori algorithm to get frequent itemsets
frequent_itemsets = apriori(basket, min_support=min_support, use_colnames=True)

# Generate association rules
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_confidence)

# Display results
print("Frequent Itemsets:\n", frequent_itemsets)
print("\nAssociation Rules:\n", rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])


Frequent Itemsets:
     support          itemsets
0  0.714286           (Bread)
1  0.714286            (Milk)
2  0.714286         (Nutella)
3  0.428571           (Sugar)
4  0.428571     (Milk, Bread)
5  0.571429  (Nutella, Bread)
6  0.428571   (Milk, Nutella)

Association Rules:
   antecedents consequents   support  confidence  lift
0      (Milk)     (Bread)  0.428571         0.6  0.84
1     (Bread)      (Milk)  0.428571         0.6  0.84
2   (Nutella)     (Bread)  0.571429         0.8  1.12
3     (Bread)   (Nutella)  0.571429         0.8  1.12
4      (Milk)   (Nutella)  0.428571         0.6  0.84
5   (Nutella)      (Milk)  0.428571         0.6  0.84


