<a href="https://colab.research.google.com/github/KayalvizhiT513/Apriori-Algorithm/blob/main/apriori.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import numpy as np
import pandas as pd

In [1]:
def generate_association_rules(frequent_itemsets, transactions, min_confidence):
    """
    Generate association rules from frequent itemsets.

    Parameters:
        frequent_itemsets (list of lists): List of frequent itemsets obtained from Apriori algorithm.
        transactions (list of lists): List of transactions, where each transaction is a list of items.
        min_confidence (float): Minimum confidence threshold.

    Returns:
        list: List of association rules in the format (X, Y, confidence).
    """
    association_rules = []

    for itemset in frequent_itemsets:
        if len(itemset) > 1:
            for i in range(1, len(itemset)):
                antecedent = itemset[:i]
                consequent = itemset[i:]

                support_antecedent = sum(1 for transaction in transactions if set(antecedent).issubset(set(transaction)))
                support_itemset = sum(1 for transaction in transactions if set(itemset).issubset(set(transaction)))

                confidence = support_itemset / support_antecedent

                if confidence >= min_confidence:
                    association_rules.append((antecedent, consequent, confidence))

    return association_rules


def apriori(transactions, min_support):
    """
    Perform the Apriori algorithm to find frequent itemsets.

    Parameters:
        transactions (list of lists): List of transactions, where each transaction is a list of items.
        min_support (float): Minimum support threshold.

    Returns:
        list: List of frequent itemsets.
    """
    item_counts = {}
    n_transactions = len(transactions)

    # Count occurrences of each item
    for transaction in transactions:
        for item in transaction:
            item_counts[item] = item_counts.get(item, 0) + 1
    print(item_counts)

    # Find frequent 1-itemsets
    frequent_sets = [[item] for item, count in item_counts.items() if count >= min_support]
    print(frequent_sets)

    k = 2
    candidates = []
    while frequent_sets:
        candidates = generate_candidates(frequent_sets, k)
        print("op",candidates)

        # Count occurrences of candidates in transactions
        candidate_counts = {tuple(candidate): 0 for candidate in candidates}

        for transaction in transactions:
            for candidate in candidates:
                if set(candidate).issubset(set(transaction)):
                    candidate_counts[tuple(candidate)] += 1

        # Prune candidates that do not meet minimum support
        frequent_sets = [list(candidate) for candidate, count in candidate_counts.items() if count >= min_support]

        k += 1
        # Generating association rules
        association_rules = generate_association_rules(candidates, transactions, min_confidence)

        # Print association rules
        print("Association Rules:")
        for rule in association_rules:
            print(rule)

    return candidates



In [3]:
def generate_candidates(prev_candidates, k):
    """
    Generate candidate itemsets of size k from the previous frequent itemsets.

    Parameters:
        prev_candidates (list): Previous frequent itemsets.
        k (int): Size of the candidate itemsets to be generated.

    Returns:
        list: Candidate itemsets of size k.
    """
    candidates = []
    n = len(prev_candidates)

    # Generate candidates by joining previous frequent itemsets
    for i in range(n):
        for j in range(i + 1, n):
            prev_set1 = prev_candidates[i]
            prev_set2 = prev_candidates[j]

            # Ensure the first k-2 elements are the same in both sets before joining
            if prev_set1[:k - 2] == prev_set2[:k - 2]:
                candidates.append(sorted(list(set(prev_set1) | set(prev_set2))))

    return candidates


def prune_candidates(candidates, prev_frequent_sets, k):
    """
    Prune candidate itemsets that contain subsets not in the previous frequent itemsets.

    Parameters:
        candidates (list): Candidate itemsets to be pruned.
        prev_frequent_sets (list): Previous frequent itemsets.
        k (int): Size of the candidate itemsets.

    Returns:
        list: Pruned candidate itemsets.
    """
    pruned_candidates = []

    for candidate in candidates:
        subsets = [candidate[:i] + candidate[i + 1:] for i in range(k)]
        if all(subset in prev_frequent_sets for subset in subsets):
            pruned_candidates.append(candidate)

    return pruned_candidates


def generate_association_rules(frequent_itemsets, transactions, min_confidence):
    """
    Generate association rules from frequent itemsets.

    Parameters:
        frequent_itemsets (list of lists): List of frequent itemsets obtained from Apriori algorithm.
        transactions (list of lists): List of transactions, where each transaction is a list of items.
        min_confidence (float): Minimum confidence threshold.

    Returns:
        list: List of association rules in the format (X, Y, confidence).
    """

    association_rules = []
    print("Frequent Itemset:",frequent_itemsets)
    for itemset in frequent_itemsets:
        if len(itemset) > 1:
            for i in range(1, len(itemset)):
                antecedent = itemset[:i]
                consequent = itemset[i:]

                support_antecedent = sum(1 for transaction in transactions if set(antecedent).issubset(set(transaction)))
                support_itemset = sum(1 for transaction in transactions if set(itemset).issubset(set(transaction)))

                confidence = support_itemset / support_antecedent
                #print("antecedant",antecedent,"consequent",consequent,"confidence",confidence)

                if confidence >= min_confidence:
                    association_rules.append((antecedent, consequent, confidence))

    return association_rules




In [10]:
# Example usage:
transactions = [['bread', 'milk'],
                ['bread', 'diaper', 'beer', 'eggs'],
                ['milk', 'diaper', 'beer', 'cola'],
                ['bread', 'milk', 'diaper', 'beer'],
                ['bread', 'milk', 'diaper', 'cola']]

min_support = 3

In [11]:
frequent_itemsets = apriori(transactions, min_support)
print("Frequent Itemsets:")
print(frequent_itemsets)

{'bread': 4, 'milk': 4, 'diaper': 4, 'beer': 3, 'eggs': 1, 'cola': 2}
[['bread'], ['milk'], ['diaper'], ['beer']]
op [['bread', 'milk'], ['bread', 'diaper'], ['beer', 'bread'], ['diaper', 'milk'], ['beer', 'milk'], ['beer', 'diaper']]
Frequent Itemset: [['bread', 'milk'], ['bread', 'diaper'], ['beer', 'bread'], ['diaper', 'milk'], ['beer', 'milk'], ['beer', 'diaper']]
Association Rules:
(['bread'], ['milk'], 0.75)
(['bread'], ['diaper'], 0.75)
(['beer'], ['bread'], 0.6666666666666666)
(['diaper'], ['milk'], 0.75)
(['beer'], ['milk'], 0.6666666666666666)
(['beer'], ['diaper'], 1.0)
op [['bread', 'diaper', 'milk']]
Frequent Itemset: [['bread', 'diaper', 'milk']]
Association Rules:
(['bread'], ['diaper', 'milk'], 0.5)
(['bread', 'diaper'], ['milk'], 0.6666666666666666)
Frequent Itemsets:
[['bread', 'diaper', 'milk']]


In [12]:
min_support = 3
min_confidence = 0.5

# Generating association rules
association_rules = generate_association_rules(frequent_itemsets, transactions, min_confidence)

# Print association rules
print("Association Rules:")
for rule in association_rules:
    print(rule)


Frequent Itemset: [['bread', 'diaper', 'milk']]
Association Rules:
(['bread'], ['diaper', 'milk'], 0.5)
(['bread', 'diaper'], ['milk'], 0.6666666666666666)


In [13]:
transactions = [['ROE', 'Return'],
                ['ROE', 'Cov', 'Dietz Ret', 'PE'],
                ['Return', 'Cov', 'Dietz Ret', 'IRR'],
                ['ROE', 'Return', 'Cov', 'Dietz Ret'],
                ['ROE', 'Return', 'Cov', 'IRR']]

min_support = 3

In [14]:
frequent_itemsets = apriori(transactions, min_support)
print("Frequent Itemsets:")
print(frequent_itemsets)


{'ROE': 4, 'Return': 4, 'Cov': 4, 'Dietz Ret': 3, 'PE': 1, 'IRR': 2}
[['ROE'], ['Return'], ['Cov'], ['Dietz Ret']]
op [['ROE', 'Return'], ['Cov', 'ROE'], ['Dietz Ret', 'ROE'], ['Cov', 'Return'], ['Dietz Ret', 'Return'], ['Cov', 'Dietz Ret']]
Frequent Itemset: [['ROE', 'Return'], ['Cov', 'ROE'], ['Dietz Ret', 'ROE'], ['Cov', 'Return'], ['Dietz Ret', 'Return'], ['Cov', 'Dietz Ret']]
Association Rules:
(['ROE'], ['Return'], 0.75)
(['Cov'], ['ROE'], 0.75)
(['Dietz Ret'], ['ROE'], 0.6666666666666666)
(['Cov'], ['Return'], 0.75)
(['Dietz Ret'], ['Return'], 0.6666666666666666)
(['Cov'], ['Dietz Ret'], 0.75)
op [['Cov', 'ROE', 'Return'], ['Cov', 'Dietz Ret', 'ROE'], ['Cov', 'Dietz Ret', 'Return']]
Frequent Itemset: [['Cov', 'ROE', 'Return'], ['Cov', 'Dietz Ret', 'ROE'], ['Cov', 'Dietz Ret', 'Return']]
Association Rules:
(['Cov'], ['ROE', 'Return'], 0.5)
(['Cov', 'ROE'], ['Return'], 0.6666666666666666)
(['Cov'], ['Dietz Ret', 'ROE'], 0.5)
(['Cov', 'Dietz Ret'], ['ROE'], 0.6666666666666666)
(['C

In [15]:
min_support = 3
min_confidence = 0.5

# Generating association rules
association_rules = generate_association_rules(frequent_itemsets, transactions, min_confidence)

# Print association rules
print("Association Rules:")
for rule in association_rules:
    print(rule)


Frequent Itemset: [['Cov', 'ROE', 'Return'], ['Cov', 'Dietz Ret', 'ROE'], ['Cov', 'Dietz Ret', 'Return']]
Association Rules:
(['Cov'], ['ROE', 'Return'], 0.5)
(['Cov', 'ROE'], ['Return'], 0.6666666666666666)
(['Cov'], ['Dietz Ret', 'ROE'], 0.5)
(['Cov', 'Dietz Ret'], ['ROE'], 0.6666666666666666)
(['Cov'], ['Dietz Ret', 'Return'], 0.5)
(['Cov', 'Dietz Ret'], ['Return'], 0.6666666666666666)
