<a href="https://colab.research.google.com/github/Harsh-D-7/Data-Mining/blob/main/Experiment_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
def load_dataset():
    """Return the sample market-basket transactions as a new list of lists."""
    baskets = (
        ('Bread', 'Milk'),
        ('Bread', 'Diapers', 'Beer', 'Eggs'),
        ('Milk', 'Diapers', 'Beer', 'Cola'),
        ('Bread', 'Milk', 'Diapers', 'Beer'),
        ('Bread', 'Milk', 'Diapers', 'Cola'),
    )
    # Copy into fresh lists so every call hands back independent, mutable data.
    return [list(basket) for basket in baskets]

def create_itemsets(dataset):
    """Collect each distinct item of the dataset as a one-element frozenset."""
    return {
        frozenset([element])
        for basket in dataset
        for element in basket
    }

def create_frequent_itemsets(dataset, itemsets, min_support):
    """Keep the candidate itemsets whose support reaches ``min_support``.

    The support of an itemset is the fraction of transactions in ``dataset``
    that contain every one of its items.

    Args:
        dataset: Sized collection of transactions (``len`` is taken of it),
            each transaction being an iterable of hashable items.
        itemsets: Collection of frozenset candidates to evaluate; it is
            iterated once per transaction.
        min_support: Inclusive lower bound on the support, as a fraction of
            the number of transactions.

    Returns:
        A list of the qualifying itemsets, in the order in which each one was
        first matched against a transaction.
    """
    item_counts = {}
    for transaction in dataset:
        # Build the transaction's item set once instead of letting issubset()
        # re-scan the raw sequence for every candidate; this also keeps a
        # one-shot iterator from being exhausted by the first candidate.
        if isinstance(transaction, (set, frozenset)):
            transaction_items = transaction
        else:
            transaction_items = set(transaction)
        for itemset in itemsets:
            if itemset.issubset(transaction_items):
                item_counts[itemset] = item_counts.get(itemset, 0) + 1

    # item_counts is empty when dataset is empty, so the division below is
    # never evaluated with a zero denominator.
    num_transactions = float(len(dataset))
    return [
        itemset
        for itemset, count in item_counts.items()
        if count / num_transactions >= min_support
    ]

def create_candidate_itemsets(itemsets, length):
    """Join every pair of itemsets and keep the unions with exactly ``length`` items."""
    # Every ordered pair is considered, including an itemset joined with itself.
    merged_pairs = (
        left.union(right)
        for left in itemsets
        for right in itemsets
    )
    return {merged for merged in merged_pairs if len(merged) == length}

def prune_itemsets(itemsets, candidate_itemsets):
    """Drop the candidates that have a one-item-smaller subset missing from ``itemsets``.

    A candidate is kept only when each subset obtained by removing a single
    item from it is present in ``itemsets``.

    Args:
        itemsets: Iterable of itemsets (sets or frozensets) to check the
            subsets against; a list and a set are both accepted.
        candidate_itemsets: Iterable of frozenset candidates to filter.

    Returns:
        A set containing the candidates that passed the check.
    """
    # Index the reference itemsets once so each subset lookup is a hash probe
    # rather than a linear scan when the caller passes a list.
    known = {frozenset(itemset) for itemset in itemsets}
    return {
        candidate
        for candidate in candidate_itemsets
        if all(candidate.difference((item,)) in known for item in candidate)
    }

def apriori(dataset, min_support):
    """Mine all itemsets whose support is at least ``min_support``, level by level."""
    # Level 1: the frequent single-item sets.
    current_level = create_frequent_itemsets(
        dataset, create_itemsets(dataset), min_support
    )
    discovered = list(current_level)

    size = 2
    while current_level:
        # Grow the previous level by one item, prune, then re-count support.
        candidates = prune_itemsets(
            current_level, create_candidate_itemsets(current_level, size)
        )
        current_level = create_frequent_itemsets(dataset, candidates, min_support)
        discovered += current_level
        size += 1
    return discovered

# Mine the sample transactions with a 40% minimum-support threshold and
# print every frequent itemset that was found.
min_support = 0.4
dataset = load_dataset()
frequent_itemsets = apriori(dataset, min_support)
print("Frequent Itemsets:", frequent_itemsets)


Frequent Itemsets: [frozenset({'Bread'}), frozenset({'Milk'}), frozenset({'Diapers'}), frozenset({'Beer'}), frozenset({'Cola'}), frozenset({'Bread', 'Milk'}), frozenset({'Beer', 'Diapers'}), frozenset({'Bread', 'Diapers'}), frozenset({'Bread', 'Beer'}), frozenset({'Milk', 'Beer'}), frozenset({'Cola', 'Milk'}), frozenset({'Cola', 'Diapers'}), frozenset({'Milk', 'Diapers'}), frozenset({'Bread', 'Beer', 'Diapers'}), frozenset({'Milk', 'Beer', 'Diapers'}), frozenset({'Cola', 'Milk', 'Diapers'}), frozenset({'Bread', 'Milk', 'Diapers'})]
