# Implementation of the Apriori Algorithm for Association Rule Mining on Transaction Data

**Here are the structured steps used in the code for the implementation of the Apriori Algorithm:**

1.  Import Libraries: Import the necessary libraries: pandas for data manipulation, `Counter` from collections for counting, and `combinations` from itertools for enumerating candidate itemsets.
2.  Load Data:  Load the transactional data from a CSV file using pandas DataFrame.
3.   Extract All Products: Define a function to extract all products from the DataFrame.
4.   Define Global Variables: Define variables for frequent itemsets, supports, and a list to store discarded itemsets. Also, set the minimum support threshold.
5.   Define Helper Functions:
```
get_items: Get unique items from the transactions.
calculate_support: Calculate the support count of an itemset in transactions.
print_frequent_itemsets: Print frequent itemsets and their supports.
print_supersets: Build and return the item combinations derived from a given itemset (despite its name, it returns the list rather than printing it).
get_itemsetset: Generate frequent itemsets of a specified size.
generate_frequent_itemset: Generate frequent itemsets of all sizes.
get_subsets: Generate all non-empty proper subsets of an itemset.
generate_association_rules: Generate association rules based on frequent itemsets.
```
6.   Generate Frequent Itemsets:
```
Get unique items from transactions.
Iterate through all itemset sizes from 1 to the number of unique items.
For each size, generate frequent itemsets and print them along with their supports.
Print supersets of each frequent itemset size.
```
7.   Generate Association Rules:
```
Define the minimum confidence threshold.
Generate association rules from the frequent itemsets.
For each frequent itemset of the largest size found, generate all non-empty proper subsets as candidate antecedents.
Calculate the confidence of each association rule.
If the confidence meets the threshold, add the rule to the list of association rules.
```
8.   Print Association Rules: Print the generated association rules along with their antecedents, consequents, and confidence.

This structured approach ensures the step-by-step implementation of the Apriori Algorithm for mining association rules from transactional data.

In [None]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
from collections import Counter
from itertools import combinations

# Location of the basket CSV on GitHub; reading it requires network access.
url = 'https://raw.githubusercontent.com/Rk-Pudasaini/Applied_Machine_Learning/main/Data_Science_Projects/Data_mining/basket_data.csv'
# header=None: the file has no header row, so the first line is read as data.
df = pd.read_csv(url, header=None)

# Define the function to extract all products from the DataFrame
def extract_all_products(df):
    """Collect the product names found in each row of ``df``.

    Every non-null cell is converted to text, split on commas, and the
    whitespace-stripped, non-empty pieces are gathered per row.

    Args:
        df: DataFrame to scan; one output list is produced per row.

    Returns:
        A list with one entry per row of ``df``. Each entry is the list of
        product-name strings found in that row, in cell order; a row with
        no usable cells yields an empty list.
    """
    all_products = []
    for _, row in df.iterrows():
        row_products = []
        for cell in row:
            if not pd.notnull(cell):
                continue
            # str() keeps non-string cells (e.g. numbers parsed by pandas)
            # from crashing on .split.
            pieces = (piece.strip() for piece in str(cell).split(','))
            row_products.extend(piece for piece in pieces if piece)
        all_products.append(row_products)
    return all_products

# Call the function to extract products: one list of product names per row of df
all_products = extract_all_products(df)

# Define global variables (shared, mutable state used by the helper functions below)
frequent_itemsets = []  # frequent_itemsets[n - 1] holds the frequent itemsets of size n
supports = []  # supports[n - 1] holds the support counts recorded for size-n itemsets
discarded = []  # itemsets whose support count fell below min_support
min_support = 2  # Minimum support, as an absolute count of transactions containing the itemset

# Define helper functions
def get_items(transactions):
    """Return the distinct items seen across all transactions.

    Args:
        transactions: Iterable of transactions, each an iterable of
            hashable items.

    Returns:
        A list of the unique items; the order is that of the underlying
        set and should not be relied upon.
    """
    unique_items = {item for basket in transactions for item in basket}
    return list(unique_items)

def calculate_support(itemset, transactions):
    """Count the transactions that contain every item of ``itemset``.

    Args:
        itemset: Iterable of hashable items to look for.
        transactions: Iterable of transactions, each an iterable of items.

    Returns:
        The number of transactions of which ``itemset`` is a subset. An
        empty ``itemset`` is contained in every transaction.
    """
    # Build the set once: it does not change between transactions, and a
    # one-shot iterable would otherwise be empty from the second
    # transaction onwards and wrongly match everything.
    required = set(itemset)
    return sum(1 for transaction in transactions if required.issubset(transaction))

def print_frequent_itemsets(n, itemsets, supports):
    """Print a header for size ``n`` and one line per itemset with its support.

    Args:
        n: Itemset size shown in the header.
        itemsets: Itemsets to list.
        supports: Support values, aligned by position with ``itemsets``.
    """
    print(f"Frequent Itemsets for n={n}:")
    for position, itemset in enumerate(itemsets):
        support = supports[position]
        print(f"Itemset: {itemset}, Support: {support}")

def print_supersets(itemset, n):
    """Build the list of item combinations derived from ``itemset``.

    Despite the name, nothing is printed; the combinations are returned.
    For each size from 1 to ``len(itemset)`` the contribution depends on
    ``n``:

    * ``n == 1``: one singleton set per item (repeated for every size).
    * ``n == 2``: all pairs, contributed only at size 2.
    * ``n > 2``: all combinations of the current size, except sizes 1
      and ``n - 1``, which are skipped.
    * otherwise: nothing.

    Args:
        itemset: Sequence of items to combine.
        n: Itemset level that selects the rule above.

    Returns:
        A list of non-empty sets in generation order.
    """
    collected = []
    for size in range(1, len(itemset) + 1):
        if n == 1:
            collected += [{member} for member in itemset]
        elif n == 2:
            if size == 2:
                collected += [set(pair) for pair in combinations(itemset, 2)]
        elif n > 2 and size not in (1, n - 1):
            collected += [set(group) for group in combinations(itemset, size)]

    # Drop any empty sets, mirroring the final filter of the original.
    return [group for group in collected if group]

def get_itemsetset(items, n):
    """Return the size-``n`` combinations of ``items`` that are frequent.

    Reads the module-level ``transactions`` and ``min_support``. As a side
    effect, the support count of each frequent combination is appended to
    ``supports[n - 1]`` and every other combination is appended to
    ``discarded``.

    Args:
        items: Items from which candidate combinations are drawn.
        n: Size of the combinations to evaluate.

    Returns:
        A list of the frequent combinations (tuples) in generation order.
    """
    kept = []
    for candidate in combinations(items, n):
        hits = calculate_support(candidate, transactions)
        is_frequent = hits >= min_support
        if not is_frequent:
            discarded.append(candidate)
            continue
        kept.append(candidate)
        supports[n - 1].append(hits)
    return kept

def generate_frequent_itemset():
    """Compute and print the frequent itemsets level by level.

    Works on the module-level ``transactions`` and fills the module-level
    ``frequent_itemsets``, ``supports`` and ``discarded`` lists. For each
    size ``n`` (starting at 1) it finds the frequent itemsets, prints them
    with their supports, and prints the combinations produced by
    ``print_supersets``. It stops after the first level that yields at most
    one frequent itemset.

    Returns:
        The module-level ``frequent_itemsets`` list, where index ``n - 1``
        holds the frequent itemsets of size ``n``. The final level may be
        empty.
    """
    # Reset the shared lists in place so a repeated call starts clean.
    # Without this, a second call would index into stale entries and append
    # duplicate supports. Clearing in place keeps earlier references valid.
    frequent_itemsets.clear()
    supports.clear()
    discarded.clear()

    items = get_items(transactions)
    for n in range(1, len(items) + 1):
        print(f" ======== n = {n} =========")
        # Placeholders for level n; get_itemsetset appends to supports[n - 1].
        frequent_itemsets.append([])
        supports.append([])

        itemsets = get_itemsetset(items, n)
        frequent_itemsets[n - 1] = itemsets
        print_frequent_itemsets(n, itemsets, supports[n - 1])

        all_supersets = []
        for itemset in itemsets:
            all_supersets.extend(print_supersets(itemset, n))

        print(f"Supersets for n={n}: {all_supersets}")

        # With at most one frequent itemset the search ends at this level.
        if len(itemsets) <= 1:
            break

    return frequent_itemsets

def get_subsets(itemset):
    """Return every non-empty proper subset of ``itemset`` as a tuple.

    Subsets are listed by increasing size (1 up to ``len(itemset) - 1``),
    each size in ``itertools.combinations`` order.
    """
    return [
        subset
        for size in range(1, len(itemset))
        for subset in combinations(itemset, size)
    ]

def generate_association_rules(frequent_itemsets, min_confidence, transactions):
    """Derive association rules from the largest frequent itemsets.

    Each itemset of the largest non-empty level is split into every
    antecedent/consequent pair, where the antecedent is a non-empty proper
    subset. A rule is kept when
    ``support(itemset) / support(antecedent) >= min_confidence``.

    Args:
        frequent_itemsets: List of levels, where index ``n - 1`` holds the
            frequent itemsets of size ``n``. Trailing empty levels are
            ignored.
        min_confidence: Minimum confidence a rule must reach to be kept.
        transactions: Transactions used to count supports.

    Returns:
        A list of ``(antecedent, consequent, confidence)`` tuples, with
        antecedent and consequent as frozensets. Empty when there are no
        frequent itemsets.
    """
    # Take the largest level that actually holds itemsets. The level list
    # may be empty or end in an empty level, and indexing the last entry
    # would then raise or silently produce no rules.
    largest_level = next(
        (level for level in reversed(frequent_itemsets) if level), []
    )

    association_rules = []
    for itemset in largest_level:
        full_set = frozenset(itemset)
        # The itemset support is the same for every split of this itemset.
        itemset_support = calculate_support(itemset, transactions)
        for subset in get_subsets(itemset):
            antecedent = frozenset(subset)
            consequent = full_set - antecedent
            if not consequent:
                continue
            antecedent_support = calculate_support(antecedent, transactions)
            if antecedent_support == 0:
                # The antecedent never occurs, so confidence is undefined.
                continue
            confidence = itemset_support / antecedent_support
            if confidence >= min_confidence:
                association_rules.append((antecedent, consequent, confidence))
    return association_rules

# Transactions to mine: the first 30 rows of the extracted products.
# The helper functions above read this module-level name directly.
transactions = all_products[:30]

# Generate frequent itemsets (each level is printed as it is computed)
frequent_itemsets = generate_frequent_itemset()

# Define the minimum confidence threshold (a ratio: itemset support / antecedent support)
min_confidence = 0.2

# Generate association rules
association_rules = generate_association_rules(frequent_itemsets, min_confidence, transactions)

print("Number of association rules generated:", len(association_rules))  # Quick sanity check on the rule count

# Print the association rules, one per line, with confidence to two decimals
print("Association Rules:")
for antecedent, consequent, confidence in association_rules:
    print(f"Antecedent: {antecedent} => Consequent: {consequent} | Confidence: {confidence:.2f}")


Frequent Itemsets for n=1:
Itemset: ('shrimp',), Support: 4
Itemset: ('avocado',), Support: 4
Itemset: ('soup',), Support: 2
Itemset: ('spaghetti',), Support: 7
Itemset: ('black tea',), Support: 2
Itemset: ('salmon',), Support: 4
Itemset: ('green tea',), Support: 4
Itemset: ('low fat yogurt',), Support: 3
Itemset: ('eggs',), Support: 8
Itemset: ('sparkling water',), Support: 2
Itemset: ('cooking oil',), Support: 2
Itemset: ('milk',), Support: 4
Itemset: ('meatballs',), Support: 2
Itemset: ('frozen smoothie',), Support: 2
Itemset: ('burgers',), Support: 3
Itemset: ('turkey',), Support: 4
Itemset: ('cookies',), Support: 2
Itemset: ('pasta',), Support: 2
Itemset: ('chicken',), Support: 3
Itemset: ('mineral water',), Support: 10
Itemset: ('chocolate',), Support: 3
Itemset: ('energy bar',), Support: 2
Itemset: ('yams',), Support: 2
Itemset: ('french fries',), Support: 4
Itemset: ('honey',), Support: 4
Itemset: ('frozen vegetables',), Support: 2
Supersets for n=1: [{'shrimp'}, {'avocado'}, {