ID: V01053626
Name: Newsha Bahardoost

In [None]:
from itertools import combinations
from collections import defaultdict

def read_data(file_path):
    """Load transactions from a text file, one basket per line.

    Each line is split on whitespace and its tokens are collected into a
    set, so repeated items within a line collapse to one. A blank line
    yields an empty set. Baskets are returned in file order.
    """
    with open(file_path, 'r') as handle:
        # str.split() with no separator already ignores leading/trailing
        # whitespace, including the trailing newline.
        return [set(row.split()) for row in handle]

def generate_frequent_itemsets(baskets, prev_frequent, k, min_support=100):
    """Count size-k itemsets and keep those meeting the support threshold.

    Implements one pass of the Apriori algorithm: a size-k itemset is only
    counted if every one of its (k-1)-item subsets is in ``prev_frequent``.

    Args:
        baskets: Iterable of baskets; each basket is an iterable of
            mutually orderable items (e.g. a set of strings).
        prev_frequent: Container of frequent (k-1)-itemsets, each a sorted
            tuple. Ignored (may be ``None``) when ``k == 1``.
        k: Size of the itemsets to generate.
        min_support: Minimum number of baskets an itemset must occur in.

    Returns:
        dict mapping each frequent itemset (a sorted tuple of length k) to
        the number of baskets that contain it.

    Raises:
        TypeError: If ``k > 1`` and ``prev_frequent`` is ``None``.
    """
    item_counts = defaultdict(int)

    if k > 1:
        if prev_frequent is None:
            raise TypeError("prev_frequent is required when k > 1")
        # An item that appears in no frequent (k-1)-itemset cannot be part
        # of any candidate, so dropping it up front shrinks the number of
        # combinations enumerated per basket without changing the result.
        useful_items = {item for itemset in prev_frequent for item in itemset}
    else:
        useful_items = None

    for basket in baskets:
        if useful_items is None:
            items = basket
        else:
            items = [item for item in basket if item in useful_items]

        # Sorting gives every itemset one canonical tuple representation.
        for itemset in combinations(sorted(items), k):
            # combinations() already yields tuples, matching the keys
            # stored in prev_frequent.
            if k == 1 or all(
                sub in prev_frequent for sub in combinations(itemset, k - 1)
            ):
                item_counts[itemset] += 1

    # Prune itemsets that do not meet the minimum support threshold.
    return {
        itemset: count
        for itemset, count in item_counts.items()
        if count >= min_support
    }

def calculate_confidence(numerator, denominator):
    """Return numerator / denominator, or 0 when the denominator is not positive."""
    if denominator > 0:
        return numerator / denominator
    # Zero or negative support: report no confidence instead of dividing.
    return 0

def generate_rules(frequent_itemsets, prev_frequent, k):
    """Derive association rules whose antecedent has k-1 items.

    For every itemset in ``frequent_itemsets`` each (k-1)-item subset is
    tried as an antecedent; the remaining items form the consequent. A rule
    is emitted only when the antecedent is a key of ``prev_frequent``, and
    its confidence is the itemset's count divided by the antecedent's count.

    Returns a list of ``(antecedent, consequent, confidence)`` tuples
    ordered by descending confidence, then by antecedent, then consequent.
    """
    rules = []

    for itemset in frequent_itemsets:
        support = frequent_itemsets[itemset]
        members = set(itemset)

        for lhs in combinations(itemset, k - 1):
            antecedent = tuple(sorted(lhs))
            consequent = tuple(sorted(members - set(antecedent)))

            # Without a recorded count for the antecedent no confidence
            # can be computed, so the rule is skipped.
            if antecedent not in prev_frequent:
                continue

            conf = calculate_confidence(support, prev_frequent[antecedent])
            rules.append((antecedent, consequent, conf))

    rules.sort(key=lambda rule: (-rule[2], rule[0], rule[1]))
    return rules

def main(file_path='/content/p2-baskets.txt', min_support=100):
    """Mine frequent itemsets up to size 3 and print the strongest rules.

    Reads the baskets, computes frequent single items, pairs and triples,
    prints how many of each were found, then prints the five
    highest-confidence pair rules and the three highest-confidence
    triplet rules.

    Args:
        file_path: Path of the basket file (one transaction per line).
        min_support: Minimum basket count for an itemset to be frequent;
            the same threshold is used for every itemset size.
    """
    baskets = read_data(file_path)

    # Generate frequent itemsets level by level; each level is pruned
    # using the keys of the previous one.
    frequent_items = generate_frequent_itemsets(
        baskets, None, 1, min_support=min_support
    )
    print(f"Number of frequent items in L1: {len(frequent_items)}")

    frequent_pairs = generate_frequent_itemsets(
        baskets, set(frequent_items), 2, min_support=min_support
    )
    print(f"Number of frequent itemsets in L2: {len(frequent_pairs)}")

    frequent_triples = generate_frequent_itemsets(
        baskets, set(frequent_pairs), 3, min_support=min_support
    )
    print(f"Number of frequent itemsets in L3: {len(frequent_triples)}")

    # generate_rules returns rules already sorted by descending
    # confidence, so slicing takes the strongest ones.
    pair_rules = generate_rules(frequent_pairs, frequent_items, 2)[:5]
    triplet_rules = generate_rules(frequent_triples, frequent_pairs, 3)[:3]

    # Print results
    print("\nTop 5 Pair Rules:")
    for rule in pair_rules:
        print(f"{' '.join(rule[0])} -> {' '.join(rule[1])}: {rule[2]:.5f}")

    print("\nTop 3 Triplet Rules:")
    for rule in triplet_rules:
        # A triplet rule has a two-item antecedent, so its consequent
        # holds exactly one item.
        print(f"{', '.join(rule[0])} -> {rule[1][0]}: {rule[2]:.5f}")

# Entry-point guard: run the analysis only when this code is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

Number of frequent items in L1: 647
Number of frequent itemsets in L2: 1334
Number of frequent itemsets in L3: 233

Top 5 Pair Rules:
DAI93865 -> FRO40251: 1.00000
GRO85051 -> FRO40251: 0.99918
GRO38636 -> FRO40251: 0.99065
ELE12951 -> FRO40251: 0.99057
DAI88079 -> FRO40251: 0.98673

Top 3 Triplet Rules:
DAI23334, ELE92920 -> DAI62779: 1.00000
DAI31081, GRO85051 -> FRO40251: 1.00000
DAI55911, GRO85051 -> FRO40251: 1.00000


In [1]:
from itertools import combinations
from collections import defaultdict

def read_data(file_path):
    """Load transactions from a text file, one basket per line.

    Each line is split on whitespace and its tokens are collected into a
    set, so repeated items within a line collapse to one. A blank line
    yields an empty set. Baskets are returned in file order.
    """
    with open(file_path, 'r') as handle:
        # str.split() with no separator already ignores leading/trailing
        # whitespace, including the trailing newline.
        return [set(row.split()) for row in handle]

def generate_frequent_itemsets(baskets, prev_frequent, k, min_support=100):
    """Count size-k itemsets and keep those meeting the support threshold.

    Implements one pass of the Apriori algorithm: a size-k itemset is only
    counted if every one of its (k-1)-item subsets is in ``prev_frequent``.

    Args:
        baskets: Iterable of baskets; each basket is an iterable of
            mutually orderable items (e.g. a set of strings).
        prev_frequent: Container of frequent (k-1)-itemsets, each a sorted
            tuple. Ignored (may be ``None``) when ``k == 1``.
        k: Size of the itemsets to generate.
        min_support: Minimum number of baskets an itemset must occur in.

    Returns:
        dict mapping each frequent itemset (a sorted tuple of length k) to
        the number of baskets that contain it.

    Raises:
        TypeError: If ``k > 1`` and ``prev_frequent`` is ``None``.
    """
    item_counts = defaultdict(int)

    if k > 1:
        if prev_frequent is None:
            raise TypeError("prev_frequent is required when k > 1")
        # An item that appears in no frequent (k-1)-itemset cannot be part
        # of any candidate, so dropping it up front shrinks the number of
        # combinations enumerated per basket without changing the result.
        useful_items = {item for itemset in prev_frequent for item in itemset}
    else:
        useful_items = None

    for basket in baskets:
        if useful_items is None:
            items = basket
        else:
            items = [item for item in basket if item in useful_items]

        # Sorting gives every itemset one canonical tuple representation.
        for itemset in combinations(sorted(items), k):
            # combinations() already yields tuples, matching the keys
            # stored in prev_frequent.
            if k == 1 or all(
                sub in prev_frequent for sub in combinations(itemset, k - 1)
            ):
                item_counts[itemset] += 1

    # Prune itemsets that do not meet the minimum support threshold.
    return {
        itemset: count
        for itemset, count in item_counts.items()
        if count >= min_support
    }

def calculate_confidence(numerator, denominator):
    """Return numerator / denominator, or 0 when the denominator is not positive."""
    if denominator > 0:
        return numerator / denominator
    # Zero or negative support: report no confidence instead of dividing.
    return 0

def generate_rules(frequent_itemsets, prev_frequent, k):
    """Derive association rules whose antecedent has k-1 items.

    For every itemset in ``frequent_itemsets`` each (k-1)-item subset is
    tried as an antecedent; the remaining items form the consequent. A rule
    is emitted only when the antecedent is a key of ``prev_frequent``, and
    its confidence is the itemset's count divided by the antecedent's count.

    Returns a list of ``(antecedent, consequent, confidence)`` tuples
    ordered by descending confidence, then by antecedent, then consequent.
    """
    rules = []

    for itemset in frequent_itemsets:
        support = frequent_itemsets[itemset]
        members = set(itemset)

        for lhs in combinations(itemset, k - 1):
            antecedent = tuple(sorted(lhs))
            consequent = tuple(sorted(members - set(antecedent)))

            # Without a recorded count for the antecedent no confidence
            # can be computed, so the rule is skipped.
            if antecedent not in prev_frequent:
                continue

            conf = calculate_confidence(support, prev_frequent[antecedent])
            rules.append((antecedent, consequent, conf))

    rules.sort(key=lambda rule: (-rule[2], rule[0], rule[1]))
    return rules

# Task 1: load the baskets and build the frequent single items and pairs.
baskets = read_data('/content/p2-baskets.txt')
L1 = generate_frequent_itemsets(baskets, None, 1, min_support=100)
L2 = generate_frequent_itemsets(baskets, set(L1), 2, min_support=100)

# Keep only pair rules with confidence of at least 0.985, then take the
# five strongest (ties broken by antecedent, then consequent).
pair_rules = generate_rules(L2, L1, 2)
pair_rules_filtered = [
    candidate for candidate in pair_rules if candidate[2] >= 0.985
]
top_pair_rules = sorted(
    pair_rules_filtered,
    key=lambda entry: (-entry[2], entry[0], entry[1]),
)[:5]

# Task 1 output: one "antecedent -> consequent: confidence" line per rule.
for rule in top_pair_rules:
    print(f"{' '.join(rule[0])} -> {' '.join(rule[1])}: {rule[2]:.5f}")

DAI93865 -> FRO40251: 1.00000
GRO85051 -> FRO40251: 0.99918
GRO38636 -> FRO40251: 0.99065
ELE12951 -> FRO40251: 0.99057
DAI88079 -> FRO40251: 0.98673


In [3]:
# Task 2: extend the frequent pairs from the previous cell to triples.
L3 = generate_frequent_itemsets(baskets, set(L2), 3, min_support=100)

# Keep only triplet rules whose confidence is exactly 1.0, then take the
# first five in (antecedent, consequent) order.
triplet_rules = generate_rules(L3, L2, 3)
triplet_rules_filtered = [
    candidate for candidate in triplet_rules if candidate[2] == 1.0
]
top_triplet_rules = sorted(
    triplet_rules_filtered,
    key=lambda entry: (-entry[2], entry[0], entry[1]),
)[:5]

# Task 2 output: one "antecedent -> consequent: confidence" line per rule.
for rule in top_triplet_rules:
    print(f"{' '.join(rule[0])} -> {' '.join(rule[1])}: {rule[2]:.5f}")

DAI23334 ELE92920 -> DAI62779: 1.00000
DAI31081 GRO85051 -> FRO40251: 1.00000
DAI55911 GRO85051 -> FRO40251: 1.00000
DAI62779 DAI88079 -> FRO40251: 1.00000
DAI75645 GRO85051 -> FRO40251: 1.00000
