In [None]:
# Step 1: Load CSV and Preprocess (ignore Name, use Age, Gender, Marks)
def load_and_preprocess_data(file_path):
    """Read the CSV at ``file_path`` and turn each data row into a transaction.

    The first record is treated as a header and skipped. For every remaining
    row, column 0 (the name) is ignored, column 1 is parsed as an integer
    age, column 2 is encoded as 0 when it equals ``'M'`` and 1 otherwise, and
    column 3 is parsed as integer marks.

    Rows are parsed with the ``csv`` module so that a quoted field containing
    a comma does not shift the columns. Blank rows are skipped, and
    whitespace around the gender field is ignored before it is compared.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list holding one ``set`` of ints (age, gender code, marks) per
        data row, in file order.

    Raises:
        IndexError: If a non-blank row has fewer than four columns.
        ValueError: If the age or marks field is not an integer.

    NOTE(review): all three attributes share one integer namespace, so equal
    values from different columns (e.g. age 18 and marks 18, or gender code 1
    and marks 1) collapse into a single item. Confirm whether the items
    should be tagged by column before relying on the mined rules.
    """
    import csv

    transactions = []
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(file_path, 'r', newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip header (no-op on an empty file)
        for row in reader:
            # Tolerate blank or whitespace-only lines, e.g. at end of file.
            if not any(field.strip() for field in row):
                continue
            age = int(row[1])
            gender = 0 if row[2].strip() == 'M' else 1  # M=0, anything else=1
            marks = int(row[3])
            transactions.append({age, gender, marks})
    return transactions

# Step 2: Generate 1-itemsets
def generate_1_itemsets(transactions):
    """Count how many transactions contain each individual item.

    Args:
        transactions: Iterable of item collections (sets in this file).

    Returns:
        A dict mapping a one-element ``frozenset`` per distinct item to the
        number of times that item was seen across all transactions.
    """
    tally = {}
    for basket in transactions:
        for element in basket:
            # Missing items start from zero, so first sight yields 1.
            tally[element] = tally.get(element, 0) + 1
    return {frozenset((element,)): seen for element, seen in tally.items()}

# Step 3: Generate k-itemsets from previous frequent itemsets
def generate_k_itemsets(prev_itemsets, k):
    """Build size-``k`` candidate itemsets by joining pairs of previous ones.

    Every unordered pair of keys in ``prev_itemsets`` is unioned; a union is
    kept as a candidate only when it contains exactly ``k`` items.

    Args:
        prev_itemsets: Dict keyed by the frequent itemsets of the previous
            level (the values are not used).
        k: Required size of each candidate.

    Returns:
        A dict mapping each distinct candidate to an initial count of 0.
    """
    previous = list(prev_itemsets.keys())
    joined = (
        left.union(right)
        for position, left in enumerate(previous)
        for right in previous[position + 1:]
    )
    # Duplicate unions collapse onto one key; every count starts at zero.
    return {candidate: 0 for candidate in joined if len(candidate) == k}

# Step 4: Count support of candidates
def count_support(candidates, transactions):
    """Add to each candidate's count the number of transactions containing it.

    Args:
        candidates: Dict mapping candidate itemsets to their running counts.
            It is updated in place.
        transactions: Iterable of item collections to scan.

    Returns:
        The same ``candidates`` dict that was passed in, with updated counts.
    """
    for basket in transactions:
        contained = [option for option in candidates if option.issubset(basket)]
        for option in contained:
            candidates[option] += 1
    return candidates

# Step 5: Filter itemsets by support
def filter_by_support(itemsets, transactions, min_support):
    """Keep only the itemsets whose relative support reaches ``min_support``.

    Relative support is the itemset's count divided by the number of
    transactions.

    Args:
        itemsets: Dict mapping itemsets to their support counts.
        transactions: Sized collection of transactions; only its length is
            used.
        min_support: Minimum fraction of transactions (inclusive).

    Returns:
        A new dict with the surviving itemsets and their original counts.
    """
    transaction_count = len(transactions)
    return {
        candidate: occurrences
        for candidate, occurrences in itemsets.items()
        if occurrences / transaction_count >= min_support
    }

# Step 6: Generate association rules
def generate_association_rules(all_frequent_itemsets, total_transactions, min_confidence):
    """Derive association rules from the frequent itemsets of every level.

    For each frequent itemset beyond the first level, every non-empty proper
    subset is tried as an antecedent, with the remaining items as the
    consequent. A rule's confidence is the itemset's support divided by the
    antecedent's support.

    Args:
        all_frequent_itemsets: Sequence of dicts, one per level, mapping
            frequent itemsets to support counts. The first entry is skipped
            when rules are generated.
        total_transactions: Number of transactions, used to convert counts
            into relative support.
        min_confidence: Minimum confidence (inclusive) for a rule to be kept.

    Returns:
        A list of ``(antecedent, consequent, confidence)`` tuples in which
        antecedent and consequent are plain ``set`` objects.
    """
    # Relative support of every frequent itemset, across all levels.
    support_of = {
        itemset: count / total_transactions
        for level in all_frequent_itemsets
        for itemset, count in level.items()
    }

    found = []
    for level in all_frequent_itemsets[1:]:  # skip 1-itemsets
        for itemset in level:
            whole_support = support_of[itemset]
            for size in range(1, len(itemset)):
                for antecedent in get_subsets(itemset, size):
                    if antecedent not in support_of:
                        continue
                    confidence = whole_support / support_of[antecedent]
                    if confidence >= min_confidence:
                        remainder = itemset - antecedent
                        found.append((set(antecedent), set(remainder), confidence))
    return found

# Helper: Get all subsets of given size
def get_subsets(itemset, size):
    """Return every ``size``-element subset of ``itemset`` as a frozenset.

    Args:
        itemset: Iterable of hashable items.
        size: Number of items in each subset.

    Returns:
        A list of ``frozenset`` objects, one per combination of ``size``
        items drawn from ``itemset``.
    """
    from itertools import combinations

    return list(map(frozenset, combinations(itemset, size)))

# Step 7: Apriori main function
def apriori(file_path, min_support, min_confidence):
    """Mine frequent itemsets and association rules from a CSV file.

    Transactions are loaded from ``file_path``; frequent 1-itemsets are found
    first, then larger itemsets are grown one level at a time until a level
    produces no candidates or no frequent itemsets.

    Args:
        file_path: Path of the CSV file to load.
        min_support: Minimum relative support for an itemset to be frequent.
        min_confidence: Minimum confidence for a rule to be reported.

    Returns:
        A ``(levels, rules)`` tuple. ``levels`` is a list of dicts (itemset
        to support count), one per itemset size starting at 1; its first
        entry is present even when it is empty. ``rules`` is the list
        produced by ``generate_association_rules``.
    """
    transactions = load_and_preprocess_data(file_path)

    current = filter_by_support(
        generate_1_itemsets(transactions), transactions, min_support
    )
    levels = [current]

    size = 2
    candidates = generate_k_itemsets(current, size)
    while candidates:
        counted = count_support(candidates, transactions)
        current = filter_by_support(counted, transactions, min_support)
        if not current:
            break
        levels.append(current)
        size += 1
        candidates = generate_k_itemsets(current, size)

    rules = generate_association_rules(levels, len(transactions), min_confidence)
    return levels, rules

# Step 8: Run the Apriori pipeline on the sample dataset and print the results.
# NOTE(review): the path below looks like a Colab upload location - adjust it
# when running elsewhere.
file_path = '/content/sample_data/sample_dataset(2).csv'  # CSV passed to apriori()
min_support = 0.3  # 30% of transactions must contain an itemset
min_confidence = 0.6  # 60% minimum confidence for a reported rule

# apriori() returns the per-level frequent itemsets and the derived rules.
frequent_itemsets, association_rules = apriori(file_path, min_support, min_confidence)

# Display frequent itemsets, one section per level (level 1 = single items).
print(f"\nFrequent Itemsets (min_support = {min_support}):")
for i, level in enumerate(frequent_itemsets):
    print(f"\nLevel {i + 1} Frequent Itemsets:")
    for itemset, count in level.items():
        print(f"Itemset: {list(itemset)}, Support Count: {count}")

# Display association rules as "If antecedent -> Then consequent" lines with
# the confidence rounded to two decimals.
print(f"\nAssociation Rules (min_confidence = {min_confidence}):")
for antecedent, consequent, confidence in association_rules:
    print(f"If {antecedent} → Then {consequent} (Confidence = {confidence:.2f})")



Frequent Itemsets (min_support = 0.3):

Level 1 Frequent Itemsets:
Itemset: [1], Support Count: 4
Itemset: [18], Support Count: 5
Itemset: [0], Support Count: 6
Itemset: [17], Support Count: 3

Level 2 Frequent Itemsets:
Itemset: [0, 18], Support Count: 3

Association Rules (min_confidence = 0.6):
If {18} → Then {0} (Confidence = 0.60)
