<a href="https://colab.research.google.com/github/Chandriya/Data-Warehousing/blob/main/Lab2_data_Warehousing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install mlxtend




In [None]:
# Colab-only step: prompt for the input data files. The recorded cell output
# shows Space.txt and Sports.txt being saved under those names, which are the
# paths later handed to compare().
from google.colab import files
uploaded = files.upload()


Saving Space.txt to Space.txt
Saving Sports.txt to Sports.txt


In [None]:
import pandas as pd
import time
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules

def load_data(file):
    """Read transactions from a comma-separated text file.

    Every non-blank line is split on commas. The first field is discarded
    (the slice starts at index 1) and the remaining fields form the items of
    one transaction.

    Blank or whitespace-only lines are skipped so that stray empty lines do
    not turn into empty transactions, which would inflate the transaction
    count and lower the support of every itemset. Items are stripped of
    surrounding whitespace, and empty fields (for example from a trailing
    comma) are dropped so they cannot show up as a bogus ``''`` item.

    Args:
        file: Path of the text file to read.

    Returns:
        A list of transactions, each one a list of item strings. A line that
        holds only the leading field yields an empty list.
    """
    transactions = []
    with open(file) as f:
        for line in f:
            line = line.strip()
            if not line:
                # Not a transaction at all; do not count it.
                continue
            fields = (field.strip() for field in line.split(',')[1:])
            transactions.append([field for field in fields if field])
    return transactions

def preprocess(data):
    """Encode a list of transactions as a DataFrame via TransactionEncoder.

    Args:
        data: Iterable of transactions, each an iterable of items.

    Returns:
        A DataFrame built from the encoder's fit_transform output, with the
        encoder's ``columns_`` as column labels.
    """
    encoder = TransactionEncoder()
    encoded = encoder.fit_transform(data)
    item_labels = encoder.columns_
    return pd.DataFrame(encoded, columns=item_labels)

def analyze(df, algo, min_sup=0.2, min_conf=0.6):
    """Mine frequent itemsets and association rules, timing the whole run.

    Args:
        df: Transaction DataFrame to mine (e.g. the output of ``preprocess``).
        algo: ``'apriori'`` selects Apriori; any other value selects
            FP-Growth.
        min_sup: Minimum support passed to the mining algorithm.
        min_conf: Minimum confidence threshold used for rule generation.

    Returns:
        A tuple ``(itemsets, rules, duration, avg_conf, avg_lift)``.
        ``rules`` is an empty DataFrame when no frequent itemsets were found.
        ``duration`` is in seconds and covers both itemset mining and rule
        generation. ``avg_conf`` and ``avg_lift`` are 0 when there are no
        rules.
    """
    miner = apriori if algo == 'apriori' else fpgrowth

    # perf_counter() is monotonic and uses the highest-resolution clock
    # available, so short runs are not reported as 0 and the measurement is
    # unaffected by system clock adjustments (unlike time.time()).
    start = time.perf_counter()
    itemsets = miner(df, min_support=min_sup, use_colnames=True)
    if itemsets.empty:
        # Skip rule generation when there is nothing to derive rules from.
        rules = pd.DataFrame()
    else:
        rules = association_rules(itemsets, metric='confidence', min_threshold=min_conf)
    duration = time.perf_counter() - start

    if rules.empty:
        avg_conf = avg_lift = 0
    else:
        avg_conf = rules['confidence'].mean()
        avg_lift = rules['lift'].mean()
    return itemsets, rules, duration, avg_conf, avg_lift

def display_results(title, itemsets, rules):
    """Print the frequent itemsets and association rules for one algorithm.

    Args:
        title: Label inserted into both section headers.
        itemsets: DataFrame with at least ``support`` and ``itemsets`` columns.
        rules: DataFrame of rules; when empty, a placeholder message is
            printed instead of a table.
    """
    print(f"\n--- {title} Frequent Itemsets ---")
    itemset_view = itemsets[['support', 'itemsets']]
    print(itemset_view)

    print(f"\n--- {title} Association Rules ---")
    if not rules.empty:
        rule_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
        print(rules[rule_columns])
        return
    print("No association rules found.")

def compare(file):
    """Run Apriori and FP-Growth on one data file and print a comparison.

    Loads and encodes the transactions in ``file``, analyzes them with each
    algorithm, prints each algorithm's itemsets and rules, then prints a
    summary table and which run took less time.

    Args:
        file: Path of the transaction file to process.
    """
    print(f"\n===================== Processing {file} =====================")
    df = preprocess(load_data(file))

    # Each run is the 5-tuple (itemsets, rules, duration, avg_conf, avg_lift).
    apriori_run = analyze(df, 'apriori')
    display_results("Apriori", apriori_run[0], apriori_run[1])

    fpgrowth_run = analyze(df, 'fpgrowth')
    display_results("FP-Growth", fpgrowth_run[0], fpgrowth_run[1])

    print(f"\nSummary Comparison for {file}:")
    print(f"{'Method':<15} {'Itemsets':<10} {'Rules':<10} {'Time (s)':<10} {'Avg Conf':<10} {'Avg Lift':<10}")
    print('-' * 65)
    for label, run in (("Apriori", apriori_run), ("FP-Growth", fpgrowth_run)):
        items, rules, elapsed, conf, lift = run
        print(f"{label:<15} {len(items):<10} {len(rules):<10} {elapsed:<10.4f} {conf:<10.4f} {lift:<10.4f}")

    apriori_time, fpgrowth_time = apriori_run[2], fpgrowth_run[2]
    if apriori_time < fpgrowth_time:
        verdict = "\nApriori is faster."
    elif apriori_time > fpgrowth_time:
        verdict = "\nFP-Growth is faster."
    else:
        verdict = "\nBoth methods took the same amount of time."
    print(verdict)

# Run the full comparison on each uploaded data set, in this order.
for dataset in ("Sports.txt", "Space.txt"):
    compare(dataset)





--- Apriori Frequent Itemsets ---
   support        itemsets
0     0.25  (cricket ball)
1     0.35   (cricket bat)
2     0.35      (football)
3     0.25        (gloves)
4     0.40         (juice)
5     0.20  (water bottle)

--- Apriori Association Rules ---
No association rules found.

--- FP-Growth Frequent Itemsets ---
   support        itemsets
0     0.40         (juice)
1     0.35      (football)
2     0.35   (cricket bat)
3     0.25        (gloves)
4     0.25  (cricket ball)
5     0.20  (water bottle)

--- FP-Growth Association Rules ---
No association rules found.

Summary Comparison for Sports.txt:
Method          Itemsets   Rules      Time (s)   Avg Conf   Avg Lift  
-----------------------------------------------------------------
Apriori         6          0          0.0079     0.0000     0.0000    
FP-Growth       6          0          0.0094     0.0000     0.0000    

Apriori is faster.


--- Apriori Frequent Itemsets ---
   support        itemsets
0     0.35  (Food Packe