<a href="https://colab.research.google.com/github/Naitik1126/Data_Warehousing_and_Mining/blob/main/data_warehouse_and_mining.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install --quiet mlxtend

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules
from IPython.display import display

def _clean_items(record):
    """Return the non-blank item names found in one raw CSV row.

    The first cell of the row is skipped (in the sample data it holds a
    transaction label such as ``T1``). Every remaining non-null cell is
    converted to ``str`` and stripped; cells that are empty after stripping
    are dropped.
    """
    stripped = (str(cell).strip() for cell in record[1:] if pd.notna(cell))
    return [name for name in stripped if name != '']


def _rank_itemsets(found):
    """Add a 'length' column to *found* and return it ordered for display.

    'length' is the number of items in each itemset. Rows are sorted by
    support and then by length, both descending, and the index is
    renumbered from zero. The column is added to *found* in place.
    """
    found['length'] = found['itemsets'].apply(len)
    ordered = found.sort_values(by=['support', 'length'], ascending=[False, False])
    return ordered.reset_index(drop=True)


# Load the raw file; header=None keeps the first line as data.
df_raw = pd.read_csv('canteen_transactions.csv', header=None)
print("Raw data (first 10 rows):")
display(df_raw.head(10))

# One list of item names per CSV row.
transactions = [_clean_items(record) for _, record in df_raw.iterrows()]

print("\nSample transaction lists (first 8):")
for basket in transactions[:8]:
    print(basket)

# One-hot encode the transactions: one boolean column per distinct item.
te = TransactionEncoder()
te.fit(transactions)
te_ary = te.transform(transactions)
trans_df = pd.DataFrame(te_ary, columns=te.columns_)
print("\nOne-hot encoded shape:", trans_df.shape)
display(trans_df.head())

# Mine frequent itemsets with both algorithms at the same support threshold.
min_support = 0.2
freq_ap = _rank_itemsets(
    apriori(trans_df, min_support=min_support, use_colnames=True)
)
print("\nFrequent itemsets (Apriori):")
display(freq_ap)

freq_fp = _rank_itemsets(
    fpgrowth(trans_df, min_support=min_support, use_colnames=True)
)
print("\nFrequent itemsets (FP-Growth):")
display(freq_fp)

# Derive rules from the Apriori itemsets, keep the key metrics, and list the
# highest-lift rules first (ties broken by confidence).
min_confidence = 0.5
rule_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
rules = association_rules(freq_ap, metric="confidence", min_threshold=min_confidence)
rules = rules[rule_columns]
rules = rules.sort_values(by=['lift', 'confidence'], ascending=[False, False])
rules = rules.reset_index(drop=True)
rules_title = "\nAssociation rules (Apriori -> Rules with min_confidence = {:.2f}):"
print(rules_title.format(min_confidence))
display(rules)

# Persist all three result tables next to the notebook.
exports = {
    'frequent_itemsets_apriori.csv': freq_ap,
    'frequent_itemsets_fpgrowth.csv': freq_fp,
    'association_rules.csv': rules,
}
for filename, frame in exports.items():
    frame.to_csv(filename, index=False)
print("\nSaved: frequent_itemsets_apriori.csv, frequent_itemsets_fpgrowth.csv, association_rules.csv")

Raw data (first 10 rows):


  return datetime.utcnow().replace(tzinfo=utc)


Unnamed: 0,0,1,2,3
0,T1,Tea,Samosa,Biscuit
1,T2,Coffee,Sandwich,
2,T3,Tea,Samosa,
3,T4,Burger,Cold Drink,
4,T5,Tea,Sandwich,Biscuit
5,T6,Coffee,Burger,
6,T7,Tea,Samosa,Cold Drink
7,T8,Tea,Sandwich,
8,T9,Coffee,Biscuit,
9,T10,Burger,Samosa,Cold Drink



Sample transaction lists (first 8):
['Tea', 'Samosa', 'Biscuit']
['Coffee', 'Sandwich']
['Tea', 'Samosa']
['Burger', 'Cold Drink']
['Tea', 'Sandwich', 'Biscuit']
['Coffee', 'Burger']
['Tea', 'Samosa', 'Cold Drink']
['Tea', 'Sandwich']

One-hot encoded shape: (15, 7)


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Unnamed: 0,Biscuit,Burger,Coffee,Cold Drink,Samosa,Sandwich,Tea
0,True,False,False,False,True,False,True
1,False,False,True,False,False,True,False
2,False,False,False,False,True,False,True
3,False,True,False,True,False,False,False
4,True,False,False,False,False,True,True



Frequent itemsets (Apriori):


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Unnamed: 0,support,itemsets,length
0,0.533333,(Tea),1
1,0.4,(Cold Drink),1
2,0.333333,(Biscuit),1
3,0.333333,(Samosa),1
4,0.333333,(Sandwich),1
5,0.266667,"(Tea, Biscuit)",2
6,0.266667,"(Tea, Samosa)",2
7,0.266667,(Burger),1
8,0.2,"(Burger, Cold Drink)",2
9,0.2,(Coffee),1



Frequent itemsets (FP-Growth):


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Unnamed: 0,support,itemsets,length
0,0.533333,(Tea),1
1,0.4,(Cold Drink),1
2,0.333333,(Samosa),1
3,0.333333,(Biscuit),1
4,0.333333,(Sandwich),1
5,0.266667,"(Tea, Samosa)",2
6,0.266667,"(Tea, Biscuit)",2
7,0.266667,(Burger),1
8,0.2,"(Burger, Cold Drink)",2
9,0.2,(Coffee),1



Association rules (Apriori -> Rules with min_confidence = 0.50):


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(Burger),(Cold Drink),0.2,0.75,1.875
1,(Cold Drink),(Burger),0.2,0.5,1.875
2,(Biscuit),(Tea),0.266667,0.8,1.5
3,(Samosa),(Tea),0.266667,0.8,1.5
4,(Tea),(Biscuit),0.266667,0.5,1.5
5,(Tea),(Samosa),0.266667,0.5,1.5



Saved: frequent_itemsets_apriori.csv, frequent_itemsets_fpgrowth.csv, association_rules.csv


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
