# 📦 Step 1: Import Required Libraries

In [3]:
!conda list

# packages in environment at C:\Users\Pankaj\anaconda3:
#
# Name                    Version                   Build  Channel
_anaconda_depends         2024.10             py312_mkl_0  
absl-py                   2.2.1                    pypi_0    pypi
aiobotocore               2.12.3          py312haa95532_0  
aiohappyeyeballs          2.4.0           py312haa95532_0  
aiohttp                   3.10.5          py312h827c3e9_0  
aioitertools              0.7.1              pyhd3eb1b0_0  
aiosignal                 1.2.0              pyhd3eb1b0_0  
alabaster                 0.7.16          py312haa95532_0  
altair                    5.0.1           py312haa95532_0  
anaconda-anon-usage       0.4.4           py312hfc23b7f_100  
anaconda-catalogs         0.2.0           py312haa95532_1  
anaconda-client           1.12.3          py312haa95532_0  
anaconda-cloud-auth       0.5.1           py312haa95532_0  
anaconda-navigator        2.6.3           py312haa95532_0  
anaconda-project          0

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 📁 Step 2: Load Your Datasets

In [9]:
# Read the three source files and turn each one into a list of baskets,
# where a basket is a list of item strings.

# Foodmart: the "Transaction" column holds one comma-separated string per row.
df_foodmart = pd.read_csv("foodmart_custom_10000.csv")
transactions_foodmart = (
    df_foodmart["Transaction"]
    .apply(lambda basket: list(map(str, basket.split(","))))
    .tolist()
)

# Market Basket: read without a header row; every row is one basket and
# dropna() discards that row's missing cells.
df_market = pd.read_csv("Market_Basket_Optimisation.csv", header=None)
transactions_market = (
    df_market
    .apply(lambda basket_row: list(map(str, basket_row.dropna())), axis=1)
    .tolist()
)

# Online Retail II: keep only Invoice/Description, drop incomplete rows,
# compare invoice numbers as text, and exclude invoices starting with "C"
# (presumably cancellations -- confirm against the dataset description).
df_retail = pd.read_excel("online_retail_II.xlsx", sheet_name="Year 2010-2011")
df_retail = (
    df_retail[['Invoice', 'Description']]
    .dropna()
    .assign(Invoice=lambda frame: frame['Invoice'].astype(str))
    .loc[lambda frame: ~frame['Invoice'].str.startswith('C')]
)
# One basket per invoice: the list of its descriptions.
transactions_retail = (
    df_retail
    .groupby('Invoice')['Description']
    .apply(lambda descriptions: list(map(str, descriptions)))
    .tolist()
)



# 🔁 Step 3: Encode Transactions

In [10]:
from mlxtend.preprocessing import TransactionEncoder

def encode_transactions(transactions):
    """Encode transactions into a DataFrame with one column per encoder label.

    A fresh ``TransactionEncoder`` is fitted on ``transactions`` and then used
    to transform those same transactions.

    Parameters
    ----------
    transactions
        The collection of transactions, handed to the encoder unchanged
        (the notebook passes a list of item-string lists).

    Returns
    -------
    pandas.DataFrame
        The encoder's transformed array, with columns taken from the fitted
        encoder's ``columns_`` attribute.
    """
    encoder = TransactionEncoder()
    encoder.fit(transactions)
    onehot = encoder.transform(transactions)
    return pd.DataFrame(onehot, columns=encoder.columns_)


# ⚙️ Step 4: Run FP-Growth with Support Thresholds

In [13]:
from mlxtend.frequent_patterns import fpgrowth

def run_fp_growth(df_encoded, dataset_name, support_thresholds=(0.01, 0.02, 0.05)):
    """Run FP-Growth on an encoded dataset at several supports and print a report.

    For every threshold the function mines frequent itemsets with
    ``fpgrowth(..., use_colnames=True)`` and prints a header line, the first
    rows of the result (``head()``, in the order ``fpgrowth`` returned them --
    no sorting is applied here), and the total number of itemsets found.
    A separator line of 100 ``=`` characters is printed once at the end.

    Parameters
    ----------
    df_encoded
        The encoded transactions passed straight to ``fpgrowth``
        (the notebook passes the output of ``encode_transactions``).
    dataset_name
        Label used in the printed header of each report section.
    support_thresholds
        Iterable of ``min_support`` values to try, in order. Defaults to
        ``(0.01, 0.02, 0.05)``, the values that used to be hard-coded, so
        existing two-argument calls behave exactly as before. A tuple is
        used for the default so it cannot be mutated between calls.

    Returns
    -------
    None
        Results are only printed.
    """
    for support in support_thresholds:
        freq_itemsets = fpgrowth(df_encoded, min_support=support, use_colnames=True)
        print(f"\n📊 {dataset_name} | Support: {support}")
        print(freq_itemsets.head())
        print(f"🔢 Total frequent itemsets: {len(freq_itemsets)}")
    # Visual separator between datasets when several reports are printed back to back.
    print("===="*25)

# ▶️ Step 5: Apply to All Datasets

In [14]:
# For each dataset: one-hot encode its baskets, then immediately print the
# FP-Growth report, so the reports appear in the order
# Foodmart -> Market Basket -> Online Retail II.

# 1) Foodmart
df_encoded_foodmart = encode_transactions(transactions=transactions_foodmart)
run_fp_growth(df_encoded=df_encoded_foodmart, dataset_name="Foodmart Custom")

# 2) Market Basket
df_encoded_market = encode_transactions(transactions=transactions_market)
run_fp_growth(df_encoded=df_encoded_market, dataset_name="Market Basket")

# 3) Online Retail
df_encoded_retail = encode_transactions(transactions=transactions_retail)
run_fp_growth(df_encoded=df_encoded_retail, dataset_name="Online Retail II")



📊 Foodmart Custom | Support: 0.01
   support     itemsets
0   0.1709    (Carrots)
1   0.1693  (Chocolate)
2   0.1690       (Eggs)
3   0.1686      (Bread)
4   0.1666      (Chips)
🔢 Total frequent itemsets: 465

📊 Foodmart Custom | Support: 0.02
   support     itemsets
0   0.1709    (Carrots)
1   0.1693  (Chocolate)
2   0.1690       (Eggs)
3   0.1686      (Bread)
4   0.1666      (Chips)
🔢 Total frequent itemsets: 465

📊 Foodmart Custom | Support: 0.05
   support     itemsets
0   0.1709    (Carrots)
1   0.1693  (Chocolate)
2   0.1690       (Eggs)
3   0.1686      (Bread)
4   0.1666      (Chips)
🔢 Total frequent itemsets: 30

📊 Market Basket | Support: 0.01
    support          itemsets
0  0.238368   (mineral water)
1  0.132116       (green tea)
2  0.076523  (low fat yogurt)
3  0.071457          (shrimp)
4  0.065858       (olive oil)
🔢 Total frequent itemsets: 257

📊 Market Basket | Support: 0.02
    support          itemsets
0  0.238368   (mineral water)
1  0.132116       (green tea)
2  0

In [15]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth

# Load the three raw files and convert each into a list of baskets
# (one list of item strings per transaction).

# Foodmart -- "Transaction" is a comma-separated string of items per row.
df_foodmart = pd.read_csv("foodmart_custom_10000.csv")
transactions_foodmart = (
    df_foodmart["Transaction"]
    .apply(lambda basket: list(map(str, basket.split(","))))
    .tolist()
)

# Market Basket -- no header row; each row is a basket, missing cells dropped.
df_market = pd.read_csv("Market_Basket_Optimisation.csv", header=None)
transactions_market = (
    df_market
    .apply(lambda basket_row: list(map(str, basket_row.dropna())), axis=1)
    .tolist()
)

# Online Retail II -- keep Invoice/Description, drop rows with missing values,
# treat invoice numbers as text and skip those starting with "C"
# (presumably cancelled invoices -- TODO confirm).
df_retail = pd.read_excel("online_retail_II.xlsx", sheet_name="Year 2010-2011")
df_retail = (
    df_retail[['Invoice', 'Description']]
    .dropna()
    .assign(Invoice=lambda frame: frame['Invoice'].astype(str))
    .loc[lambda frame: ~frame['Invoice'].str.startswith('C')]
)
# Group the remaining lines by invoice: one basket of descriptions each.
transactions_retail = (
    df_retail
    .groupby('Invoice')['Description']
    .apply(lambda descriptions: list(map(str, descriptions)))
    .tolist()
)

# FP-Growth function
def run_fp_growth(transactions, dataset_name, support_thresholds):
    """Encode raw transactions, then print an FP-Growth report per threshold.

    The transactions are encoded once with a ``TransactionEncoder`` (fitted
    and applied to the same input) and wrapped in a DataFrame whose columns
    are the encoder's ``columns_``. For each value in ``support_thresholds``
    the function runs ``fpgrowth(..., use_colnames=True)`` on that frame and
    prints a header, the first rows of the result (``head()``), and the
    number of frequent itemsets found.

    Parameters
    ----------
    transactions
        Collection of transactions given to the encoder unchanged
        (the notebook passes lists of item strings).
    dataset_name
        Label shown in each printed header.
    support_thresholds
        Iterable of ``min_support`` values, processed in order.

    Returns
    -------
    None
        Results are only printed.
    """
    encoder = TransactionEncoder()
    encoder.fit(transactions)
    basket_frame = pd.DataFrame(
        encoder.transform(transactions),
        columns=encoder.columns_,
    )

    for support in support_thresholds:
        mined = fpgrowth(basket_frame, min_support=support, use_colnames=True)
        # One print call with newline separators emits the same three lines
        # as printing header, preview and count individually.
        print(
            f"\n📊 Dataset: {dataset_name} | Support: {support}",
            mined.head(),
            f"🔢 Frequent Itemsets Count: {len(mined)}",
            sep="\n",
        )

# Mine every dataset with the same list of minimum-support values,
# in the order Foodmart -> Market Basket -> Online Retail II.
support_thresholds = [0.01, 0.02, 0.05]
for _transactions, _label in (
    (transactions_foodmart, "Foodmart Custom"),
    (transactions_market, "Market Basket"),
    (transactions_retail, "Online Retail II"),
):
    run_fp_growth(_transactions, _label, support_thresholds)



📊 Dataset: Foodmart Custom | Support: 0.01
   support     itemsets
0   0.1709    (Carrots)
1   0.1693  (Chocolate)
2   0.1690       (Eggs)
3   0.1686      (Bread)
4   0.1666      (Chips)
🔢 Frequent Itemsets Count: 465

📊 Dataset: Foodmart Custom | Support: 0.02
   support     itemsets
0   0.1709    (Carrots)
1   0.1693  (Chocolate)
2   0.1690       (Eggs)
3   0.1686      (Bread)
4   0.1666      (Chips)
🔢 Frequent Itemsets Count: 465

📊 Dataset: Foodmart Custom | Support: 0.05
   support     itemsets
0   0.1709    (Carrots)
1   0.1693  (Chocolate)
2   0.1690       (Eggs)
3   0.1686      (Bread)
4   0.1666      (Chips)
🔢 Frequent Itemsets Count: 30

📊 Dataset: Market Basket | Support: 0.01
    support          itemsets
0  0.238368   (mineral water)
1  0.132116       (green tea)
2  0.076523  (low fat yogurt)
3  0.071457          (shrimp)
4  0.065858       (olive oil)
🔢 Frequent Itemsets Count: 257

📊 Dataset: Market Basket | Support: 0.02
    support          itemsets
0  0.238368   (mine