<a href="https://colab.research.google.com/github/JeyScientist/Artificial-Intelligence/blob/main/Boolean_Association_Rules.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import files
uploaded = files.upload()

Saving single.csv to single.csv


In [None]:
!pip install pyECLAT

Collecting pyECLAT
  Downloading pyECLAT-1.0.2-py3-none-any.whl.metadata (4.0 kB)
Downloading pyECLAT-1.0.2-py3-none-any.whl (6.3 kB)
Installing collected packages: pyECLAT
Successfully installed pyECLAT-1.0.2


In [None]:
!pip install colorama

Collecting colorama
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: colorama
Successfully installed colorama-0.4.6


In [None]:
import pandas as pd

# Step 1: Read the data (assuming semicolon-separated values)
df = pd.read_csv('single.csv', sep=';')

# Step 2: Clean column names to handle spaces or special characters
df.columns = df.columns.str.strip()  # Remove leading/trailing spaces
df.columns = df.columns.str.replace(r'[^a-zA-Z0-9_]', '', regex=True)  # Remove special characters

# Step 3: Print the cleaned column names to check if 'Country' exists
print("Cleaned Column Names:")
print(df.columns)

# Step 4: Handle "wrongly coded" entries in 'Itemname'
df['Itemname'] = df['Itemname'].replace(r'wrongly coded.*', 'Unknown Item', regex=True)

# Step 5: Drop columns with completely empty values
df = df.dropna(axis=1, how='all')

# Step 6: Remove columns that start with 'Unnamed' (e.g., 'Country,,,')
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

# Step 7: Clean 'Price' column: Replace commas with dots and convert to numeric
df['Price'] = df['Price'].replace(',', '.', regex=True)  # Replace commas with dots
df['Price'] = pd.to_numeric(df['Price'], errors='coerce')  # Convert to numeric, coerce errors

# Step 8: Clean 'Quantity' column: Convert to numeric
df['Quantity'] = pd.to_numeric(df['Quantity'], errors='coerce')

# Step 9: Convert 'Date' column to datetime format
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Step 10: Handle missing values in 'CustomerID' and 'Country'
# Ensure 'CustomerID' and 'Country' are available in cleaned columns
if 'CustomerID' in df.columns:
    df['CustomerID'] = df['CustomerID'].fillna('Unknown')  # Fill missing CustomerID with 'Unknown'
else:
    print("'CustomerID' column is missing!")

if 'Country' in df.columns:
    df['Country'] = df['Country'].fillna('Unknown')  # Fill missing Country with 'Unknown'
else:
    print("'Country' column is missing!")

# Step 11: Show a snippet of the cleaned dataframe
print("Cleaned DataFrame:")
print(df.head())

# Step 12: Check for any remaining "Unknown Item" entries (i.e., wrongly coded items)
print("\nRows with 'Unknown Item':")
print(df[df['Itemname'].str.contains('Unknown Item', na=False)])

# Optional: Save the cleaned DataFrame to a new CSV file
df.to_csv('cleaned_data.csv', index=False)

Cleaned Column Names:
Index(['BillNo', 'Itemname', 'Quantity', 'Date', 'Price', 'CustomerID',
       'Country'],
      dtype='object')
Cleaned DataFrame:
   BillNo                             Itemname  Quantity                Date  \
0  536365   WHITE HANGING HEART T-LIGHT HOLDER       6.0 2010-01-12 08:26:00   
1  536365                  WHITE METAL LANTERN       6.0 2010-01-12 08:26:00   
2  536365       CREAM CUPID HEARTS COAT HANGER       8.0 2010-01-12 08:26:00   
3  536365  KNITTED UNION FLAG HOT WATER BOTTLE       6.0 2010-01-12 08:26:00   
4  536365       RED WOOLLY HOTTIE WHITE HEART.       6.0 2010-01-12 08:26:00   

   Price CustomerID           Country  
0   2.55    17850.0  United Kingdom,,  
1   3.39    17850.0  United Kingdom,,  
2   2.75    17850.0  United Kingdom,,  
3   3.39    17850.0  United Kingdom,,  
4   3.39    17850.0  United Kingdom,,  

Rows with 'Unknown Item':
        BillNo      Itemname  Quantity                Date  Price CustomerID  \
366291  569830  Un

In [None]:
df.head()

Unnamed: 0,BillNo,Itemname,Quantity,Date,Price,CustomerID,Country
0,536365,WHITE HANGING HEART T-LIGHT HOLDER,6.0,2010-01-12 08:26:00,2.55,17850.0,"United Kingdom,,"
1,536365,WHITE METAL LANTERN,6.0,2010-01-12 08:26:00,3.39,17850.0,"United Kingdom,,"
2,536365,CREAM CUPID HEARTS COAT HANGER,8.0,2010-01-12 08:26:00,2.75,17850.0,"United Kingdom,,"
3,536365,KNITTED UNION FLAG HOT WATER BOTTLE,6.0,2010-01-12 08:26:00,3.39,17850.0,"United Kingdom,,"
4,536365,RED WOOLLY HOTTIE WHITE HEART.,6.0,2010-01-12 08:26:00,3.39,17850.0,"United Kingdom,,"


In [None]:
df["Itemname"]=df["Itemname"]
df["Itemname"]

Unnamed: 0,Itemname
0,WHITE HANGING HEART T-LIGHT HOLDER
1,WHITE METAL LANTERN
2,CREAM CUPID HEARTS COAT HANGER
3,KNITTED UNION FLAG HOT WATER BOTTLE
4,RED WOOLLY HOTTIE WHITE HEART.
...,...
522059,PACK OF 20 SPACEBOY NAPKINS
522060,CHILDREN'S APRON DOLLY GIRL
522061,CHILDRENS CUTLERY DOLLY GIRL
522062,CHILDRENS CUTLERY CIRCUS PARADE


In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules
from pyECLAT import ECLAT
from itertools import product
from collections import defaultdict
from colorama import Fore, init
import warnings
from tqdm import tqdm

init(autoreset=True)
warnings.filterwarnings("ignore")

class AssociationRuleMiner:
    def __init__(self, metric='lift'):
        self.metric = metric
        self.best_params = {}
        self.best_rules = pd.DataFrame()

    def evaluate_rules(self, rules):
        """Evaluate rules using selected metric"""
        if rules.empty:
            return 0
        return rules[self.metric].mean()

    def grid_search(self, transactions, algorithm='apriori',
                    support_range=np.arange(0.1, 0.5, 0.1),
                    confidence_range=np.arange(0.5, 0.9, 0.1)):
        """Automated grid search for optimal support and confidence"""
        te = TransactionEncoder()
        te_ary = te.fit_transform(transactions)
        df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

        best_score = -1
        results = []

        print(Fore.CYAN + f"\n🔍 Performing Grid Search for {algorithm.upper()} (optimizing {self.metric})...")
        for support, confidence in tqdm(product(support_range, confidence_range),
                                      total=len(support_range)*len(confidence_range)):
            try:
                if algorithm == 'apriori':
                    freq_items = apriori(df_encoded, min_support=support, use_colnames=True)
                elif algorithm == 'fpgrowth':
                    freq_items = fpgrowth(df_encoded, min_support=support, use_colnames=True)
                else:
                    continue

                if not freq_items.empty:
                    rules = association_rules(freq_items, metric="confidence", min_threshold=confidence)
                    if not rules.empty:
                        score = self.evaluate_rules(rules)
                        results.append({
                            'support': support,
                            'confidence': confidence,
                            'score': score,
                            'num_rules': len(rules)
                        })

                        if score > best_score:
                            best_score = score
                            self.best_params = {
                                'support': support,
                                'confidence': confidence,
                                'algorithm': algorithm
                            }
                            self.best_rules = rules
            except Exception as e:
                print(Fore.RED + f"Error with support {support} and confidence {confidence}: {e}")
                continue

        if results:
            return pd.DataFrame(results).sort_values('score', ascending=False)
        else:
            print(Fore.YELLOW + f"No valid rules found for {algorithm.upper()} with current support/confidence ranges.")
            return pd.DataFrame(columns=['support', 'confidence', 'score', 'num_rules'])

def load_and_prepare_data(df, items_column='Itemname'):
    """Prepare transaction data from DataFrame"""
    try:
        transactions = df[items_column].dropna().apply(
            lambda x: [item.strip() for item in str(x).split(',')]
        )
        print(f"✅ Successfully loaded {len(transactions)} transactions")
        return transactions.tolist()
    except KeyError:
        available_cols = df.columns.tolist()
        print(f"Error: Column '{items_column}' not found. Available columns: {available_cols}")
        for possible_name in ['items', 'products', 'item_name', 'item', 'Itemname']:
            if possible_name in df.columns:
                print(f"Using alternative column name: {possible_name}")
                transactions = df[possible_name].dropna().apply(
                    lambda x: [item.strip() for item in str(x).split(',')]
                )
                return transactions.tolist()
        raise ValueError(f"No suitable transaction column found. Available columns: {available_cols}")

def item_based_recommendation(item, encoded_df, cosine_sim, top_n=5):
    """Generate recommendations based on item similarity"""
    try:
        item_index = list(encoded_df.columns).index(item)
        sim_scores = list(enumerate(cosine_sim[item_index]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[1:top_n+1]  # Exclude self and get top_n
        recommendations = [encoded_df.columns[i] for i, _ in sim_scores]
        print(Fore.GREEN + f"\n🎯 Item-based recommendations for '{item}': {recommendations}")
        return recommendations
    except ValueError:
        print(Fore.RED + f"\n⚠️ Item '{item}' not found in dataset")
        return []

def popularity_based_recommendation(encoded_df, top_n=5):
    """Generate recommendations based on overall popularity"""
    item_counts = encoded_df.sum().sort_values(ascending=False)
    popular_items = list(item_counts.head(top_n).index)
    print(Fore.GREEN + f"\n🏆 Popularity-based recommendations: {popular_items}")
    return popular_items

def association_based_recommendation(item, rules, top_n=5):
    """Generate recommendations based on association rules"""
    item_rules = rules[rules['antecedents'].apply(lambda x: item in x)]
    if not item_rules.empty:
        item_rules = item_rules.sort_values('lift', ascending=False)
        recommendations = list(set().union(*item_rules.head(top_n)['consequents'].apply(list)))
        print(Fore.GREEN + f"\n🔗 Association-based recommendations for '{item}': {recommendations}")
        return recommendations
    else:
        print(Fore.YELLOW + f"\nℹ️ No association rules found for '{item}'")
        return []

def hybrid_recommendation(item, encoded_df, cosine_sim, rules, top_n=5):
    """Combine multiple recommendation approaches"""
    item_based = item_based_recommendation(item, encoded_df, cosine_sim, top_n)
    assoc_based = association_based_recommendation(item, rules, top_n)
    popular = popularity_based_recommendation(encoded_df, top_n)

    # Combine and deduplicate recommendations
    combined = list(set(item_based + assoc_based + popular))
    combined = [i for i in combined if i != item]  # Remove self if present

    print(Fore.BLUE + f"\n✨ Hybrid recommendations for '{item}': {combined[:top_n]}")
    return combined[:top_n]

def generate_all_rules(transactions, metric='lift'):
    """Generate rules using all algorithms with optimal parameters"""
    miner = AssociationRuleMiner(metric=metric)

    # Grid search for Apriori
    apriori_results = miner.grid_search(transactions, algorithm='apriori')
    print(Fore.GREEN + "\n🏆 Best Apriori Parameters:")
    print(miner.best_params)

    # Grid search for FP-Growth
    fp_results = miner.grid_search(transactions, algorithm='fpgrowth')
    print(Fore.GREEN + "\n🏆 Best FP-Growth Parameters:")
    print(miner.best_params)

    # Get best rules from both algorithms
    best_rules = miner.best_rules.copy()

    # Add ECLAT rules (simplified implementation)
    try:
        eclat = ECLAT(data=pd.DataFrame({'Transactions': {i: t for i, t in enumerate(transactions)}}))
        _, eclat_rules = eclat.fit(min_support=miner.best_params.get('support', 0.1))

        eclat_formatted = []
        for itemset, support in eclat_rules.items():
            if len(itemset) >= 2:
                # Here we create simple rules by splitting the itemset
                for i in range(1, len(itemset)):
                    eclat_formatted.append({
                        'antecedents': frozenset(itemset[:i]),
                        'consequents': frozenset(itemset[i:]),
                        'support': support,
                        'confidence': None,
                        'lift': None
                    })

        eclat_df = pd.DataFrame(eclat_formatted)
        best_rules = pd.concat([best_rules, eclat_df], ignore_index=True)
    except Exception as e:
        print(Fore.RED + f"ECLAT Error: {str(e)}")

    return best_rules, apriori_results, fp_results

# MAIN EXECUTION WITH YOUR DATAFRAME
if __name__ == "__main__":
    # Load your existing dataframe here
    # df = pd.read_csv('your_data.csv')  # Uncomment and modify as needed

    # Ensure you have a DataFrame named df with 'Itemname' column
    try:
        df
        if 'Itemname' not in df.columns:
            raise KeyError
    except (NameError, KeyError):
        print(Fore.RED + "⚠️ Please load your DataFrame with 'Itemname' column first")
        # Create example data if no DataFrame exists
        data = {
            'Itemname': [
                'milk,bread,eggs',
                'milk,bread',
                'bread,eggs',
                'milk,eggs',
                'bread,butter',
                'milk,bread,butter,eggs'
            ]
        }
        df = pd.DataFrame(data)
        print(Fore.YELLOW + "⚠️ Using example data instead")

    # Prepare data
    transactions = load_and_prepare_data(df, items_column='Itemname')

    # Generate rules with automatic parameter tuning
    best_rules, apriori_results, fp_results = generate_all_rules(transactions, metric='lift')

    # Prepare recommendation engine components
    te = TransactionEncoder()
    encoded_matrix = te.fit_transform(transactions)
    encoded_df = pd.DataFrame(encoded_matrix, columns=te.columns_)
    cosine_sim = cosine_similarity(encoded_df.T)

    # Example recommendations
    test_item = 'milk'  # Change this to test different items
    item_based_recommendation(test_item, encoded_df, cosine_sim)
    popularity_based_recommendation(encoded_df)
    association_based_recommendation(test_item, best_rules)
    hybrid_recommendation(test_item, encoded_df, cosine_sim, best_rules)

    # Show grid search results
    print(Fore.CYAN + "\n📊 Apriori Grid Search Results:")
    print(apriori_results.head())

    print(Fore.CYAN + "\n📊 FP-Growth Grid Search Results:")
    print(fp_results.head())

✅ Successfully loaded 999 transactions

🔍 Performing Grid Search for APRIORI (optimizing lift)...


100%|██████████| 16/16 [00:00<00:00, 93.24it/s] 


No valid rules found for APRIORI with current support/confidence ranges.

🏆 Best Apriori Parameters:
{}

🔍 Performing Grid Search for FPGROWTH (optimizing lift)...


100%|██████████| 16/16 [00:00<00:00, 26.46it/s]


No valid rules found for FPGROWTH with current support/confidence ranges.

🏆 Best FP-Growth Parameters:
{}
ECLAT Error: unhashable type: 'list'

⚠️ Item 'milk' not found in dataset

🏆 Popularity-based recommendations: ['WHITE HANGING HEART T-LIGHT HOLDER', 'HAND WARMER UNION JACK', 'HAND WARMER SCOTTY DOG DESIGN', 'JAM MAKING SET PRINTED', 'RED WOOLLY HOTTIE WHITE HEART.']


KeyError: 'antecedents'

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules
from pyECLAT import ECLAT
from itertools import product
from collections import defaultdict
from colorama import Fore, init
import warnings
from tqdm import tqdm

init(autoreset=True)
warnings.filterwarnings("ignore")

class AssociationRuleMiner:
    def __init__(self, metric='lift'):
        self.metric = metric
        self.best_params = {}
        self.best_rules = pd.DataFrame()

    def evaluate_rules(self, rules):
        """Evaluate rules using selected metric"""
        if rules.empty:
            return 0
        return rules[self.metric].mean()

    def grid_search(self, transactions, algorithm='apriori',
                   support_range=np.arange(0.1, 0.5, 0.1),
                   confidence_range=np.arange(0.5, 0.9, 0.1)):
        """Automated grid search for optimal support and confidence"""
        te = TransactionEncoder()
        te_ary = te.fit_transform(transactions)
        df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

        best_score = -1
        results = []

        print(Fore.CYAN + f"\n🔍 Performing Grid Search for {algorithm.upper()} (optimizing {self.metric})...")
        for support, confidence in tqdm(product(support_range, confidence_range),
                                      total=len(support_range)*len(confidence_range)):
            try:
                if algorithm == 'apriori':
                    freq_items = apriori(df_encoded, min_support=support, use_colnames=True)
                elif algorithm == 'fpgrowth':
                    freq_items = fpgrowth(df_encoded, min_support=support, use_colnames=True)
                else:
                    continue

                if not freq_items.empty:
                    rules = association_rules(freq_items, metric="confidence", min_threshold=confidence)
                    if not rules.empty:
                        score = self.evaluate_rules(rules)
                        results.append({
                            'support': support,
                            'confidence': confidence,
                            'score': score,
                            'num_rules': len(rules)
                        })

                        if score > best_score:
                            best_score = score
                            self.best_params = {
                                'support': support,
                                'confidence': confidence,
                                'algorithm': algorithm
                            }
                            self.best_rules = rules
            except Exception as e:
                print(Fore.YELLOW + f"Warning: {str(e)}")
                continue

        # Create DataFrame only if we have results
        if results:
            results_df = pd.DataFrame(results)
            # Only sort if 'score' column exists
            if 'score' in results_df.columns:
                return results_df.sort_values('score', ascending=False)
            return results_df
        return pd.DataFrame()  # Return empty DataFrame if no results

def load_and_prepare_data(df, items_column='Itemname'):
    """Prepare transaction data from DataFrame"""
    try:
        transactions = df[items_column].dropna().apply(
            lambda x: [item.strip() for item in str(x).split(',')]
        )
        print(f"✅ Successfully loaded {len(transactions)} transactions")
        return transactions.tolist()
    except KeyError:
        available_cols = df.columns.tolist()
        print(f"Error: Column '{items_column}' not found. Available columns: {available_cols}")
        for possible_name in ['items', 'products', 'item_name', 'item', 'Itemname']:
            if possible_name in df.columns:
                print(f"Using alternative column name: {possible_name}")
                transactions = df[possible_name].dropna().apply(
                    lambda x: [item.strip() for item in str(x).split(',')]
                )
                return transactions.tolist()
        raise ValueError(f"No suitable transaction column found. Available columns: {available_cols}")

def item_based_recommendation(item, encoded_df, cosine_sim, top_n=5):
    """Generate recommendations based on item similarity"""
    try:
        item_index = list(encoded_df.columns).index(item)
        sim_scores = list(enumerate(cosine_sim[item_index]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[1:top_n+1]  # Exclude self and get top_n
        recommendations = [encoded_df.columns[i] for i, _ in sim_scores]
        print(Fore.GREEN + f"\n🎯 Item-based recommendations for '{item}': {recommendations}")
        return recommendations
    except ValueError:
        print(Fore.RED + f"\n⚠️ Item '{item}' not found in dataset")
        return []

def popularity_based_recommendation(encoded_df, top_n=5):
    """Generate recommendations based on overall popularity"""
    item_counts = encoded_df.sum().sort_values(ascending=False)
    popular_items = list(item_counts.head(top_n).index)
    print(Fore.GREEN + f"\n🏆 Popularity-based recommendations: {popular_items}")
    return popular_items

def association_based_recommendation(item, rules, top_n=5):
    """Generate recommendations based on association rules"""
    if rules.empty:
        print(Fore.YELLOW + "\nℹ️ No association rules available")
        return []

    item_rules = rules[rules['antecedents'].apply(lambda x: item in x)]
    if not item_rules.empty:
        item_rules = item_rules.sort_values('lift', ascending=False)
        recommendations = list(set().union(*item_rules.head(top_n)['consequents'].apply(list)))
        print(Fore.GREEN + f"\n🔗 Association-based recommendations for '{item}': {recommendations}")
        return recommendations
    else:
        print(Fore.YELLOW + f"\nℹ️ No association rules found for '{item}'")
        return []

def hybrid_recommendation(item, encoded_df, cosine_sim, rules, top_n=5):
    """Combine multiple recommendation approaches"""
    item_based = item_based_recommendation(item, encoded_df, cosine_sim, top_n)
    assoc_based = association_based_recommendation(item, rules, top_n)
    popular = popularity_based_recommendation(encoded_df, top_n)

    # Combine and deduplicate recommendations
    combined = list(set(item_based + assoc_based + popular))
    combined = [i for i in combined if i != item]  # Remove self if present

    print(Fore.BLUE + f"\n✨ Hybrid recommendations for '{item}': {combined[:top_n]}")
    return combined[:top_n]

def generate_all_rules(transactions, metric='lift'):
    """Generate rules using all algorithms with optimal parameters"""
    miner = AssociationRuleMiner(metric=metric)

    # Grid search for Apriori
    apriori_results = miner.grid_search(transactions, algorithm='apriori')
    print(Fore.GREEN + "\n🏆 Best Apriori Parameters:")
    print(miner.best_params)

    # Grid search for FP-Growth
    fp_results = miner.grid_search(transactions, algorithm='fpgrowth')
    print(Fore.GREEN + "\n🏆 Best FP-Growth Parameters:")
    print(miner.best_params)

    # Get best rules from both algorithms
    best_rules = miner.best_rules

    # Add ECLAT rules (simplified implementation)
    try:
        eclat = ECLAT(data=pd.DataFrame({'Transactions': {i: t for i, t in enumerate(transactions)}}))
        _, eclat_rules = eclat.fit(min_support=miner.best_params.get('support', 0.1))

        eclat_formatted = []
        for itemset, support in eclat_rules.items():
            if len(itemset) >= 2:
                for i in range(1, len(itemset)):
                    eclat_formatted.append({
                        'antecedents': frozenset(itemset[:i]),
                        'consequents': frozenset(itemset[i:]),
                        'support': support,
                        'confidence': None,
                        'lift': None
                    })

        eclat_df = pd.DataFrame(eclat_formatted)
        best_rules = pd.concat([best_rules, eclat_df], ignore_index=True)
    except Exception as e:
        print(Fore.RED + f"ECLAT Error: {str(e)}")

    return best_rules, apriori_results, fp_results

# MAIN EXECUTION WITH YOUR DATAFRAME
if __name__ == "__main__":
    # Load your existing dataframe here
    # df = pd.read_csv('your_data.csv')  # Uncomment and modify as needed

    # Ensure you have a DataFrame named df with 'Itemname' column
    if 'df' not in locals() or 'Itemname' not in df.columns:
        print(Fore.RED + "⚠️ Please load your DataFrame with 'Itemname' column first")
        # Create example data if no DataFrame exists
        data = {
            'Itemname': [
                'milk,bread,eggs',
                'milk,bread',
                'bread,eggs',
                'milk,eggs',
                'bread,butter',
                'milk,bread,butter,eggs'
            ]
        }
        df = pd.DataFrame(data)
        print(Fore.YELLOW + "⚠️ Using example data instead")

    # Prepare data
    transactions = load_and_prepare_data(df, items_column='Itemname')

    # Generate rules with automatic parameter tuning
    best_rules, apriori_results, fp_results = generate_all_rules(transactions, metric='lift')

    # Only proceed if we got valid results
    if not best_rules.empty:
        # Prepare recommendation engine
        te = TransactionEncoder()
        encoded_matrix = te.fit_transform(transactions)
        encoded_df = pd.DataFrame(encoded_matrix, columns=te.columns_)
        cosine_sim = cosine_similarity(encoded_df.T)

        # Example recommendations
        test_item = 'milk'  # Change this to test different items
        item_based_recommendation(test_item, encoded_df, cosine_sim)
        popularity_based_recommendation(encoded_df)
        association_based_recommendation(test_item, best_rules)
        hybrid_recommendation(test_item, encoded_df, cosine_sim, best_rules)

        # Show grid search results
        if not apriori_results.empty:
            print(Fore.CYAN + "\n📊 Apriori Grid Search Results:")
            print(apriori_results.head())

        if not fp_results.empty:
            print(Fore.CYAN + "\n📊 FP-Growth Grid Search Results:")
            print(fp_results.head())
    else:
        print(Fore.RED + "⚠️ No valid rules were generated from the data")

⚠️ Please load your DataFrame with 'Itemname' column first
⚠️ Using example data instead
✅ Successfully loaded 6 transactions

🔍 Performing Grid Search for APRIORI (optimizing lift)...


100%|██████████| 16/16 [00:00<00:00, 26.13it/s]



🏆 Best Apriori Parameters:
{'support': np.float64(0.1), 'confidence': np.float64(0.7999999999999999), 'algorithm': 'apriori'}

🔍 Performing Grid Search for FPGROWTH (optimizing lift)...


100%|██████████| 16/16 [00:00<00:00, 32.75it/s]



🏆 Best FP-Growth Parameters:
{'support': np.float64(0.1), 'confidence': np.float64(0.7999999999999999), 'algorithm': 'fpgrowth'}
ECLAT Error: unhashable type: 'list'

🎯 Item-based recommendations for 'milk': ['eggs', 'bread', 'butter']

🏆 Popularity-based recommendations: ['bread', 'eggs', 'milk', 'butter']

🔗 Association-based recommendations for 'milk': ['bread', 'eggs']

🎯 Item-based recommendations for 'milk': ['eggs', 'bread', 'butter']

🔗 Association-based recommendations for 'milk': ['bread', 'eggs']

🏆 Popularity-based recommendations: ['bread', 'eggs', 'milk', 'butter']

✨ Hybrid recommendations for 'milk': ['bread', 'eggs', 'butter']

📊 Apriori Grid Search Results:
    support  confidence     score  num_rules
3       0.1         0.8  1.480000         10
2       0.1         0.7  1.346429         14
1       0.1         0.6  1.234211         19
11      0.3         0.8  1.200000          1
7       0.2         0.8  1.200000          1

📊 FP-Growth Grid Search Results:
    support

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules
from pyECLAT import ECLAT
from itertools import product
from collections import defaultdict
from colorama import Fore, init
import warnings
from tqdm import tqdm

init(autoreset=True)
warnings.filterwarnings("ignore")

class AssociationRuleMiner:
    def __init__(self, metric='lift'):
        self.metric = metric
        self.best_params = {}
        self.best_rules = pd.DataFrame()

    def evaluate_rules(self, rules):
        """Evaluate rules using selected metric"""
        if rules.empty:
            return 0
        return rules[self.metric].mean()

    def grid_search(self, transactions, algorithm='apriori',
                   support_range=np.arange(0.1, 0.5, 0.1),
                   confidence_range=np.arange(0.5, 0.9, 0.1)):
        """Automated grid search for optimal support and confidence"""
        te = TransactionEncoder()
        te_ary = te.fit_transform(transactions)
        df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

        best_score = -1
        results = []

        print(Fore.CYAN + f"\n🔍 Performing Grid Search for {algorithm.upper()} (optimizing {self.metric})...")
        for support, confidence in tqdm(product(support_range, confidence_range),
                                      total=len(support_range)*len(confidence_range)):
            try:
                if algorithm == 'apriori':
                    freq_items = apriori(df_encoded, min_support=support, use_colnames=True)
                elif algorithm == 'fpgrowth':
                    freq_items = fpgrowth(df_encoded, min_support=support, use_colnames=True)
                else:
                    continue

                if not freq_items.empty:
                    rules = association_rules(freq_items, metric="confidence", min_threshold=confidence)
                    if not rules.empty:
                        score = self.evaluate_rules(rules)
                        results.append({
                            'support': support,
                            'confidence': confidence,
                            'score': score,
                            'num_rules': len(rules)
                        })

                        if score > best_score:
                            best_score = score
                            self.best_params = {
                                'support': support,
                                'confidence': confidence,
                                'algorithm': algorithm
                            }
                            self.best_rules = rules
            except Exception as e:
                print(Fore.YELLOW + f"Warning: {str(e)}")
                continue

        # Create DataFrame only if we have results
        if results:
            results_df = pd.DataFrame(results)
            # Only sort if 'score' column exists
            if 'score' in results_df.columns:
                return results_df.sort_values('score', ascending=False)
            return results_df
        return pd.DataFrame()  # Return empty DataFrame if no results

def load_and_prepare_data(df, items_column='Itemname'):
    """Prepare transaction data from DataFrame"""
    try:
        transactions = df[items_column].dropna().apply(
            lambda x: [item.strip() for item in str(x).split(',')]
        )
        print(f"✅ Successfully loaded {len(transactions)} transactions")
        return transactions.tolist()
    except KeyError:
        available_cols = df.columns.tolist()
        print(f"Error: Column '{items_column}' not found. Available columns: {available_cols}")
        for possible_name in ['items', 'products', 'item_name', 'item', 'Itemname']:
            if possible_name in df.columns:
                print(f"Using alternative column name: {possible_name}")
                transactions = df[possible_name].dropna().apply(
                    lambda x: [item.strip() for item in str(x).split(',')]
                )
                return transactions.tolist()
        raise ValueError(f"No suitable transaction column found. Available columns: {available_cols}")

def item_based_recommendation(item, encoded_df, cosine_sim, top_n=5):
    """Generate recommendations based on item similarity"""
    try:
        item_index = list(encoded_df.columns).index(item)
        sim_scores = list(enumerate(cosine_sim[item_index]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[1:top_n+1]  # Exclude self and get top_n
        recommendations = [encoded_df.columns[i] for i, _ in sim_scores]
        print(Fore.GREEN + f"\n🎯 Item-based recommendations for '{item}': {recommendations}")
        return recommendations
    except ValueError:
        print(Fore.RED + f"\n⚠️ Item '{item}' not found in dataset")
        return []

def popularity_based_recommendation(encoded_df, top_n=5):
    """Generate recommendations based on overall popularity"""
    item_counts = encoded_df.sum().sort_values(ascending=False)
    popular_items = list(item_counts.head(top_n).index)
    print(Fore.GREEN + f"\n🏆 Popularity-based recommendations: {popular_items}")
    return popular_items

def association_based_recommendation(item, rules, top_n=5):
    """Generate recommendations based on association rules"""
    if rules.empty:
        print(Fore.YELLOW + "\nℹ️ No association rules available")
        return []

    item_rules = rules[rules['antecedents'].apply(lambda x: item in x)]
    if not item_rules.empty:
        item_rules = item_rules.sort_values('lift', ascending=False)
        recommendations = list(set().union(*item_rules.head(top_n)['consequents'].apply(list)))
        print(Fore.GREEN + f"\n🔗 Association-based recommendations for '{item}': {recommendations}")
        return recommendations
    else:
        print(Fore.YELLOW + f"\nℹ️ No association rules found for '{item}'")
        return []

def hybrid_recommendation(item, encoded_df, cosine_sim, rules, top_n=5):
    """Combine multiple recommendation approaches"""
    item_based = item_based_recommendation(item, encoded_df, cosine_sim, top_n)
    assoc_based = association_based_recommendation(item, rules, top_n)
    popular = popularity_based_recommendation(encoded_df, top_n)

    # Combine and deduplicate recommendations
    combined = list(set(item_based + assoc_based + popular))
    combined = [i for i in combined if i != item]  # Remove self if present

    print(Fore.BLUE + f"\n✨ Hybrid recommendations for '{item}': {combined[:top_n]}")
    return combined[:top_n]

def generate_all_rules(transactions, metric='lift'):
    """Generate rules using all algorithms with optimal parameters"""
    miner = AssociationRuleMiner(metric=metric)

    # Grid search for Apriori
    apriori_results = miner.grid_search(transactions, algorithm='apriori')
    print(Fore.GREEN + "\n🏆 Best Apriori Parameters:")
    print(miner.best_params)

    # Grid search for FP-Growth
    fp_results = miner.grid_search(transactions, algorithm='fpgrowth')
    print(Fore.GREEN + "\n🏆 Best FP-Growth Parameters:")
    print(miner.best_params)

    # Get best rules from both algorithms
    best_rules = miner.best_rules

    # Add ECLAT rules (simplified implementation)
    try:
        eclat = ECLAT(data=pd.DataFrame({'Transactions': {i: t for i, t in enumerate(transactions)}}))
        _, eclat_rules = eclat.fit(min_support=miner.best_params.get('support', 0.1))

        eclat_formatted = []
        for itemset, support in eclat_rules.items():
            if len(itemset) >= 2:
                for i in range(1, len(itemset)):
                    eclat_formatted.append({
                        'antecedents': frozenset(itemset[:i]),
                        'consequents': frozenset(itemset[i:]),
                        'support': support,
                        'confidence': None,
                        'lift': None
                    })

        eclat_df = pd.DataFrame(eclat_formatted)
        best_rules = pd.concat([best_rules, eclat_df], ignore_index=True)
    except Exception as e:
        print(Fore.RED + f"ECLAT Error: {str(e)}")

    return best_rules, apriori_results, fp_results

# MAIN EXECUTION WITH YOUR DATAFRAME



    # Generate rules with automatic parameter tuning
    best_rules, apriori_results, fp_results = generate_all_rules(df["itemname"], metric='lift')

    # Only proceed if we got valid results
    if not best_rules.empty:
        # Prepare recommendation engine
        te = TransactionEncoder()
        encoded_matrix = te.fit_transform(transactions)
        encoded_df = pd.DataFrame(encoded_matrix, columns=te.columns_)
        cosine_sim = cosine_similarity(encoded_df.T)

        # Example recommendations
        test_item = 'milk'  # Change this to test different items
        item_based_recommendation(test_item, encoded_df, cosine_sim)
        popularity_based_recommendation(encoded_df)
        association_based_recommendation(test_item, best_rules)
        hybrid_recommendation(test_item, encoded_df, cosine_sim, best_rules)

        # Show grid search results
        if not apriori_results.empty:
            print(Fore.CYAN + "\n📊 Apriori Grid Search Results:")
            print(apriori_results.head())

        if not fp_results.empty:
            print(Fore.CYAN + "\n📊 FP-Growth Grid Search Results:")
            print(fp_results.head())
    else:
        print(Fore.RED + "⚠️ No valid rules were generated from the data")

In [None]:
df["itemname"]

KeyError: 'itemname'

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules
from pyECLAT import ECLAT
from itertools import product
from collections import defaultdict
from colorama import Fore, init
import warnings
from tqdm import tqdm

init(autoreset=True)
warnings.filterwarnings("ignore")

class AssociationRuleMiner:
    def __init__(self, metric='lift'):
        self.metric = metric
        self.best_params = {}
        self.best_rules = pd.DataFrame()

    def evaluate_rules(self, rules):
        """Evaluate rules using selected metric"""
        if rules.empty or self.metric not in rules.columns:
            return 0
        return rules[self.metric].mean()

    def grid_search(self, transactions, algorithm='apriori',
                    support_range=np.arange(0.1, 0.5, 0.1),
                    confidence_range=np.arange(0.5, 0.9, 0.1)):
        """Automated grid search for optimal support and confidence"""
        te = TransactionEncoder()
        te_ary = te.fit_transform(transactions)
        df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

        best_score = -1
        results = []

        print(Fore.CYAN + f"\n🔍 Performing Grid Search for {algorithm.upper()} (optimizing {self.metric})...")
        for support, confidence in tqdm(product(support_range, confidence_range),
                                        total=len(support_range) * len(confidence_range)):
            try:
                if algorithm == 'apriori':
                    freq_items = apriori(df_encoded, min_support=support, use_colnames=True)
                elif algorithm == 'fpgrowth':
                    freq_items = fpgrowth(df_encoded, min_support=support, use_colnames=True)
                else:
                    continue

                if not freq_items.empty:
                    rules = association_rules(freq_items, metric="confidence", min_threshold=confidence)
                    if not rules.empty:
                        score = self.evaluate_rules(rules)
                        results.append({
                            'support': support,
                            'confidence': confidence,
                            'score': score,
                            'num_rules': len(rules)
                        })

                        if score > best_score:
                            best_score = score
                            self.best_params = {
                                'support': support,
                                'confidence': confidence,
                                'algorithm': algorithm
                            }
                            self.best_rules = rules
            except Exception as e:
                print(Fore.RED + f"Error with support {support} and confidence {confidence}: {e}")
                continue

        if results:
            return pd.DataFrame(results).sort_values('score', ascending=False)
        else:
            print(Fore.YELLOW + f"No valid rules found for {algorithm.upper()} with current support/confidence ranges.")
            return pd.DataFrame(columns=['support', 'confidence', 'score', 'num_rules'])

def load_and_prepare_data(df, items_column='Itemname'):
    """Prepare transaction data from DataFrame"""
    transactions = df[items_column].dropna().apply(
        lambda x: [item.strip() for item in str(x).split(',')]
    )
    print(f"✅ Successfully loaded {len(transactions)} transactions")
    return transactions.tolist()

def item_based_recommendation(item, encoded_df, cosine_sim, top_n=5):
    """Generate recommendations based on item similarity"""
    try:
        item_index = list(encoded_df.columns).index(item)
        sim_scores = list(enumerate(cosine_sim[item_index]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[1:top_n + 1]  # Exclude self
        recommendations = [encoded_df.columns[i] for i, _ in sim_scores]
        print(Fore.GREEN + f"\n🎯 Item-based recommendations for '{item}': {recommendations}")
        return recommendations
    except ValueError:
        print(Fore.RED + f"\n⚠️ Item '{item}' not found in dataset")
        return []

def popularity_based_recommendation(encoded_df, top_n=5):
    """Generate recommendations based on overall popularity"""
    item_counts = encoded_df.sum().sort_values(ascending=False)
    popular_items = list(item_counts.head(top_n).index)
    print(Fore.GREEN + f"\n🏆 Popularity-based recommendations: {popular_items}")
    return popular_items

def association_based_recommendation(item, rules, top_n=5):
    """Generate recommendations based on association rules"""
    if 'antecedents' not in rules.columns or 'consequents' not in rules.columns:
        print(Fore.YELLOW + "⚠️ No valid association rules available.")
        return []

    item_rules = rules[rules['antecedents'].apply(lambda x: item in x)]
    if not item_rules.empty:
        item_rules = item_rules.sort_values('lift', ascending=False)
        recommendations = list(set().union(*item_rules.head(top_n)['consequents'].apply(list)))
        print(Fore.GREEN + f"\n🔗 Association-based recommendations for '{item}': {recommendations}")
        return recommendations
    else:
        print(Fore.YELLOW + f"\nℹ️ No association rules found for '{item}'")
        return []

def hybrid_recommendation(item, encoded_df, cosine_sim, rules, top_n=5):
    """Combine multiple recommendation approaches"""
    item_based = item_based_recommendation(item, encoded_df, cosine_sim, top_n)
    assoc_based = association_based_recommendation(item, rules, top_n)
    popular = popularity_based_recommendation(encoded_df, top_n)

    combined = list(set(item_based + assoc_based + popular))
    combined = [i for i in combined if i != item]
    print(Fore.BLUE + f"\n✨ Hybrid recommendations for '{item}': {combined[:top_n]}")
    return combined[:top_n]

def generate_all_rules(transactions, metric='lift'):
    """Generate rules using all algorithms with optimal parameters"""
    miner = AssociationRuleMiner(metric=metric)

    # Apriori
    apriori_results = miner.grid_search(transactions, algorithm='apriori')
    print(Fore.GREEN + "\n🏆 Best Apriori Parameters:")
    print(miner.best_params)

    # FP-Growth
    fp_results = miner.grid_search(transactions, algorithm='fpgrowth')
    print(Fore.GREEN + "\n🏆 Best FP-Growth Parameters:")
    print(miner.best_params)

    # ECLAT
    best_rules = miner.best_rules.copy()
    try:
        eclat = ECLAT(data=pd.DataFrame({'Transactions': {i: t for i, t in enumerate(transactions)}}))
        _, eclat_rules = eclat.fit(min_support=miner.best_params.get('support', 0.1))

        eclat_formatted = []
        for itemset, support in eclat_rules.items():
            if len(itemset) >= 2:
                for i in range(1, len(itemset)):
                    eclat_formatted.append({
                        'antecedents': frozenset(itemset[:i]),
                        'consequents': frozenset(itemset[i:]),
                        'support': support,
                        'confidence': None,
                        'lift': None
                    })

        eclat_df = pd.DataFrame(eclat_formatted)
        best_rules = pd.concat([best_rules, eclat_df], ignore_index=True)
    except Exception as e:
        print(Fore.RED + f"ECLAT Error: {str(e)}")

    return best_rules, apriori_results, fp_results

# ===== MAIN EXECUTION =====
if __name__ == "__main__":
    # Ensure your DataFrame 'df' is loaded and has 'itemname'
    if 'itemname' not in df.columns:
        raise ValueError("❌ Column 'itemname' not found in your DataFrame.")

    df.rename(columns={'itemname': 'Itemname'}, inplace=True)
    transactions = load_and_prepare_data(df, items_column='Itemname')

    best_rules, apriori_results, fp_results = generate_all_rules(transactions, metric='lift')

    te = TransactionEncoder()
    encoded_matrix = te.fit_transform(transactions)
    encoded_df = pd.DataFrame(encoded_matrix, columns=te.columns_)
    cosine_sim = cosine_similarity(encoded_df.T)

    test_item = 'milk'  # change this to any item in your dataset
    item_based_recommendation(test_item, encoded_df, cosine_sim)
    popularity_based_recommendation(encoded_df)
    association_based_recommendation(test_item, best_rules)
    hybrid_recommendation(test_item, encoded_df, cosine_sim, best_rules)

    print(Fore.CYAN + "\n📊 Apriori Grid Search Results:")
    print(apriori_results.head())

    print(Fore.CYAN + "\n📊 FP-Growth Grid Search Results:")
    print(fp_results.head())

ValueError: ❌ Column 'itemname' not found in your DataFrame.

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules
from pyECLAT import ECLAT
from itertools import product
from collections import defaultdict
from colorama import Fore, init
import warnings
from tqdm import tqdm

init(autoreset=True)
warnings.filterwarnings("ignore")

class AssociationRuleMiner:
    def __init__(self, metric='lift'):
        self.metric = metric
        self.best_params = {}
        self.best_rules = pd.DataFrame()

    def evaluate_rules(self, rules):
        """Evaluate rules using selected metric"""
        if rules.empty or self.metric not in rules.columns:
            return 0
        return rules[self.metric].mean()

    def grid_search(self, transactions, algorithm='apriori',
                    support_range=np.arange(0.1, 0.5, 0.1),
                    confidence_range=np.arange(0.5, 0.9, 0.1)):
        """Automated grid search for optimal support and confidence"""
        te = TransactionEncoder()
        te_ary = te.fit_transform(transactions)
        df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

        best_score = -1
        results = []

        print(Fore.CYAN + f"\n🔍 Performing Grid Search for {algorithm.upper()} (optimizing {self.metric})...")
        for support, confidence in tqdm(product(support_range, confidence_range),
                                        total=len(support_range) * len(confidence_range)):
            try:
                if algorithm == 'apriori':
                    freq_items = apriori(df_encoded, min_support=support, use_colnames=True)
                elif algorithm == 'fpgrowth':
                    freq_items = fpgrowth(df_encoded, min_support=support, use_colnames=True)
                else:
                    continue

                if not freq_items.empty:
                    rules = association_rules(freq_items, metric="confidence", min_threshold=confidence)
                    if not rules.empty:
                        score = self.evaluate_rules(rules)
                        results.append({
                            'support': support,
                            'confidence': confidence,
                            'score': score,
                            'num_rules': len(rules)
                        })

                        if score > best_score:
                            best_score = score
                            self.best_params = {
                                'support': support,
                                'confidence': confidence,
                                'algorithm': algorithm
                            }
                            self.best_rules = rules
            except Exception as e:
                print(Fore.RED + f"Error with support {support} and confidence {confidence}: {e}")
                continue

        if results:
            return pd.DataFrame(results).sort_values('score', ascending=False)
        else:
            print(Fore.YELLOW + f"No valid rules found for {algorithm.upper()} with current support/confidence ranges.")
            return pd.DataFrame(columns=['support', 'confidence', 'score', 'num_rules'])

def load_and_prepare_data(df, items_column='Itemname'):
    """Prepare transaction data from DataFrame"""
    transactions = df[items_column].dropna().apply(
        lambda x: [item.strip() for item in str(x).split(',')]
    )
    print(f"✅ Successfully loaded {len(transactions)} transactions")
    return transactions.tolist()

def item_based_recommendation(item, encoded_df, cosine_sim, top_n=5):
    """Generate recommendations based on item similarity"""
    try:
        item_index = list(encoded_df.columns).index(item)
        sim_scores = list(enumerate(cosine_sim[item_index]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[1:top_n + 1]  # Exclude self
        recommendations = [encoded_df.columns[i] for i, _ in sim_scores]
        print(Fore.GREEN + f"\n🎯 Item-based recommendations for '{item}': {recommendations}")
        return recommendations
    except ValueError:
        print(Fore.RED + f"\n⚠️ Item '{item}' not found in dataset")
        return []

def popularity_based_recommendation(encoded_df, top_n=5):
    """Generate recommendations based on overall popularity"""
    item_counts = encoded_df.sum().sort_values(ascending=False)
    popular_items = list(item_counts.head(top_n).index)
    print(Fore.GREEN + f"\n🏆 Popularity-based recommendations: {popular_items}")
    return popular_items

def association_based_recommendation(item, rules, top_n=5):
    """Generate recommendations based on association rules"""
    if 'antecedents' not in rules.columns or 'consequents' not in rules.columns:
        print(Fore.YELLOW + "⚠️ No valid association rules available.")
        return []

    item_rules = rules[rules['antecedents'].apply(lambda x: item in x)]
    if not item_rules.empty:
        item_rules = item_rules.sort_values('lift', ascending=False)
        recommendations = list(set().union(*item_rules.head(top_n)['consequents'].apply(list)))
        print(Fore.GREEN + f"\n🔗 Association-based recommendations for '{item}': {recommendations}")
        return recommendations
    else:
        print(Fore.YELLOW + f"\nℹ️ No association rules found for '{item}'")
        return []

def hybrid_recommendation(item, encoded_df, cosine_sim, rules, top_n=5):
    """Combine multiple recommendation approaches"""
    item_based = item_based_recommendation(item, encoded_df, cosine_sim, top_n)
    assoc_based = association_based_recommendation(item, rules, top_n)
    popular = popularity_based_recommendation(encoded_df, top_n)

    combined = list(set(item_based + assoc_based + popular))
    combined = [i for i in combined if i != item]
    print(Fore.BLUE + f"\n✨ Hybrid recommendations for '{item}': {combined[:top_n]}")
    return combined[:top_n]

def generate_all_rules(transactions, metric='lift'):
    """Generate rules using all algorithms with optimal parameters"""
    miner = AssociationRuleMiner(metric=metric)

    # Apriori
    apriori_results = miner.grid_search(transactions, algorithm='apriori')
    print(Fore.GREEN + "\n🏆 Best Apriori Parameters:")
    print(miner.best_params)

    # FP-Growth
    fp_results = miner.grid_search(transactions, algorithm='fpgrowth')
    print(Fore.GREEN + "\n🏆 Best FP-Growth Parameters:")
    print(miner.best_params)

    # ECLAT
    best_rules = miner.best_rules.copy()
    try:
        eclat = ECLAT(data=pd.DataFrame({'Transactions': {i: t for i, t in enumerate(transactions)}}))
        _, eclat_rules = eclat.fit(min_support=miner.best_params.get('support', 0.1))

        eclat_formatted = []
        for itemset, support in eclat_rules.items():
            if len(itemset) >= 2:
                for i in range(1, len(itemset)):
                    eclat_formatted.append({
                        'antecedents': frozenset(itemset[:i]),
                        'consequents': frozenset(itemset[i:]),
                        'support': support,
                        'confidence': None,
                        'lift': None
                    })

        eclat_df = pd.DataFrame(eclat_formatted)
        best_rules = pd.concat([best_rules, eclat_df], ignore_index=True)
    except Exception as e:
        print(Fore.RED + f"ECLAT Error: {str(e)}")

    return best_rules, apriori_results, fp_results

# ===== MAIN EXECUTION =====
if __name__ == "__main__":
    # Ensure your DataFrame 'df' is loaded and has 'Itemname' column
    if 'Itemname' not in df.columns:
        raise ValueError("❌ Column 'Itemname' not found in your DataFrame.")

    transactions = load_and_prepare_data(df, items_column='Itemname')

    best_rules, apriori_results, fp_results = generate_all_rules(transactions, metric='lift')

    te = TransactionEncoder()
    encoded_matrix = te.fit_transform(transactions)
    encoded_df = pd.DataFrame(encoded_matrix, columns=te.columns_)
    cosine_sim = cosine_similarity(encoded_df.T)

    test_item = 'milk'  # change this to any item in your dataset
    item_based_recommendation(test_item, encoded_df, cosine_sim)
    popularity_based_recommendation(encoded_df)
    association_based_recommendation(test_item, best_rules)
    hybrid_recommendation(test_item, encoded_df, cosine_sim, best_rules)

    print(Fore.CYAN + "\n📊 Apriori Grid Search Results:")
    print(apriori_results.head())

    print(Fore.CYAN + "\n📊 FP-Growth Grid Search Results:")
    print(fp_results.head())

✅ Successfully loaded 519910 transactions

🔍 Performing Grid Search for APRIORI (optimizing lift)...


100%|██████████| 16/16 [00:34<00:00,  2.13s/it]


No valid rules found for APRIORI with current support/confidence ranges.

🏆 Best Apriori Parameters:
{}

🔍 Performing Grid Search for FPGROWTH (optimizing lift)...


  0%|          | 0/16 [00:00<?, ?it/s]

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules
from pyECLAT import ECLAT
from itertools import product
from collections import defaultdict
from colorama import Fore, init
import warnings
from tqdm import tqdm

init(autoreset=True)
warnings.filterwarnings("ignore")

class AssociationRuleMiner:
    def __init__(self, metric='lift'):
        self.metric = metric
        self.best_params = {}
        self.best_rules = pd.DataFrame()
        self.metric_values = []  # To track metric values over iterations

    def evaluate_rules(self, rules):
        """Evaluate rules using selected metric"""
        if rules.empty or self.metric not in rules.columns:
            return 0
        metric_value = rules[self.metric].mean()
        self.metric_values.append(metric_value)  # Store the current metric value
        return metric_value

    def grid_search(self, transactions, algorithm='apriori',
                    support_range=np.arange(0.1, 0.5, 0.1),
                    confidence_range=np.arange(0.5, 0.9, 0.1)):
        """Automated grid search for optimal support and confidence"""
        te = TransactionEncoder()
        te_ary = te.fit_transform(transactions)
        df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

        best_score = -1
        results = []

        print(Fore.CYAN + f"\n🔍 Performing Grid Search for {algorithm.upper()} (optimizing {self.metric})...")
        for support, confidence in tqdm(product(support_range, confidence_range),
                                        total=len(support_range) * len(confidence_range)):
            try:
                if algorithm == 'apriori':
                    freq_items = apriori(df_encoded, min_support=support, use_colnames=True)
                elif algorithm == 'fpgrowth':
                    freq_items = fpgrowth(df_encoded, min_support=support, use_colnames=True)
                else:
                    continue

                if not freq_items.empty:
                    rules = association_rules(freq_items, metric="confidence", min_threshold=confidence)
                    if not rules.empty:
                        score = self.evaluate_rules(rules)
                        results.append({
                            'support': support,
                            'confidence': confidence,
                            'score': score,
                            'num_rules': len(rules)
                        })

                        # Update the best score and parameters
                        if score > best_score:
                            best_score = score
                            self.best_params = {
                                'support': support,
                                'confidence': confidence,
                                'algorithm': algorithm
                            }
                            self.best_rules = rules

                        # Display the updated metric after each iteration
                        print(Fore.GREEN + f"Updated {self.metric} after support={support}, confidence={confidence}: {score}")

            except Exception as e:
                print(Fore.RED + f"Error with support {support} and confidence {confidence}: {e}")
                continue

        if results:
            return pd.DataFrame(results).sort_values('score', ascending=False)
        else:
            print(Fore.YELLOW + f"No valid rules found for {algorithm.upper()} with current support/confidence ranges.")
            return pd.DataFrame(columns=['support', 'confidence', 'score', 'num_rules'])

    def get_metric_values(self):
        """Returns all recorded metric values"""
        return self.metric_values

def load_and_prepare_data(df, items_column='Itemname'):
    """Prepare transaction data from DataFrame"""
    transactions = df[items_column].dropna().apply(
        lambda x: [item.strip() for item in str(x).split(',')]
    )
    print(f"✅ Successfully loaded {len(transactions)} transactions")
    return transactions.tolist()

def item_based_recommendation(item, encoded_df, cosine_sim, top_n=5):
    """Generate recommendations based on item similarity"""
    try:
        item_index = list(encoded_df.columns).index(item)
        sim_scores = list(enumerate(cosine_sim[item_index]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[1:top_n + 1]  # Exclude self
        recommendations = [encoded_df.columns[i] for i, _ in sim_scores]
        print(Fore.GREEN + f"\n🎯 Item-based recommendations for '{item}': {recommendations}")
        return recommendations
    except ValueError:
        print(Fore.RED + f"\n⚠️ Item '{item}' not found in dataset")
        return []

def popularity_based_recommendation(encoded_df, top_n=5):
    """Generate recommendations based on overall popularity"""
    item_counts = encoded_df.sum().sort_values(ascending=False)
    popular_items = list(item_counts.head(top_n).index)
    print(Fore.GREEN + f"\n🏆 Popularity-based recommendations: {popular_items}")
    return popular_items

def association_based_recommendation(item, rules, top_n=5):
    """Generate recommendations based on association rules"""
    if 'antecedents' not in rules.columns or 'consequents' not in rules.columns:
        print(Fore.YELLOW + "⚠️ No valid association rules available.")
        return []

    item_rules = rules[rules['antecedents'].apply(lambda x: item in x)]
    if not item_rules.empty:
        item_rules = item_rules.sort_values('lift', ascending=False)
        recommendations = list(set().union(*item_rules.head(top_n)['consequents'].apply(list)))
        print(Fore.GREEN + f"\n🔗 Association-based recommendations for '{item}': {recommendations}")
        return recommendations
    else:
        print(Fore.YELLOW + f"\nℹ️ No association rules found for '{item}'")
        return []

def hybrid_recommendation(item, encoded_df, cosine_sim, rules, top_n=5):
    """Combine multiple recommendation approaches"""
    item_based = item_based_recommendation(item, encoded_df, cosine_sim, top_n)
    assoc_based = association_based_recommendation(item, rules, top_n)
    popular = popularity_based_recommendation(encoded_df, top_n)

    combined = list(set(item_based + assoc_based + popular))
    combined = [i for i in combined if i != item]
    print(Fore.BLUE + f"\n✨ Hybrid recommendations for '{item}': {combined[:top_n]}")
    return combined[:top_n]

def generate_all_rules(transactions, metric='lift'):
    """Generate rules using all algorithms with optimal parameters"""
    miner = AssociationRuleMiner(metric=metric)

    # Apriori
    apriori_results = miner.grid_search(transactions, algorithm='apriori')
    print(Fore.GREEN + "\n🏆 Best Apriori Parameters:")
    print(miner.best_params)

    # FP-Growth
    fp_results = miner.grid_search(transactions, algorithm='fpgrowth')
    print(Fore.GREEN + "\n🏆 Best FP-Growth Parameters:")
    print(miner.best_params)

    # ECLAT
    best_rules = miner.best_rules.copy()
    try:
        eclat = ECLAT(data=pd.DataFrame({'Transactions': {i: t for i, t in enumerate(transactions)}}))
        _, eclat_rules = eclat.fit(min_support=miner.best_params.get('support', 0.1))

        eclat_formatted = []
        for itemset, support in eclat_rules.items():
            if len(itemset) >= 2:
                for i in range(1, len(itemset)):
                    eclat_formatted.append({
                        'antecedents': frozenset(itemset[:i]),
                        'consequents': frozenset(itemset[i:]),
                        'support': support,
                        'confidence': None,
                        'lift': None
                    })

        eclat_df = pd.DataFrame(eclat_formatted)
        best_rules = pd.concat([best_rules, eclat_df], ignore_index=True)
    except Exception as e:
        print(Fore.RED + f"ECLAT Error: {str(e)}")

    return best_rules, apriori_results, fp_results

# ===== MAIN EXECUTION =====
if __name__ == "__main__":
    # Ensure your DataFrame 'df' is loaded and has 'Itemname' column
    if 'Itemname' not in df.columns:
        raise ValueError("❌ Column 'Itemname' not found in your DataFrame.")

    transactions = load_and_prepare_data(df, items_column='Itemname')

    # Call the generate_all_rules function
    best_rules, apriori_results, fp_results = generate_all_rules(transactions, metric='lift')

    # Print the Apriori Grid Search Results
    print(Fore.CYAN + "\n📊 Apriori Grid Search Results:")
    print(apriori_results.head())

    # Show all the evaluated metric values during grid search
    print(Fore.CYAN + "\n🔢 Evaluation Metric Values (lift):")
    print(miner.get_metric_values())

    # Generate encoded matrix and cosine similarity
    te = TransactionEncoder()
    encoded_matrix = te.fit_transform(transactions)
    encoded_df = pd.DataFrame(encoded_matrix, columns=te.columns_)
    cosine_sim = cosine_similarity(encoded_df.T)

    # Define test item and generate recommendations
    test_item = 'milk'  # change this to any item in your dataset
    item_based_recommendation(test_item, encoded_df, cosine_sim)
    popularity_based_recommendation(encoded_df)
    association_based_recommendation(test_item, best_rules)
    hybrid_recommendation(test_item, encoded_df, cosine_sim, best_rules)

✅ Successfully loaded 519910 transactions

🔍 Performing Grid Search for APRIORI (optimizing lift)...


100%|██████████| 16/16 [00:34<00:00,  2.14s/it]


No valid rules found for APRIORI with current support/confidence ranges.

🏆 Best Apriori Parameters:
{}

🔍 Performing Grid Search for FPGROWTH (optimizing lift)...


  0%|          | 0/16 [00:00<?, ?it/s]

In [None]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules
from itertools import product
from colorama import Fore, init
import warnings
from tqdm import tqdm

init(autoreset=True)
warnings.filterwarnings("ignore")

class AssociationRuleMiner:
    def __init__(self, metric='lift'):
        self.metric = metric
        self.best_params = {}
        self.best_rules = pd.DataFrame()

    def evaluate_rules(self, rules):
        if rules.empty or self.metric not in rules.columns:
            return 0
        return rules[self.metric].mean()

    def grid_search(self, transactions, algorithm='apriori',
                   support_range=np.arange(0.01, 0.1, 0.02),
                   confidence_range=np.arange(0.3, 0.8, 0.1)):
        te = TransactionEncoder()
        te_ary = te.fit_transform(transactions)
        df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

        best_score = -1
        results = []

        print(Fore.CYAN + f"\n🔍 Performing Grid Search for {algorithm.upper()} (optimizing {self.metric})...")

        # Sample a subset if dataset is too large
        if len(transactions) > 10000:
            sample_transactions = np.random.choice(transactions, size=10000, replace=False)
            te_ary = te.fit_transform(sample_transactions)
            df_encoded = pd.DataFrame(te_ary, columns=te.columns_)
            print(Fore.YELLOW + "⚠️ Using 10,000 transaction sample for performance")

        for support, confidence in tqdm(product(support_range, confidence_range),
                                      total=len(support_range)*len(confidence_range)):
            try:
                if algorithm == 'apriori':
                    freq_items = apriori(df_encoded, min_support=support, use_colnames=True, max_len=4)
                elif algorithm == 'fpgrowth':
                    freq_items = fpgrowth(df_encoded, min_support=support, use_colnames=True, max_len=4)
                else:
                    continue

                if not freq_items.empty:
                    rules = association_rules(freq_items, metric="confidence", min_threshold=confidence)
                    if not rules.empty:
                        score = self.evaluate_rules(rules)
                        results.append({
                            'support': support,
                            'confidence': confidence,
                            'score': score,
                            'num_rules': len(rules)
                        })

                        if score > best_score:
                            best_score = score
                            self.best_params = {
                                'support': support,
                                'confidence': confidence,
                                'algorithm': algorithm
                            }
                            self.best_rules = rules
            except Exception as e:
                continue

        if results:
            return pd.DataFrame(results).sort_values('score', ascending=False)
        return pd.DataFrame()

def load_and_prepare_data(df, items_column='Itemname'):
    try:
        transactions = df[items_column].dropna().apply(
            lambda x: [item.strip() for item in str(x).split(',')]
        )
        print(f"✅ Successfully loaded {len(transactions)} transactions")
        return transactions.tolist()
    except KeyError:
        available_cols = df.columns.tolist()
        raise ValueError(f"Column '{items_column}' not found. Available columns: {available_cols}")

# Main execution


    # Prepare data
    transactions = load_and_prepare_data(df, items_column='Itemname')

    # Initialize miner
    miner = AssociationRuleMiner(metric='lift')

    # Run grid search with more appropriate parameters
    print(Fore.BLUE + "\n⚙️ Running with optimized parameters for large dataset...")
    results = miner.grid_search(
        transactions,
        algorithm='fpgrowth',
        support_range=np.arange(0.005, 0.05, 0.005),
        confidence_range=np.arange(0.2, 0.7, 0.1)
    )

    if not results.empty:
        print(Fore.GREEN + "\n🏆 Best Parameters:")
        print(miner.best_params)
        print(Fore.CYAN + "\nTop Rules:")
        print(miner.best_rules.head())
    else:
        print(Fore.RED + "\n⚠️ No rules found. Try lowering support/confidence thresholds further.")

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, fpgrowth, association_rules
from itertools import product
from collections import defaultdict
from colorama import Fore, init
import warnings
from tqdm import tqdm

init(autoreset=True)
warnings.filterwarnings("ignore")

class AssociationRuleMiner:
    def __init__(self, metric='lift'):
        self.metric = metric
        self.best_params = {}
        self.best_rules = pd.DataFrame()
        self.metric_values = []

    def evaluate_rules(self, rules):
        """Evaluate rules using selected metric"""
        if rules.empty or self.metric not in rules.columns:
            return 0
        metric_value = rules[self.metric].mean()
        self.metric_values.append(metric_value)
        return metric_value

    def grid_search(self, transactions, algorithm='fpgrowth',
                   support_range=np.arange(0.005, 0.051, 0.005),
                   confidence_range=np.arange(0.2, 0.7, 0.1),
                   sample_size=20000):
        """Optimized grid search for large datasets"""
        # Sample transactions if dataset is large
        if len(transactions) > sample_size:
            print(Fore.YELLOW + f"⚠️ Sampling {sample_size} transactions for performance")
            transactions = np.random.choice(transactions, size=sample_size, replace=False)

        te = TransactionEncoder()
        te_ary = te.fit_transform(transactions)
        df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

        best_score = -1
        results = []

        print(Fore.CYAN + f"\n🔍 Performing Grid Search for {algorithm.upper()} (optimizing {self.metric})...")

        for support, confidence in tqdm(product(support_range, confidence_range),
                                      total=len(support_range)*len(confidence_range)):
            try:
                if algorithm == 'apriori':
                    freq_items = apriori(df_encoded, min_support=support, use_colnames=True, max_len=4)
                elif algorithm == 'fpgrowth':
                    freq_items = fpgrowth(df_encoded, min_support=support, use_colnames=True, max_len=4)
                else:
                    continue

                if not freq_items.empty:
                    rules = association_rules(freq_items, metric="confidence", min_threshold=confidence)
                    if not rules.empty:
                        score = self.evaluate_rules(rules)
                        results.append({
                            'support': support,
                            'confidence': confidence,
                            'score': score,
                            'num_rules': len(rules)
                        })

                        if score > best_score:
                            best_score = score
                            self.best_params = {
                                'support': support,
                                'confidence': confidence,
                                'algorithm': algorithm
                            }
                            self.best_rules = rules
            except Exception as e:
                continue

        if results:
            return pd.DataFrame(results).sort_values('score', ascending=False)
        print(Fore.YELLOW + f"No valid rules found for {algorithm.upper()} with current parameters")
        return pd.DataFrame()

    def get_metric_values(self):
        return self.metric_values

def load_and_prepare_data(df, items_column='Itemname'):
    """Prepare transaction data with robust column handling"""
    try:
        # Try to find the items column if exact name doesn't exist
        if items_column not in df.columns:
            for col in df.columns:
                if 'item' in col.lower():
                    items_column = col
                    print(Fore.YELLOW + f"⚠️ Using column '{col}' as items column")
                    break

        transactions = df[items_column].dropna().apply(
            lambda x: [item.strip().lower() for item in str(x).split(',') if item.strip()]
        )
        print(f"✅ Successfully loaded {len(transactions)} transactions")
        return transactions.tolist()
    except Exception as e:
        print(Fore.RED + f"Error preparing data: {str(e)}")
        raise

def item_based_recommendation(item, encoded_df, cosine_sim, top_n=5):
    """Generate recommendations based on item similarity"""
    try:
        item = item.lower()
        item_index = list(encoded_df.columns).index(item)
        sim_scores = list(enumerate(cosine_sim[item_index]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[1:top_n+1]  # Exclude self
        recommendations = [encoded_df.columns[i] for i, _ in sim_scores]
        print(Fore.GREEN + f"\n🎯 Item-based recommendations for '{item}': {recommendations}")
        return recommendations
    except ValueError:
        print(Fore.RED + f"\n⚠️ Item '{item}' not found in dataset")
        return []

def popularity_based_recommendation(encoded_df, top_n=5):
    """Generate recommendations based on overall popularity"""
    item_counts = encoded_df.sum().sort_values(ascending=False)
    popular_items = list(item_counts.head(top_n).index)
    print(Fore.GREEN + f"\n🏆 Popularity-based recommendations: {popular_items}")
    return popular_items

def association_based_recommendation(item, rules, top_n=5):
    """Generate recommendations based on association rules"""
    if rules.empty or 'antecedents' not in rules.columns:
        print(Fore.YELLOW + "⚠️ No valid association rules available.")
        return []

    item = item.lower()
    item_rules = rules[rules['antecedents'].apply(lambda x: item in x)]
    if not item_rules.empty:
        item_rules = item_rules.sort_values('lift', ascending=False)
        recommendations = list(set().union(*item_rules.head(top_n)['consequents'].apply(list)))
        print(Fore.GREEN + f"\n🔗 Association-based recommendations for '{item}': {recommendations}")
        return recommendations
    else:
        print(Fore.YELLOW + f"\nℹ️ No association rules found for '{item}'")
        return []

def hybrid_recommendation(item, encoded_df, cosine_sim, rules, top_n=5):
    """Combine multiple recommendation approaches"""
    item_based = item_based_recommendation(item, encoded_df, cosine_sim, top_n)
    assoc_based = association_based_recommendation(item, rules, top_n)
    popular = popularity_based_recommendation(encoded_df, top_n)

    combined = list(set(item_based + assoc_based + popular))
    combined = [i for i in combined if i != item.lower()]
    print(Fore.BLUE + f"\n✨ Hybrid recommendations for '{item}': {combined[:top_n]}")
    return combined[:top_n]

def generate_all_rules(transactions, metric='lift'):
    """Generate rules using optimized parameters"""
    miner = AssociationRuleMiner(metric=metric)

    # First try FP-Growth (faster for large datasets)
    fp_results = miner.grid_search(transactions, algorithm='fpgrowth')

    if not fp_results.empty:
        print(Fore.GREEN + "\n🏆 Best FP-Growth Parameters:")
        print(miner.best_params)
    else:
        # Fall back to Apriori if FP-Growth fails
        print(Fore.YELLOW + "\n⚠️ Trying Apriori as fallback...")
        apriori_results = miner.grid_search(transactions, algorithm='apriori')
        if not apriori_results.empty:
            print(Fore.GREEN + "\n🏆 Best Apriori Parameters:")
            print(miner.best_params)

    return miner.best_rules, miner.best_params

def main():
    try:
        # Load your data
        if 'df' not in locals():
            raise ValueError("Please load your DataFrame first")

        # Prepare data
        transactions = load_and_prepare_data(df)

        # Generate rules with automatic parameter tuning
        best_rules, best_params = generate_all_rules(transactions)

        if best_rules.empty:
            print(Fore.RED + "\n❌ No rules generated. Try adjusting parameters or cleaning data.")
            return

        # Prepare recommendation engine components
        te = TransactionEncoder()
        encoded_matrix = te.fit_transform(transactions)
        encoded_df = pd.DataFrame(encoded_matrix, columns=te.columns_)
        cosine_sim = cosine_similarity(encoded_df.T)

        # Example recommendations
        test_item = 'milk'  # Change this to test different items
        hybrid_recommendation(test_item, encoded_df, cosine_sim, best_rules)

        # Show top rules
        print(Fore.CYAN + "\n📊 Top Association Rules:")
        print(best_rules.sort_values('lift', ascending=False).head(10))

    except Exception as e:
        print(Fore.RED + f"\n❌ Error in main execution: {str(e)}")

if __name__ == "__main__":
    main()