In [None]:
from mlxtend.frequent_patterns import fpgrowth,association_rules, apriori
import pandas as pd
import numpy as np
from itertools import combinations

class AssociationRulesGenerator:
    """Mine frequent itemsets and derive association rules.

    Attributes are plain thresholds expressed as fractions:
    ``conf`` is the minimum rule confidence and ``min_supp`` is the
    minimum itemset support.
    """

    def __init__(self, confidence=0.6, min_support=0.3):
        """Store the confidence and support thresholds (fractions)."""
        self.conf = confidence
        self.min_supp = min_support

    def get_item_set_with_brute_force(self, items_df):
        """Enumerate every item combination and keep the frequent ones.

        A transaction (row) is considered to contain an item when the
        corresponding cell equals 1 (or ``True``); missing cells count as 0.
        Itemsets of growing size are evaluated until a size yields no
        itemset reaching ``self.min_supp``.

        Args:
            items_df (DataFrame): One-hot-encoded transactions, one column
                per item.

        Returns:
            DataFrame: Columns ``freq_item_sets`` and ``Support``. Single
            items are stored as the bare column label, larger itemsets as
            tuples of labels in column order.
        """
        columns = ["freq_item_sets", "Support"]
        items_df = items_df.fillna(0)
        items = list(items_df.columns)
        n_transactions = len(items_df)

        # Without transactions support is undefined; avoid dividing by zero.
        if n_transactions == 0:
            freq_item_set_df = pd.DataFrame(columns=columns)
            print(f"Final Item Sets : \n {freq_item_set_df}")
            return freq_item_set_df

        # Boolean presence matrix, computed once and reused for every size.
        present = np.asarray(items_df.to_numpy() == 1, dtype=bool)

        def support_of(positions):
            # Fraction of rows in which all selected columns are present.
            hits = int(present[:, list(positions)].all(axis=1).sum())
            return round(hits / n_transactions, 3)

        # Generate item sets of size 1 with their support
        level_one = [(item, support_of((pos,))) for pos, item in enumerate(items)]
        freq_item_set_df = pd.DataFrame(level_one, columns=columns)
        print(f"Item Set 1 \n {freq_item_set_df}")

        frames = [freq_item_set_df]
        for item_set_no in range(2, len(items) + 1):
            # Calculate support for all item sets of size item_set_no
            level = [
                (tuple(items[pos] for pos in positions), support_of(positions))
                for positions in combinations(range(len(items)), item_set_no)
            ]
            item_set_df = pd.DataFrame(level, columns=columns)

            # No frequent itemset of this size means none of a larger size.
            if not (item_set_df["Support"] >= self.min_supp).any():
                break
            print(f"freq_item_set_{item_set_no}\n{item_set_df}")
            frames.append(item_set_df)

        freq_item_set_df = pd.concat(frames)
        freq_item_set_df = freq_item_set_df[
            freq_item_set_df["Support"] >= self.min_supp
        ].reset_index(drop=True)
        print(f"Final Item Sets : \n {freq_item_set_df}")
        return freq_item_set_df

    def get_item_set_with_fp_growth(self, items_df):
        """Return the result of ``fpgrowth`` on ``items_df`` at ``self.min_supp``."""
        frequent_patterns = fpgrowth(items_df, min_support=self.min_supp, use_colnames=True)

        return frequent_patterns

    def get_item_set_with_apriori(self, items_df):
        """Return the result of ``apriori`` on ``items_df`` at ``self.min_supp``."""
        frequent_patterns = apriori(items_df, min_support=self.min_supp, use_colnames=True)

        return frequent_patterns

    def format_rule(self, rule):
        """Render one rule row as text.

        Args:
            rule: Mapping/row with ``antecedents`` and ``consequents``
                (iterables of items) and a numeric ``confidence``.

        Returns:
            str: ``"Rule: <antecedents> => <consequents>; confidence: <c>"``.
        """
        # Sort so the text is stable even when the item collections are sets.
        antecedent = ", ".join(sorted(map(str, rule['antecedents'])))
        consequent = ", ".join(sorted(map(str, rule['consequents'])))
        confidence = round(rule['confidence'], 3)
        return f"Rule: {antecedent} => {consequent}; confidence: {confidence}"

    # Method to generate rules
    def association_rules(self, df):
        """Print every rule derivable from the brute-force itemsets.

        For each multi-item itemset, every non-empty proper subset is used
        as antecedent and the remaining items as consequent. Confidence is
        ``support(itemset) / support(antecedent)``; a rule is reported as
        "Selected" when its confidence is greater than ``self.conf``.

        Args:
            df (DataFrame): Output of ``get_item_set_with_brute_force``
                (columns ``freq_item_sets`` and ``Support``).

        Returns:
            None. Rules are written to standard output.
        """
        # Extract support values for each item set
        support = dict(zip(df["freq_item_sets"], df["Support"]))
        rule_no = 1
        for itemset, itemset_support in support.items():
            if not isinstance(itemset, tuple):
                continue  # single items cannot be split into a rule
            for size in range(1, len(itemset)):
                for antecedent in combinations(itemset, size):
                    # Single items are keyed by the bare label, not a 1-tuple.
                    key = antecedent[0] if size == 1 else antecedent
                    antecedent_support = support.get(key)
                    if not antecedent_support:
                        continue  # unknown or zero support: confidence undefined
                    consequent = tuple(i for i in itemset if i not in antecedent)
                    confi = round(float(itemset_support) / float(antecedent_support), 3)
                    lhs = ", ".join(map(str, antecedent))
                    rhs = ", ".join(map(str, consequent))
                    rule = f"Rule {rule_no}: {lhs} => {rhs}; confidence : {confi}"
                    status = "Selected" if confi > self.conf else "Rejected"
                    print(f"{rule} {status}")
                    rule_no += 1

def read_data_set(items_file_path, transactions_file_path):
    """Read the items and transactions Excel files into a one-hot-encoded dataframe.

    The items file must have an ``Item Name`` column; the transactions file
    must have a ``Transaction`` column holding comma-separated item names.
    Names are compared after stripping surrounding whitespace. Names in a
    transaction that are not listed in the items file are ignored, and an
    empty transaction cell yields an all-zero row.

    Args:
        items_file_path (str): Path to items excel
        transactions_file_path (str): Path to transactions excel

    Returns:
        DataFrame: One row per transaction and one column per item, with
        1 where the transaction contains the item and 0 otherwise.
    """
    items_df = pd.read_excel(items_file_path)
    transactions_df = pd.read_excel(transactions_file_path)
    # Get list of unique items
    unique_items = items_df['Item Name'].tolist()

    def purchased_names(cell):
        # An empty cell is treated as an empty basket instead of crashing.
        if pd.isna(cell):
            return set()
        return {name.strip() for name in str(cell).split(',')}

    # Build all rows first and create the frame once; growing a DataFrame
    # with concat inside the loop copies the whole frame on every iteration.
    rows = []
    for cell in transactions_df['Transaction']:
        basket = purchased_names(cell)
        rows.append({item: int(str(item).strip() in basket) for item in unique_items})

    return pd.DataFrame(rows, columns=unique_items)


def execute_association(association_rules_generator, item_file_path, transaction_file_path, algorithm: str):
    """Load a dataset, mine frequent itemsets with one algorithm and print the rules.

    Args:
        association_rules_generator: Object providing ``conf``,
            ``format_rule``, ``association_rules`` and the
            ``get_item_set_with_*`` methods used below.
        item_file_path (str): Path to the items Excel file.
        transaction_file_path (str): Path to the transactions Excel file.
        algorithm (str): ``"Apriori"``, ``"Brute-force"`` or ``"FP-growth"``.
            Any other value prints an error message.

    Returns:
        None. All results are written to standard output.
    """
    # Load data from file
    data = read_data_set(item_file_path, transaction_file_path)

    # Execute the specified association algorithm
    print(f"Executing using {algorithm} Algorithm")

    # The two library-backed algorithms share the same reporting steps.
    library_miners = {
        "Apriori": ("Executing apriori", association_rules_generator.get_item_set_with_apriori),
        "FP-growth": ("Executing FP-tree growth", association_rules_generator.get_item_set_with_fp_growth),
    }

    if algorithm in library_miners:
        banner, mine = library_miners[algorithm]
        print(banner)

        supp_df = mine(data)
        print("Itemsets")
        print(supp_df)
        print("Rules:")

        # The library rule generator rejects an empty itemset table.
        if supp_df.empty:
            print("No frequent itemsets found; no rules generated.")
            return

        # min_threshold=0.0 keeps every rule so that the user's own
        # confidence threshold (not the library default) decides the status.
        rules = association_rules(supp_df, metric="confidence", min_threshold=0.0)
        rules = rules[['antecedents', 'consequents', 'confidence']]

        for number, (_, rule) in enumerate(rules.iterrows(), start=1):
            formatted_rule = association_rules_generator.format_rule(rule)
            status = "Selected" if rule['confidence'] > association_rules_generator.conf else "Rejected"
            print(f"Rule {number} {formatted_rule}; {status}")

    elif algorithm == "Brute-force":
        supp_df = association_rules_generator.get_item_set_with_brute_force(data)
        # Print association rules
        print("Rules:")
        # The generator prints the rules itself; only echo a return value
        # if it provides one, instead of printing "None".
        result = association_rules_generator.association_rules(supp_df)
        if result is not None:
            print(result)

    else:
        print("Invalid algorithm selected.")
        return

    



def main():
    """Interactive menu: pick a dataset and thresholds, then run all three algorithms.

    The loop repeats until the user enters an invalid dataset choice or an
    invalid threshold. Thresholds are entered as percentages and converted
    to fractions before use.
    """
    # Menu number -> (items file, transactions file)
    datasets = {
        1: ("Data/amazon_items.xlsx", "Data/amazon_transactions.xlsx"),
        2: ("Data/generic_items.xlsx", "Data/generic_transactions.xlsx"),
        3: ("Data/best_buy_items.xlsx", "Data/best_buy_transactions.xlsx"),
        4: ("Data/kmart_items.xlsx", "Data/kmart_transactions.xlsx"),
        5: ("Data/nike_items.xlsx", "Data/nike_transactions.xlsx"),
    }

    while True:
        print("\n")
        print("Select your Dataset")
        print("1. Amazon")
        print("2. Generic")
        print("3. Best Buy")
        print("4. K-Mart")
        print("5. Nike")
        try:
            dataset_selection = int(input("Enter your choice: "))
        except ValueError:
            # Non-numeric input is handled like any other invalid choice.
            dataset_selection = None
        print("\n")
        if dataset_selection not in datasets:
            print("Invalid choice")
            break
        item_file_path, transaction_file_path = datasets[dataset_selection]

        try:
            support_pct = float(input(f"Enter min_threshold_support : "))
            confidence_pct = float(input(f"Enter min_threshold_confidence : "))
        except ValueError:
            print("Invalid input")
            break

        # Support must be strictly positive (the library miners reject 0);
        # both values are percentages and cannot exceed 100.
        if not (0 < support_pct <= 100 and 0 <= confidence_pct <= 100):
            print("Invalid input")
            break

        min_support = support_pct / 100
        min_confidence = confidence_pct / 100
        association_rules_generator = AssociationRulesGenerator(confidence=min_confidence, min_support=min_support)
        for algorithm in ("Brute-force", "Apriori", "FP-growth"):
            execute_association(
                association_rules_generator,
                item_file_path=item_file_path,
                transaction_file_path=transaction_file_path,
                algorithm=algorithm,
            )
        print("\n")


    
    

# Start the interactive menu only when this file is run as a script.
if __name__ == "__main__":
    main()




Select your Dataset
1. Amazon
2. Generic
3. Best Buy
4. K-Mart
5. Nike
Enter your choice: 5


Enter min_threshold_support : 60
Enter min_threshold_confidence : 60


See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

Executing using Brute-force Algorithm
Item Set 1 
    freq_item_sets  Support
0    Running Shoe     0.70
1     Soccer Shoe     0.30
2           Socks     0.65
3  Swimming Shirt     0.55
4  Dry Fit V-Neck     0.45
5      Rash Guard     0.60
6     Sweatshirts     0.65
7         Hoodies     0.40
8      Tech Pants     0.45
9    Modern Pants     0.50
freq_item_set_2
                      freq_item_sets  Support
0        (Running Shoe, Soccer Shoe)     0.20
1              (Running Shoe, Socks)     0.55
2     (Running Shoe, Swimming Shirt)     0.30
3     (Running Shoe, Dry Fit V-Neck)     0.25
4         (Running Shoe, Rash Guard)     0.35
5        (Running Shoe, Sweatshirts)     0.55
6            (Running Shoe, Hoodies)     0.25
7         (Running Shoe, Tech Pants)     0.30
8       (Running Shoe, Modern Pants)     0.45
9               (Soccer Shoe, Socks)     0.25
10     (Soccer Shoe, Swimming Shirt)     0.20
11     (Soccer Shoe, Dry Fit V-Neck)     0.25
12         (Soccer Shoe, Rash Guard)  

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the 

Executing using FP-growth Algorithm
Executing FP-tree growth
Itemsets
   support              itemsets
0     0.70        (Running Shoe)
1     0.65         (Sweatshirts)
2     0.65               (Socks)
3     0.60          (Rash Guard)
4     0.60  (Sweatshirts, Socks)
Rules:
Rule 1 Rule: Sweatshirts => Socks; confidence: 0.923; Selected
Rule 2 Rule: Socks => Sweatshirts; confidence: 0.923; Selected




Select your Dataset
1. Amazon
2. Generic
3. Best Buy
4. K-Mart
5. Nike
