# In [1]:
# Using the Apriori algorithm
import tkinter as tk
from tkinter import filedialog, messagebox, Scrollbar
import pandas as pd
from itertools import combinations
def preprocess_data(df):
    """Return the distinct (TransactionNo, Items) rows of *df*.

    Only the ``TransactionNo`` and ``Items`` columns are kept. When the same
    pair appears more than once, the first occurrence is retained and the
    later ones are dropped. The input frame is not modified, and surviving
    rows keep their original index labels.

    Raises:
        KeyError: raised by pandas if either column is missing from *df*.
    """
    pairs = df[['TransactionNo', 'Items']]
    # duplicated() marks every repeat of an earlier row; invert it to keep
    # first occurrences only.
    first_occurrence = ~pairs.duplicated()
    # copy() hands the caller an independent frame.
    return pairs[first_occurrence].copy()
# Open a data file chosen through the file dialog
def open_file():
    """Ask the user for a CSV/Excel file and load it into the global ``df``.

    The chosen file is read with pandas, reduced by ``preprocess_data`` and,
    only if all of that succeeds, stored in the module-level ``df``. The
    output box then shows the loaded path. Cancelling the dialog leaves any
    previously loaded data untouched. Any failure is reported in an error
    dialog and also leaves the previous data in place.
    """
    global df
    file_path = filedialog.askopenfilename(filetypes=[("CSV files", "*.csv"), ("Excel files", "*.xlsx")])
    if not file_path:
        # Dialog cancelled: nothing to do.
        return

    try:
        # Match the extension case-insensitively so "DATA.CSV" is accepted.
        lowered_path = file_path.lower()
        if lowered_path.endswith('.csv'):
            loaded = pd.read_csv(file_path)
        elif lowered_path.endswith('.xlsx'):
            loaded = pd.read_excel(file_path)
        else:
            # Without this branch an unknown extension would silently
            # re-process the previously loaded frame (or hit a NameError).
            raise ValueError("unsupported file type (expected .csv or .xlsx)")

        loaded = preprocess_data(loaded)
        # Publish the frame only after preprocessing worked, so a bad file
        # never replaces good data with a half-processed frame.
        df = loaded
        update_info_label(f"File Loaded: {file_path}")
    except Exception as e:
        # GUI callback boundary: surface every failure to the user.
        messagebox.showerror("Error", f"Error loading file: {e}")

def update_info_label(message):
    """Replace the whole content of the ``info_text`` box with *message*.

    The widget is switched to the normal state for the edit and set back to
    disabled afterwards.
    """
    info_text["state"] = tk.NORMAL
    try_start, try_end = "1.0", tk.END
    info_text.delete(try_start, try_end)
    info_text.insert(try_end, message)
    info_text["state"] = tk.DISABLED

def generate_candidate_itemsets(transactions, k):
    """Return every distinct k-item combination found in any transaction.

    Args:
        transactions: Iterable of transactions, each an iterable of hashable
            items that can be ordered against one another.
        k: Number of items per candidate itemset.

    Returns:
        A sorted list of k-tuples. Items inside each tuple are in ascending
        order, so the same itemset always has the same tuple form.
    """
    candidate_itemsets = {
        itemset
        for transaction in transactions
        # De-duplicate the items first: a transaction that lists an item
        # twice must not produce itemsets such as ('a', 'a').
        for itemset in combinations(sorted(set(transaction)), k)
    }
    # Sorting makes the result independent of set iteration order.
    return sorted(candidate_itemsets)

def count_support(transactions, candidate_itemsets):
    """Count in how many transactions each candidate itemset occurs.

    Args:
        transactions: Iterable of transactions, each an iterable of hashable
            items. It is consumed once, so a generator is accepted.
        candidate_itemsets: Iterable of hashable itemsets (tuples of items).

    Returns:
        A dict mapping each candidate that occurs at least once to the number
        of transactions containing all of its items. Candidates with no
        support are omitted. Keys follow the order of *candidate_itemsets*;
        a candidate listed more than once is counted once.
    """
    # Convert each transaction to a set once, instead of once per candidate.
    transaction_sets = [set(transaction) for transaction in transactions]

    support_counts = {}
    for itemset in candidate_itemsets:
        items = frozenset(itemset)
        count = sum(1 for transaction in transaction_sets if items <= transaction)
        if count:
            support_counts[itemset] = count
    return support_counts

def generate_frequent_itemsets(support_counts, min_supportcount):
    """Keep only the itemsets whose support reaches *min_supportcount*.

    Args:
        support_counts: Mapping of itemset -> support count.
        min_supportcount: Inclusive lower bound on the support count.

    Returns:
        A new dict with the qualifying entries, in their original order.
    """
    def meets_threshold(entry):
        # entry is an (itemset, support) pair from dict.items().
        return entry[1] >= min_supportcount

    return dict(filter(meets_threshold, support_counts.items()))

def generate_association_rules(frequent_itemsets, min_confidence):
    """Derive strong association rules from the largest frequent itemsets.

    Only itemsets of the maximum length present in *frequent_itemsets* are
    split into rules. Each is divided into every non-empty antecedent and its
    complementary consequent, and a rule is kept when

        confidence = support(itemset) / support(antecedent)

    is at least *min_confidence*.

    Args:
        frequent_itemsets: Mapping of itemset tuple -> support count. It must
            also contain every antecedent, as a tuple of items in ascending
            order.
        min_confidence: Inclusive confidence threshold as a fraction (0-1).

    Returns:
        A set of ``(antecedent, consequent, confidence)`` tuples. It is empty
        when there are no frequent itemsets or none has two or more items.

    Raises:
        KeyError: if an antecedent is missing from *frequent_itemsets*.
    """
    association_rules = set()
    if not frequent_itemsets:
        # max() below would raise ValueError on an empty mapping.
        return association_rules

    max_length = max(len(itemset) for itemset in frequent_itemsets)
    if max_length < 2:
        # Single items cannot be split into antecedent and consequent.
        return association_rules

    # Antecedent/consequent pairs already emitted, compared as sets, so the
    # duplicate check is O(1) instead of a scan over all rules.
    seen = set()
    for itemset, support in frequent_itemsets.items():
        if len(itemset) != max_length:
            continue
        ordered_items = sorted(itemset)
        for size in range(1, len(ordered_items)):
            for antecedent in combinations(ordered_items, size):
                consequent = tuple(sorted(set(itemset) - set(antecedent)))
                confidence = support / frequent_itemsets[antecedent]
                if confidence >= min_confidence:
                    key = (frozenset(antecedent), frozenset(consequent))
                    if key not in seen:
                        seen.add(key)
                        association_rules.add((antecedent, consequent, confidence))
    return association_rules


def process_data():
    """Mine the loaded data and show frequent itemsets and strong rules.

    Reads the minimum support count, the minimum confidence (%) and the
    percentage of rows to use from the entry widgets. It takes that share of
    rows from the top of the global ``df``, groups the items per
    ``TransactionNo`` and grows frequent itemsets one size at a time until a
    size yields none. The itemsets and the rules derived from them are then
    written to the output box.

    Every failure (for example non-numeric input) is reported in an error
    dialog rather than propagated, because this runs as a button callback.
    """
    try:
        min_support_count = int(min_support_entry.get())
        min_confidence = float(min_confidence_entry.get()) / 100
        percentage = float(percentage_entry.get()) / 100

        # The global `df` only exists after a successful open_file(). Look it
        # up defensively so pressing the button first shows the intended
        # message instead of a NameError.
        data = globals().get('df')
        if data is None or data.empty:
            messagebox.showerror("Error", "No data loaded. Please load a file.")
            return

        num_records_to_read = int(len(data) * percentage)
        df_subset = data.head(num_records_to_read)

        # One list of items per transaction number.
        transactions = df_subset.groupby('TransactionNo')['Items'].apply(list).tolist()

        frequent_itemsets = {}
        k = 1
        while True:
            candidate_itemsets = generate_candidate_itemsets(transactions, k)
            support_counts = count_support(transactions, candidate_itemsets)
            new_frequent_itemsets = generate_frequent_itemsets(support_counts, min_support_count)

            # No frequent itemset of size k means none of a larger size.
            if not new_frequent_itemsets:
                break

            frequent_itemsets.update(new_frequent_itemsets)
            k += 1

        # Skip rule generation when nothing is frequent, so an empty result
        # is displayed rather than turned into an error.
        if frequent_itemsets:
            association_rules = generate_association_rules(frequent_itemsets, min_confidence)
        else:
            association_rules = set()

        output_parts = ["Frequent Item Sets:\n"]
        output_parts.extend(
            f"{itemset} : {support}\n" for itemset, support in frequent_itemsets.items()
        )
        output_parts.append("\nStrong Association Rules:\n")
        output_parts.extend(
            f"{rule[0]} -> {rule[1]} : {rule[2]}\n" for rule in association_rules
        )

        update_info_label("".join(output_parts))
    except Exception as e:
        # GUI callback boundary: surface every failure to the user.
        messagebox.showerror("Error", f"Error processing data: {e}")

# ---------------------------------------------------------------------------
# Main window. Widgets are created and packed top to bottom in the order they
# appear on screen; the callbacks above look several of them up by their
# module-level names (percentage_entry, min_support_entry,
# min_confidence_entry, info_text).
# ---------------------------------------------------------------------------
_BACKGROUND = "lightblue"
_BODY_FONT = ("Arial", 12)


def _labelled_entry(caption):
    """Create and pack a caption label followed by an entry box."""
    label = tk.Label(panel, text=caption, font=_BODY_FONT, bg=_BACKGROUND)
    label.pack()
    entry = tk.Entry(panel, font=_BODY_FONT)
    entry.pack()
    return label, entry


panel = tk.Tk()
panel.title("Association Rule Mining Tool")
panel.geometry("800x600")
panel.configure(bg=_BACKGROUND)

title_label = tk.Label(panel, text="Association Rule Mining Tool", font=("Arial", 20), bg=_BACKGROUND)
title_label.pack(pady=20)

open_button = tk.Button(panel, text="Open File", command=open_file, font=_BODY_FONT)
open_button.pack(pady=10)

# The three user parameters, each a label with an entry box beneath it.
percentage_label, percentage_entry = _labelled_entry("Percentage of Data to Read (%):")
min_support_label, min_support_entry = _labelled_entry("Minimum Support Count:")
min_confidence_label, min_confidence_entry = _labelled_entry("Minimum Confidence (%):")

process_button = tk.Button(panel, text="Process Data", command=process_data, font=_BODY_FONT)
process_button.pack(pady=20)

# Output area: a text box with a vertical scrollbar on its right.
output_frame = tk.Frame(panel)
output_frame.pack(fill=tk.BOTH, expand=True)

info_text = tk.Text(output_frame, wrap="none")
info_text.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

scrollbar = tk.Scrollbar(output_frame, command=info_text.yview)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

# Let the text box report its scroll position to the scrollbar.
info_text.configure(yscrollcommand=scrollbar.set)

panel.mainloop()
