# In [10]:
import pandas as pd
import tkinter as tk
from tkinter import filedialog, messagebox
from tkinter import ttk
from itertools import combinations as combo

def load_and_preprocess(file_path, file_format, data_percentage):
    """Load a transaction file and keep only the leading share of its rows.

    Args:
        file_path: Path of the file to read.
        file_format: One of ``'Excel'``, ``'Text'`` (tab-separated) or
            ``'CSV'``.
        data_percentage: Percentage of rows, counted from the top of the
            file, to keep. Values above 100 keep every row.

    Returns:
        A DataFrame holding the first ``data_percentage`` percent of the
        rows, with the ``Items`` column converted to strings.

    Raises:
        ValueError: If ``file_format`` is not supported or
            ``data_percentage`` is negative.
    """
    # Dispatch table: file format name -> callable taking the path.
    loaders = {
        'Excel': pd.read_excel,
        'Text': lambda path: pd.read_table(path, sep='\t'),
        'CSV': pd.read_csv,
    }

    if file_format not in loaders:
        raise ValueError("Invalid file format. Please choose 'Excel', 'Text', or 'CSV'.")

    # A negative count passed to DataFrame.head() means "all rows except the
    # last n", which would silently return the wrong slice of the data.
    if data_percentage < 0:
        raise ValueError("Percentage of data to read must not be negative.")

    df = loaders[file_format](file_path)
    df['Items'] = df['Items'].astype(str)
    num_records = int(len(df) * data_percentage / 100)

    return df.head(num_records)

def generate_candidates(frequent_itemsets, k):
    """Build candidate itemsets of size ``k`` by joining itemsets pairwise.

    Args:
        frequent_itemsets: Re-iterable collection of sets/frozensets.
        k: Required number of items in every returned candidate.

    Returns:
        A set of frozensets, one for each distinct pairwise union that
        contains exactly ``k`` items.
    """
    joined = set()
    for left in frequent_itemsets:
        for right in frequent_itemsets:
            union = left | right
            if len(union) == k:
                joined.add(frozenset(union))
    return joined


def prune_candidates(candidates, prev_frequent_itemsets):
    """Drop candidates that have a subset missing from the previous level.

    Args:
        candidates: Iterable of frozensets to check.
        prev_frequent_itemsets: Container of the itemsets found at the
            previous level; only membership tests are performed on it.

    Returns:
        The set of candidates for which removing any single item yields an
        itemset contained in ``prev_frequent_itemsets``.
    """
    survivors = set()
    for candidate in candidates:
        for item in candidate:
            if candidate - {item} not in prev_frequent_itemsets:
                break
        else:
            # No subset was missing, so the candidate is kept.
            survivors.add(candidate)
    return survivors


def find_frequent_itemsets(df, min_support):
    """Mine the frequent itemsets of ``df`` level by level.

    Args:
        df: DataFrame with a ``TransactionNo`` column identifying each
            transaction and an ``Items`` column holding one item per row.
        min_support: Minimum number of transactions an itemset must appear
            in to be considered frequent.

    Returns:
        A dict mapping itemset size to ``{frozenset: support_count}``. Only
        itemsets whose support is at least ``min_support`` are included, and
        sizes with no frequent itemsets are omitted.
    """
    # Materialise the per-transaction item sets once; they are scanned again
    # for every candidate below.
    transactions = list(df.groupby('TransactionNo')['Items'].apply(set))

    # Each transaction is a set, so an item is counted at most once per
    # transaction.
    item_counts = {}
    for transaction in transactions:
        for item in transaction:
            item_counts[item] = item_counts.get(item, 0) + 1

    # Level 1 must be filtered like every other level; otherwise infrequent
    # items are reported as frequent and inflate the size-2 candidates.
    current_level = {
        frozenset({item}): count
        for item, count in item_counts.items()
        if count >= min_support
    }

    frequent_itemsets = {}
    k = 1
    while current_level:
        frequent_itemsets[k] = current_level
        k += 1
        candidates = prune_candidates(
            generate_candidates(current_level.keys(), k),
            current_level.keys(),
        )
        current_level = {}
        for candidate in candidates:
            support = sum(1 for transaction in transactions if candidate.issubset(transaction))
            if support >= min_support:
                current_level[candidate] = support

    return frequent_itemsets

def discover_association_rules(frequent_itemsets, df, min_confidence):
    """Derive association rules from previously mined frequent itemsets.

    For every itemset with more than one item, each non-empty proper subset
    is tried as the antecedent and the remaining items form the consequent.

    Args:
        frequent_itemsets: Dict mapping itemset size to
            ``{frozenset: support_count}``.
        df: Unused; kept so existing callers do not have to change.
        min_confidence: Minimum confidence, as a fraction, that a rule must
            reach to be returned.

    Returns:
        A list of ``(antecedent, consequent, confidence)`` tuples, where
        confidence is ``support(itemset) / support(antecedent)``.
    """
    association_rules = []
    for size, itemsets in frequent_itemsets.items():
        if size <= 1:
            continue
        for itemset, support in itemsets.items():
            for antecedent_size in range(1, len(itemset)):
                # A missing level simply yields no rules instead of a KeyError.
                antecedent_supports = frequent_itemsets.get(antecedent_size, {})
                for antecedent_items in combo(itemset, antecedent_size):
                    antecedent = frozenset(antecedent_items)
                    antecedent_support = antecedent_supports.get(antecedent)
                    # Skip unknown antecedents and zero supports, which
                    # would otherwise divide by zero.
                    if not antecedent_support:
                        continue
                    confidence = support / antecedent_support
                    if confidence >= min_confidence:
                        association_rules.append((antecedent, itemset - antecedent, confidence))
    return association_rules


def show_frequent_itemsets(frequent_itemsets, min_support):
    """Format the frequent itemsets as a multi-line report.

    Args:
        frequent_itemsets: Dict mapping itemset size to
            ``{itemset: support_count}``.
        min_support: Itemsets with a smaller support are left out.

    Returns:
        A header line followed by one line per reported itemset, in
        ascending order of itemset size.
    """
    report = ["Frequent Item Sets:\n"]
    for size in sorted(frequent_itemsets):
        for members, count in frequent_itemsets[size].items():
            if count >= min_support:
                report.append("{} - Support: {}\n".format(list(members), count))
    return "".join(report)


def show_association_rules(association_rules):
    """Format association rules as a multi-line report.

    Args:
        association_rules: Iterable of ``(antecedent, consequent,
            confidence)`` tuples.

    Returns:
        A header followed by one line per rule, with the confidence shown
        to two decimal places.
    """
    report = ["\nStrong Association Rules:\n"]
    for antecedent, consequent, confidence in association_rules:
        line = "{} => {} - Confidence: {:.2f}\n".format(list(antecedent), list(consequent), confidence)
        report.append(line)
    return "".join(report)

def browse_file(entry):
    """Let the user pick a file and place its path into ``entry``.

    Args:
        entry: Entry widget whose content is replaced by the chosen path.
    """
    file_path = filedialog.askopenfilename()
    # Cancelling the dialog yields an empty value; keep whatever path is
    # already in the entry instead of wiping it.
    if not file_path:
        return
    entry.delete(0, tk.END)
    entry.insert(0, file_path)

def run_analysis(file_entry, format_entry, support_entry, confidence_entry, percentage_entry, output_text):
    """Read the form values, run the analysis and display the results.

    Any failure is reported to the user in an error dialog rather than
    raised.

    Args:
        file_entry: Entry widget holding the input file path.
        format_entry: Entry widget holding the file format name.
        support_entry: Entry widget holding the minimum support count.
        confidence_entry: Entry widget holding the minimum confidence as a
            percentage; it is divided by 100 before use.
        percentage_entry: Entry widget holding the percentage of data to
            read.
        output_text: Text widget that receives the report.
    """
    file_path = file_entry.get()
    # Stray whitespace around the format name would otherwise make a valid
    # format fail the lookup.
    file_format = format_entry.get().strip()

    # Parse the numeric fields under their own handler: a ValueError raised
    # inside a Tk callback is otherwise never shown to the user.
    try:
        min_support = float(support_entry.get())
        min_confidence = float(confidence_entry.get()) / 100
        data_percentage = float(percentage_entry.get())
    except ValueError:
        messagebox.showerror(
            "Error",
            "Minimum support, minimum confidence and data percentage must be numbers.",
        )
        return

    try:
        df = load_and_preprocess(file_path, file_format, data_percentage)
        frequent_itemsets = find_frequent_itemsets(df, min_support)
        association_rules = discover_association_rules(frequent_itemsets, df, min_confidence)

        frequent_output = show_frequent_itemsets(frequent_itemsets, min_support)
        rules_output = show_association_rules(association_rules)

        # The widget is switched to NORMAL for the update and back to
        # DISABLED afterwards.
        output_text.config(state=tk.NORMAL)
        output_text.delete("1.0", tk.END)
        output_text.insert(tk.END, frequent_output + "\n" + rules_output)
        output_text.config(state=tk.DISABLED)
    except Exception as e:
        # Top-level GUI boundary: show any failure instead of raising.
        messagebox.showerror("Error", str(e))

def create_gui():
    """Build the main window of the analysis tool and run the Tk event loop.

    The window has a file-path row with a Browse button, four short input
    rows (format, support, confidence, percentage), a Run button and a
    scrollable text area for the report.
    """
    bg_colour = '#E8F6EF'
    cell_pad = {'padx': 5, 'pady': 5}

    root = tk.Tk()
    root.title("Association Analysis Tool Using Apriori Algorithm")
    root.configure(background=bg_colour)

    def add_label(row, caption):
        # Right-aligned caption in the first column of the given row.
        label = tk.Label(root, text=caption, background=bg_colour)
        label.grid(row=row, column=0, sticky="e", **cell_pad)

    def add_short_entry(row, caption):
        # Caption plus a 20-character entry in the second column.
        add_label(row, caption)
        entry = tk.Entry(root, width=20)
        entry.grid(row=row, column=1, **cell_pad)
        return entry

    # Row 0: file path with a Browse button.
    add_label(0, "File Path:")
    file_entry = tk.Entry(root, width=50)
    file_entry.grid(row=0, column=1, columnspan=2, **cell_pad)
    browse_button = tk.Button(root, text="Browse", command=lambda: browse_file(file_entry))
    browse_button.grid(row=0, column=3, **cell_pad)

    # Rows 1-4: analysis parameters.
    format_entry = add_short_entry(1, "File Format (Excel,Text,CSV):")
    support_entry = add_short_entry(2, "Minimum Support Count:")
    confidence_entry = add_short_entry(3, "Minimum Confidence (%):")
    percentage_entry = add_short_entry(4, "Percentage of Data to Read:")

    def on_run():
        # output_text is bound further down; it is looked up when the
        # button is pressed, not when this function is defined.
        run_analysis(file_entry, format_entry, support_entry, confidence_entry, percentage_entry, output_text)

    run_button = tk.Button(root, text="Run Analysis", command=on_run)
    run_button.grid(row=5, column=1, **cell_pad)

    # Row 6: report area with a vertical scrollbar.
    output_frame = tk.Frame(root)
    output_frame.grid(row=6, column=0, columnspan=4, **cell_pad)

    output_text = tk.Text(output_frame, height=20, width=70)
    output_text.pack(side=tk.LEFT, fill=tk.Y)

    scrollbar = ttk.Scrollbar(output_frame, orient=tk.VERTICAL, command=output_text.yview)
    scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
    output_text.config(yscrollcommand=scrollbar.set)

    root.mainloop()

# Start the GUI only when this file is run as a script, not when imported.
if __name__ == "__main__":
    create_gui()
