In [2]:
pip install mlxtend

Collecting mlxtend
  Obtaining dependency information for mlxtend from https://files.pythonhosted.org/packages/1c/07/512f6a780239ad6ce06ce2aa7b4067583f5ddcfc7703a964a082c706a070/mlxtend-0.23.1-py3-none-any.whl.metadata
  Downloading mlxtend-0.23.1-py3-none-any.whl.metadata (7.3 kB)
Downloading mlxtend-0.23.1-py3-none-any.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   - -------------------------------------- 0.1/1.4 MB 544.7 kB/s eta 0:00:03
   ----------------------- ---------------- 0.8/1.4 MB 6.0 MB/s eta 0:00:01
   ---------------------------------------  1.4/1.4 MB 9.2 MB/s eta 0:00:01
   ---------------------------------------- 1.4/1.4 MB 7.1 MB/s eta 0:00:00
Installing collected packages: mlxtend
Successfully installed mlxtend-0.23.1
Note: you may need to restart the kernel to use updated packages.


In [4]:
import time
import pandas as pd
from math import comb
from itertools import combinations
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.frequent_patterns import fpgrowth, association_rules

In [5]:
def load_and_display_dataset(choice):
    dataset_paths = {
        1: 'C:/Users/kanna/Desktop/Project/amazon.csv',
        2: 'C:/Users/kanna/Desktop/Project/bestbuy.csv',
        3: 'C:/Users/kanna/Desktop/Project/kmart.csv',
        4: 'C:/Users/kanna/Desktop/Project/nike.csv',
        5: 'C:/Users/kanna/Desktop/Project/generic.csv'
    }

    try:
        if choice in dataset_paths:
            df = pd.read_csv(dataset_paths[choice])
            return df
        else:
            print("Invalid choice. Please select a number between 1 and 5.")
            return None
    except FileNotFoundError:
        print(f"File not found for choice {choice}. Please check the file path and try again.")
        return None

try:
    choice = int(input("Please, Select your Dataset for \n 1 Amazon.\n 2 BestBuy.\n 3 K-Mart.\n 4 Nike.\n 5 Generic. \n"))
    df = load_and_display_dataset(choice)
    if df is not None:
        print(df)
except ValueError:
    print("Please enter a valid integer.")


Please, Select your Dataset for 
 1 Amazon.
 2 BestBuy.
 3 K-Mart.
 4 Nike.
 5 Generic. 
2
   Transaction ID                                        Transaction
0          Trans1  Desk Top,  Printer,  Flash Drive,  Microsoft O...
1          Trans2  Lab Top,  Flash Drive,  Microsoft Office,  Lab...
2          Trans3  Lab Top,  Printer,  Flash Drive,  Microsoft Of...
3          Trans4  Lab Top,  Printer,  Flash Drive,  Anti-Virus, ...
4          Trans5  Lab Top,  Flash Drive,  Lab Top Case,  Anti-Virus
5          Trans6  Lab Top,  Printer,  Flash Drive,  Microsoft Of...
6          Trans7  Desk Top,  Printer,  Flash Drive,  Microsoft O...
7          Trans8         Lab Top,  External Hard-Drive,  Anti-Virus
8          Trans9  Desk Top,  Printer,  Flash Drive,  Microsoft O...
9         Trans10  Digital Camera ,  Lab Top,  Desk Top,  Printer...
10        Trans11  Lab Top,  Desk Top,  Lab Top Case,  External H...
11        Trans12  Digital Camera ,  Lab Top,  Lab Top Case,  Ext...
12        Tr

In [10]:
min_sup = input("Please, input your Min. Support \n")
min_sup = float(min_sup)
min_con = input("Please, input your Min. confidence \n")
min_con = float(min_con)


Please, input your Min. Support 
0.1
Please, input your Min. confidence 
0.1


In [12]:

unique_transactions = df['Transaction ID'].unique()
transaction_items = df['Transaction'].tolist()

transactions = transaction_items

def frequent_items(new_patterns, current_items):
    items_in_patterns = set(item for pattern in new_patterns for item in pattern)
    return [item for item in current_items if item in items_in_patterns]

def find_frequent_patterns(transactions, min_support):
    unique_items = set(item for sublist in transactions for item in sublist)
    pattern_size = 1
    frequent_patterns = []
    frequent_patterns_count = []
    current_frequent_items = list(unique_items)
    while current_frequent_items:
        potential_patterns = combinations(current_frequent_items, pattern_size)
        new_frequent_patterns = []
        for pattern in list(potential_patterns):
            count = sum(1 for transaction in transactions if set(pattern).issubset(set(transaction)))
            if count >= min_support * len(transactions):
                new_frequent_patterns.append(pattern)
                frequent_patterns_count.append(count)
        frequent_patterns.extend(new_frequent_patterns)
        pattern_size += 1
        current_frequent_items = frequent_items(new_frequent_patterns, current_frequent_items)
    return frequent_patterns, frequent_patterns_count

def generate_association_rules(frequent_patterns, frequent_patterns_count, transactions, min_confidence):
    rules_with_confidence = []
    for pattern, pattern_count in zip(frequent_patterns, frequent_patterns_count):
        if len(pattern) > 1:
            sub_patterns = [sub_pattern for i in range(1, len(pattern))
                            for sub_pattern in combinations(pattern, i)]
            for sub_pattern in sub_patterns:
                sub_pattern_count = sum(1 for transaction in transactions if set(sub_pattern).issubset(set(transaction)))
                if sub_pattern_count > 0:  # Avoid division by zero
                    confidence = pattern_count / sub_pattern_count
                    if confidence >= min_confidence:
                        consequence = set(pattern) - set(sub_pattern)
                        rules_with_confidence.append(((tuple(sub_pattern), tuple(consequence)), confidence))
    return rules_with_confidence

def format_rules_for_printing(rules_with_confidence):
    formatted_rules = []
    for (antecedent, consequent), confidence in rules_with_confidence:
        rule_string = f"{antecedent} ---> {consequent} with confidence = {confidence:.2f}"
        formatted_rules.append(rule_string)
    return formatted_rules
start_time = time.time()
frequent_patterns, frequent_patterns_count = find_frequent_patterns(transactions, min_sup)
rules_with_confidence = generate_association_rules(frequent_patterns, frequent_patterns_count, transactions, min_con)
end_time = time.time()
bruteapriori_runtime = end_time - start_time

formatted_rules = format_rules_for_printing(rules_with_confidence)

def print_frequent_patterns_and_rules(frequent_patterns, frequent_patterns_count, transactions, min_confidence,formatted_rules):
    print("Frequent patterns:\n")
    for pattern, count in zip(frequent_patterns, frequent_patterns_count):
        print(f"{pattern}, support: {count/len(transactions):.2f}")
    print('\nAssociation rules:')
    for rule in formatted_rules:
        print(rule)

print_frequent_patterns_and_rules(frequent_patterns, frequent_patterns_count, transactions, min_con,formatted_rules)

print(f"Brute-forced Apriori runtime: {bruteapriori_runtime} seconds")


Frequent patterns:

('  Lab Top',), support: 0.25
('  Flash Drive',), support: 0.65
('  Microsoft Office',), support: 0.55
('Desk Top',), support: 0.15
('Lab Top',), support: 0.35
('  Lab Top Case',), support: 0.70
('  Anti-Virus',), support: 0.70
('Digital Camera ',), support: 0.40
('  External Hard-Drive',), support: 0.45
('  Speakers',), support: 0.55
('  Printer',), support: 0.45
('  Desk Top',), support: 0.15
('  Lab Top', '  Flash Drive'), support: 0.10
('  Lab Top', '  Microsoft Office'), support: 0.10
('  Lab Top', '  Lab Top Case'), support: 0.25
('  Lab Top', '  Anti-Virus'), support: 0.20
('  Lab Top', 'Digital Camera '), support: 0.25
('  Lab Top', '  External Hard-Drive'), support: 0.10
('  Lab Top', '  Speakers'), support: 0.20
('  Lab Top', '  Printer'), support: 0.10
('  Flash Drive', '  Microsoft Office'), support: 0.55
('  Flash Drive', 'Desk Top'), support: 0.15
('  Flash Drive', 'Lab Top'), support: 0.25
('  Flash Drive', '  Lab Top Case'), support: 0.45
('  Flash D

In [13]:
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

start_time = time.time()

frequent_itemsets = apriori(df_encoded, min_support=min_sup, use_colnames=True)

rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_con)

end_time = time.time()
apriori_runtime = end_time - start_time

def display_output_like_brute_force(frequent_itemsets, rules):
    print("Frequent patterns:\n")
    for index, row in frequent_itemsets.iterrows():
        print(f"{list(row['itemsets'])}, support: {row['support']}")

    print("\nAssociation rules:")
    for index, row in rules.iterrows():
        print(f"{list(row['antecedents'])} ---> {list(row['consequents'])} with confidence = {row['confidence']:.2f}")

display_output_like_brute_force(frequent_itemsets, rules)

print(f"Apriori runtime: {apriori_runtime} seconds")


Frequent patterns:

['  Anti-Virus'], support: 0.7
['  Desk Top'], support: 0.15
['  External Hard-Drive'], support: 0.45
['  Flash Drive'], support: 0.65
['  Lab Top'], support: 0.25
['  Lab Top Case'], support: 0.7
['  Microsoft Office'], support: 0.55
['  Printer'], support: 0.45
['  Speakers'], support: 0.55
['Desk Top'], support: 0.15
['Digital Camera '], support: 0.4
['Lab Top'], support: 0.35
['  Anti-Virus', '  Desk Top'], support: 0.1
['  Anti-Virus', '  External Hard-Drive'], support: 0.45
['  Anti-Virus', '  Flash Drive'], support: 0.5
['  Anti-Virus', '  Lab Top'], support: 0.2
['  Anti-Virus', '  Lab Top Case'], support: 0.6
['  Anti-Virus', '  Microsoft Office'], support: 0.4
['  Anti-Virus', '  Printer'], support: 0.3
['  Anti-Virus', '  Speakers'], support: 0.45
['  Anti-Virus', 'Desk Top'], support: 0.1
['  Anti-Virus', 'Digital Camera '], support: 0.2
['  Anti-Virus', 'Lab Top'], support: 0.3
['  External Hard-Drive', '  Desk Top'], support: 0.1
['  Flash Drive', '  D

In [14]:
start_time = time.time()

frequent_itemsets_fp = fpgrowth(df_encoded, min_support=0.1, use_colnames=True)

rules_fp = association_rules(frequent_itemsets_fp, metric="confidence", min_threshold=0.1)

end_time = time.time()
fpgrowth_runtime = end_time - start_time

def display_output_like_brute_force(frequent_itemsets, rules):
    print("Frequent patterns:\n")
    for index, row in frequent_itemsets.iterrows():
        print(f"{list(row['itemsets'])}, support: {row['support']}")

    print("\nAssociation rules:")
    for index, row in rules.iterrows():
        print(f"{list(row['antecedents'])} ---> {list(row['consequents'])} with confidence = {row['confidence']:.2f}")

display_output_like_brute_force(frequent_itemsets_fp, rules_fp)

print(f"FP-Growth runtime: {fpgrowth_runtime} seconds")

Frequent patterns:

['  Anti-Virus'], support: 0.7
['  Flash Drive'], support: 0.65
['  Speakers'], support: 0.55
['  Microsoft Office'], support: 0.55
['  Printer'], support: 0.45
['Desk Top'], support: 0.15
['  Lab Top Case'], support: 0.7
['Lab Top'], support: 0.35
['  External Hard-Drive'], support: 0.45
['Digital Camera '], support: 0.4
['  Lab Top'], support: 0.25
['  Desk Top'], support: 0.15
['  Anti-Virus', '  Lab Top Case'], support: 0.6
['  Anti-Virus', '  Flash Drive'], support: 0.5
['  Flash Drive', '  Lab Top Case'], support: 0.45
['  Anti-Virus', '  Flash Drive', '  Lab Top Case'], support: 0.45
['  Anti-Virus', '  Speakers'], support: 0.45
['  Speakers', '  Flash Drive'], support: 0.3
['  Speakers', '  Lab Top Case'], support: 0.45
['  Anti-Virus', '  Speakers', '  Lab Top Case'], support: 0.4
['  Anti-Virus', '  Speakers', '  Flash Drive'], support: 0.3
['  Speakers', '  Lab Top Case', '  Flash Drive'], support: 0.25
['  Anti-Virus', '  Speakers', '  Lab Top Case', '  

In [15]:
data = {
    "Algorithm": ["BruteApriori", "Apriori", "FPGrowth"],
    "Runtime": [bruteapriori_runtime, apriori_runtime, fpgrowth_runtime]
}

df = pd.DataFrame(data)
df_sorted = df.sort_values(by="Runtime", ascending=True)
print(df_sorted)


      Algorithm   Runtime
2      FPGrowth  0.138815
1       Apriori  0.353907
0  BruteApriori  0.726405
