In [3]:
pip install mlxtend

Collecting mlxtend
  Downloading mlxtend-0.23.1-py3-none-any.whl.metadata (7.3 kB)
Downloading mlxtend-0.23.1-py3-none-any.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   - -------------------------------------- 0.0/1.4 MB 991.0 kB/s eta 0:00:02
   ----- ---------------------------------- 0.2/1.4 MB 2.4 MB/s eta 0:00:01
   ----------- ---------------------------- 0.4/1.4 MB 3.3 MB/s eta 0:00:01
   ------------------ --------------------- 0.7/1.4 MB 4.1 MB/s eta 0:00:01
   ------------------------- -------------- 0.9/1.4 MB 4.5 MB/s eta 0:00:01
   -------------------------------- ------- 1.2/1.4 MB 4.7 MB/s eta 0:00:01
   ------------------------------------- -- 1.4/1.4 MB 4.8 MB/s eta 0:00:01
   ---------------------------------------- 1.4/1.4 MB 4.6 MB/s eta 0:00:00
Installing collected packages: mlxtend
Successfully installed mlxtend-0.23.1
Note: you may need to restart the kernel to use updated packages.


In [5]:
import pandas as pd
from itertools import combinations
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
import time

In [57]:
def load_dataset(dataset_choice):
    dataset_paths = {
        1: 'amazon_items.csv',
        2: 'best_buy_items.csv',
        3: 'k_mart_items.csv',
        4: 'generic_items.csv',
        5: 'nike_transactions.csv'
    }

    try:
        if dataset_choice in dataset_paths:
            df = pd.read_csv(dataset_paths[dataset_choice])
            return df
        else:
            print("Invalid choice. Please select a number between 1 and 5.")
            return None
    except FileNotFoundError:
        print(f"File not found for choice {dataset_choice}. Please check the file path and try again.")
        return None

# Ask user for dataset choice and load the dataset
try:
    dataset_choice = int(input("Select your Dataset: \n 1. Amazon \n 2. BestBuy \n 3. K-Mart \n 4. Genric \n 5. Nike\n"))
    df = load_dataset(dataset_choice)
    if df is not None:
        print(df)
except ValueError:
    print("Please enter a valid integer.")




Select your Dataset: 
 1. Amazon 
 2. BestBuy 
 3. K-Mart 
 4. Genric 
 5. Nike
 5


   Transaction ID                                        Transaction
0          Trans1     Running Shoe, Socks, Sweatshirts, Modern Pants
1          Trans2                   Running Shoe, Socks, Sweatshirts
2          Trans3     Running Shoe, Socks, Sweatshirts, Modern Pants
3          Trans4            Running Shoe, Sweatshirts, Modern Pants
4          Trans5  Running Shoe, Socks, Sweatshirts, Modern Pants...
5          Trans6                   Running Shoe, Socks, Sweatshirts
6          Trans7  Running Shoe, Socks, Sweatshirts, Modern Pants...
7          Trans8                 Swimming Shirt, Socks, Sweatshirts
8          Trans9  Swimming Shirt, Rash Guard, Dry Fit V-Nick, Ho...
9         Trans10                    Swimming Shirt, Rash Guard, Dry
10        Trans11         Swimming Shirt, Rash Guard, Dry Fit V-Nick
11        Trans12  Running Shoe, Swimming Shirt, Socks, Sweatshir...
12        Trans13  Running Shoe, Swimming Shirt, Socks, Sweatshir...
13        Trans14  Running Shoe, S

In [47]:
min_support = float(input("Input your minimum support value (In decimals 0-1): \n"))
min_confidence = float(input("Input your minimum confidence value (In decimals 0-1): \n"))


Input your minimum support value (In decimals 0-1): 
 0.4
Input your minimum confidence value (In decimals 0-1): 
 0.8


In [49]:
df['Transaction'] = df['Transaction'].apply(lambda x: x.split(','))

# Get unique transactions
transactions = df['Transaction'].tolist()

# Function to filter frequent items
def filter_frequent_items(new_patterns, current_items):
    items_in_new_patterns = set(item for pattern in new_patterns for item in pattern)
    return [item for item in current_items if item in items_in_new_patterns]

# Function to find frequent patterns using brute-force Apriori
def brute_force_apriori(transactions, min_support):
    unique_items = set(item for transaction in transactions for item in transaction)
    pattern_size = 1
    frequent_patterns = []
    pattern_counts = []
    current_frequent_items = list(unique_items)

    while current_frequent_items:
        # Generate combinations of the current frequent items
        candidate_patterns = combinations(current_frequent_items, pattern_size)
        new_frequent_patterns = []

        for pattern in candidate_patterns:
            count = sum(1 for transaction in transactions if set(pattern).issubset(set(transaction)))
            if count >= min_support * len(transactions):
                new_frequent_patterns.append(pattern)
                pattern_counts.append(count)

        frequent_patterns.extend(new_frequent_patterns)
        pattern_size += 1
        current_frequent_items = filter_frequent_items(new_frequent_patterns, current_frequent_items)

    return frequent_patterns, pattern_counts

# Function to generate association rules from frequent patterns
def generate_association_rules(frequent_patterns, pattern_counts, transactions, min_confidence):
    rules = []
    for pattern, count in zip(frequent_patterns, pattern_counts):
        if len(pattern) > 1:
            sub_patterns = [sub_pattern for i in range(1, len(pattern))
                            for sub_pattern in combinations(pattern, i)]
            for sub_pattern in sub_patterns:
                sub_count = sum(1 for transaction in transactions if set(sub_pattern).issubset(set(transaction)))
                if sub_count > 0:
                    confidence = count / sub_count
                    if confidence >= min_confidence:
                        consequence = set(pattern) - set(sub_pattern)
                        rules.append(((tuple(sub_pattern), tuple(consequence)), confidence))
    return rules

# Format association rules for printing
def format_rules(rules):
    formatted_rules = []
    for (antecedent, consequent), confidence in rules:
        formatted_rules.append(f"{antecedent} ---> {consequent} with confidence = {confidence:.2f}")
    return formatted_rules

# Measure the runtime of the brute-force Apriori approach
start_time = time.time()
frequent_patterns, pattern_counts = brute_force_apriori(transactions, min_support)
brute_force_rules = generate_association_rules(frequent_patterns, pattern_counts, transactions, min_confidence)
end_time = time.time()
brute_force_runtime = end_time - start_time

# Format the rules for display
formatted_rules = format_rules(brute_force_rules)

# Print frequent patterns and association rules
def print_frequent_patterns_and_rules(frequent_patterns, pattern_counts, transactions, formatted_rules):
    print("Frequent Patterns:\n")
    for pattern, count in zip(frequent_patterns, pattern_counts):
        print(f"{pattern}, support: {count / len(transactions):.2f}")

    print("\nAssociation Rules:")
    for rule in formatted_rules:
        print(rule)

# Output results
print_frequent_patterns_and_rules(frequent_patterns, pattern_counts, transactions, formatted_rules)
print(f"Brute-force Apriori runtime: {brute_force_runtime} seconds")

Frequent Patterns:

(' Tech Pants',), support: 0.45
(' Dry Fit V-Nick',), support: 0.45
(' Socks',), support: 0.60
(' Sweatshirts',), support: 0.65
(' Modern Pants',), support: 0.50
(' Rash Guard',), support: 0.60
('Running Shoe',), support: 0.70
(' Hoodies',), support: 0.40
(' Tech Pants', ' Dry Fit V-Nick'), support: 0.40
(' Tech Pants', ' Rash Guard'), support: 0.45
(' Tech Pants', ' Hoodies'), support: 0.40
(' Dry Fit V-Nick', ' Rash Guard'), support: 0.45
(' Socks', ' Sweatshirts'), support: 0.55
(' Socks', ' Modern Pants'), support: 0.40
(' Socks', 'Running Shoe'), support: 0.55
(' Sweatshirts', ' Modern Pants'), support: 0.50
(' Sweatshirts', 'Running Shoe'), support: 0.55
(' Modern Pants', 'Running Shoe'), support: 0.45
(' Rash Guard', ' Hoodies'), support: 0.40
(' Tech Pants', ' Dry Fit V-Nick', ' Rash Guard'), support: 0.40
(' Tech Pants', ' Rash Guard', ' Hoodies'), support: 0.40
(' Socks', ' Sweatshirts', ' Modern Pants'), support: 0.40
(' Socks', ' Sweatshirts', 'Running S

In [51]:
# Encode the transactions for the Apriori algorithm from mlxtend
te = TransactionEncoder()
encoded_transactions = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(encoded_transactions, columns=te.columns_)

# Measure runtime for Apriori using mlxtend
start_time = time.time()
frequent_itemsets = apriori(df_encoded, min_support=min_support, use_colnames=True)
apriori_rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_confidence)
end_time = time.time()
apriori_runtime = end_time - start_time

# Print Apriori results
def display_apriori_results(frequent_itemsets, apriori_rules):
    print("\nFrequent Itemsets:\n")
    for _, row in frequent_itemsets.iterrows():
        print(f"{list(row['itemsets'])}, support: {row['support']}")

    print("\nAssociation Rules:")
    for _, row in apriori_rules.iterrows():
        print(f"{list(row['antecedents'])} ---> {list(row['consequents'])} with confidence = {row['confidence']:.2f}")

# Display Apriori results
display_apriori_results(frequent_itemsets, apriori_rules)
print(f"Apriori runtime: {apriori_runtime} seconds")


Frequent Itemsets:

[' Dry Fit V-Nick'], support: 0.45
[' Hoodies'], support: 0.4
[' Modern Pants'], support: 0.5
[' Rash Guard'], support: 0.6
[' Socks'], support: 0.6
[' Sweatshirts'], support: 0.65
[' Tech Pants'], support: 0.45
['Running Shoe'], support: 0.7
[' Rash Guard', ' Dry Fit V-Nick'], support: 0.45
[' Tech Pants', ' Dry Fit V-Nick'], support: 0.4
[' Rash Guard', ' Hoodies'], support: 0.4
[' Tech Pants', ' Hoodies'], support: 0.4
[' Socks', ' Modern Pants'], support: 0.4
[' Sweatshirts', ' Modern Pants'], support: 0.5
['Running Shoe', ' Modern Pants'], support: 0.45
[' Rash Guard', ' Tech Pants'], support: 0.45
[' Socks', ' Sweatshirts'], support: 0.55
['Running Shoe', ' Socks'], support: 0.55
['Running Shoe', ' Sweatshirts'], support: 0.55
[' Rash Guard', ' Dry Fit V-Nick', ' Tech Pants'], support: 0.4
[' Rash Guard', ' Tech Pants', ' Hoodies'], support: 0.4
[' Socks', ' Sweatshirts', ' Modern Pants'], support: 0.4
['Running Shoe', ' Socks', ' Modern Pants'], support: 0.4

In [53]:
import time
from mlxtend.frequent_patterns import fpgrowth

# Start timing the FP-Growth algorithm
start_time = time.time()

# Find frequent itemsets using the FP-Growth algorithm
frequent_itemsets_fpgrowth = fpgrowth(df_encoded, min_support=min_support, use_colnames=True)

# Generate association rules based on frequent itemsets
rules_fpgrowth = association_rules(frequent_itemsets_fpgrowth, metric="confidence", min_threshold=min_confidence)

# End timing
end_time = time.time()
fpgrowth_runtime = end_time - start_time

# Function to display output in a format similar to the brute-force Apriori method
def display_fpgrowth_output(frequent_itemsets, rules):
    print("Frequent Patterns:\n")
    for _, row in frequent_itemsets.iterrows():
        print(f"{list(row['itemsets'])}, support: {row['support']:.2f}")

    print("\nAssociation Rules:")
    for _, row in rules.iterrows():
        print(f"{list(row['antecedents'])} ---> {list(row['consequents'])} with confidence = {row['confidence']:.2f}")

# Display the formatted output of FP-Growth results
display_fpgrowth_output(frequent_itemsets_fpgrowth, rules_fpgrowth)

# Print the runtime for the FP-Growth algorithm
print(f"FP-Growth runtime: {fpgrowth_runtime} seconds")

Frequent Patterns:

['Running Shoe'], support: 0.70
[' Sweatshirts'], support: 0.65
[' Socks'], support: 0.60
[' Modern Pants'], support: 0.50
[' Rash Guard'], support: 0.60
[' Tech Pants'], support: 0.45
[' Hoodies'], support: 0.40
[' Dry Fit V-Nick'], support: 0.45
['Running Shoe', ' Sweatshirts'], support: 0.55
[' Socks', ' Sweatshirts'], support: 0.55
['Running Shoe', ' Socks'], support: 0.55
['Running Shoe', ' Socks', ' Sweatshirts'], support: 0.50
[' Sweatshirts', ' Modern Pants'], support: 0.50
['Running Shoe', ' Modern Pants'], support: 0.45
[' Socks', ' Modern Pants'], support: 0.40
['Running Shoe', ' Sweatshirts', ' Modern Pants'], support: 0.45
[' Socks', ' Sweatshirts', ' Modern Pants'], support: 0.40
['Running Shoe', ' Socks', ' Modern Pants'], support: 0.40
['Running Shoe', ' Socks', ' Sweatshirts', ' Modern Pants'], support: 0.40
[' Rash Guard', ' Tech Pants'], support: 0.45
[' Tech Pants', ' Hoodies'], support: 0.40
[' Rash Guard', ' Hoodies'], support: 0.40
[' Rash Gua