In [2]:
%pip install mlxtend

Collecting mlxtend
  Downloading mlxtend-0.23.1-py3-none-any.whl.metadata (7.3 kB)
Downloading mlxtend-0.23.1-py3-none-any.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: mlxtend
Successfully installed mlxtend-0.23.1
Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Load Amazon transactions
amazon_df = pd.read_csv("Amazon transactions.csv")

# Convert all non-numeric values to strings
amazon_df = amazon_df.applymap(str)

# Perform Apriori algorithm for Amazon
amazon_te = TransactionEncoder()
amazon_te_ary = amazon_te.fit(amazon_df.values).transform(amazon_df.values)
amazon_df_encoded = pd.DataFrame(amazon_te_ary, columns=amazon_te.columns_)

# Set minimum support and confidence
amazon_min_support = 0.2
amazon_min_confidence = 0.7

# Apriori algorithm
amazon_frequent_itemsets = apriori(amazon_df_encoded, min_support=amazon_min_support, use_colnames=True)

# Association rules
amazon_rules = association_rules(amazon_frequent_itemsets, metric="confidence", min_threshold=amazon_min_confidence)

# Display results for Amazon
print("Amazon Frequent Itemsets:")
print(amazon_frequent_itemsets)

print("\nAmazon Association Rules:")
print(amazon_rules)


best_buy_df = pd.read_csv('Best Buy transactions.csv')

# Convert all non-numeric values to strings
best_buy_df = best_buy_df.applymap(str)

# Perform Apriori algorithm for Best Buy
best_buy_te = TransactionEncoder()
best_buy_te_ary = best_buy_te.fit(best_buy_df.values).transform(best_buy_df.values)
best_buy_df_encoded = pd.DataFrame(best_buy_te_ary, columns=best_buy_te.columns_)

# Set minimum support and confidence for Best Buy
best_buy_min_support = 0.2
best_buy_min_confidence = 0.7

# Apriori algorithm for Best Buy
best_buy_frequent_itemsets = apriori(best_buy_df_encoded, min_support=best_buy_min_support, use_colnames=True)

# Association rules for Best Buy
best_buy_rules = association_rules(best_buy_frequent_itemsets, metric="confidence", min_threshold=best_buy_min_confidence)


print("\nBest Buy Frequent Itemsets:")
print(best_buy_frequent_itemsets)

print("\nBest Buy Association Rules:")
print(best_buy_rules)

Amazon Frequent Itemsets:
    support                                           itemsets
0      0.55                               (A Beginner’s Guide)
1      0.65          (Android Programming: The Big Nerd Ranch)
2      0.30                  (Beginning Programming with Java)
3      0.40                      (Head First Java 2nd Edition)
4      0.20                              (Java 8 Pocket Guide)
5      0.65                                 (Java For Dummies)
6      0.50                     (Java: The Complete Reference)
7      0.90                                              (nan)
8      0.30  (Android Programming: The Big Nerd Ranch, A Be...
9      0.45             (A Beginner’s Guide, Java For Dummies)
10     0.45  (A Beginner’s Guide, Java: The Complete Refere...
11     0.45                          (A Beginner’s Guide, nan)
12     0.30  (Android Programming: The Big Nerd Ranch, Head...
13     0.45  (Android Programming: The Big Nerd Ranch, Java...
14     0.30  (Android Program

  amazon_df = amazon_df.applymap(str)
  best_buy_df = best_buy_df.applymap(str)


In [6]:
import pandas as pd

# Load Amazon data from CSV file
amazon_df = pd.read_csv('Amazon transactions.csv')
amazon_transactions = [list(amazon_df.iloc[i].dropna()) for i in range(len(amazon_df))]

# Load Best Buy data from CSV file
bestbuy_df = pd.read_csv('Best Buy transactions.csv')
bestbuy_transactions = [list(bestbuy_df.iloc[i].dropna()) for i in range(len(bestbuy_df))]

def generate_candidates(prev_candidates, k):
    candidates = set()

    for i in prev_candidates:
        for j in prev_candidates:
            # Ensure that we only consider merging sets and not frozensets
            if isinstance(i, set) and isinstance(j, set):
                union_set = i.union(j)
                if len(union_set) == k:
                    candidates.add(union_set)

    return candidates

def get_frequent_itemsets(transactions, min_support):
    itemsets = []
    unique_items = set(item for transaction in transactions for item in transaction)

    # Initialize frequent 1-itemsets
    frequent_itemsets = [frozenset({item}) for item in unique_items if is_frequent(frozenset({item}), transactions, min_support)]

    k = 2
    while frequent_itemsets:
        candidates = generate_candidates(frequent_itemsets[-1], k)
        frequent_itemsets_k = [c for c in candidates if is_frequent(c, transactions, min_support)]
        if not frequent_itemsets_k:
            break  # Break if no more frequent itemsets of size k can be found
        frequent_itemsets.extend(frequent_itemsets_k)
        k += 1

    return frequent_itemsets

def is_frequent(itemset, transactions, min_support):
    support_count = sum(1 for transaction in transactions if itemset.issubset(set(transaction)))
    support = support_count / len(transactions)
    return support >= min_support

# Use Amazon data
min_support_amazon = 0.2
frequent_itemsets_amazon = get_frequent_itemsets(amazon_transactions, min_support_amazon)
print("Frequent Itemsets for Amazon:")
for itemset in frequent_itemsets_amazon:
    print(itemset)

# Use Best Buy data
min_support_bestbuy = 0.2
frequent_itemsets_bestbuy = get_frequent_itemsets(bestbuy_transactions, min_support_bestbuy)
print("\nFrequent Itemsets for Best Buy:")
for itemset in frequent_itemsets_bestbuy:
    print(itemset)


Frequent Itemsets for Amazon:
frozenset({'Java For Dummies'})
frozenset({'Beginning Programming with Java'})
frozenset({'Java: The Complete Reference'})
frozenset({'Android Programming: The Big Nerd Ranch'})
frozenset({'Java 8 Pocket Guide'})
frozenset({'A Beginner’s Guide'})
frozenset({'Head First Java 2nd Edition'})

Frequent Itemsets for Best Buy:
frozenset({'External Hard-Drive'})
frozenset({'Lab Top'})
frozenset({'Digital Camera'})
frozenset({'Speakers'})
frozenset({'Microsoft Office'})
frozenset({'Flash Drive'})
frozenset({'Anti-Virus'})
frozenset({'Desk Top'})
frozenset({'Printer'})
frozenset({'Lab Top Case'})


In [8]:
import itertools
import time
import pandas as pd

# Function for the brute-force method
def brute_force_frequent_itemsets(transactions, min_support):
    itemsets = set()
    frequent_itemsets = []

    unique_items = set(item for sublist in transactions for item in sublist)

    for k in range(1, len(unique_items) + 1):
        # Generate all possible k-itemsets
        k_itemsets = list(itertools.combinations(unique_items, k))

        # Check support for each k-itemset
        frequent_k_itemsets = [itemset for itemset in k_itemsets if is_frequent(itemset, transactions, min_support)]

        if not frequent_k_itemsets:
            break

        frequent_itemsets.extend(frequent_k_itemsets)
        itemsets.update(frequent_k_itemsets)

    return itemsets

# Function to check support for an itemset
def is_frequent(itemset, transactions, min_support):
    support_count = sum(1 for transaction in transactions if set(itemset).issubset(transaction))
    support = support_count / len(transactions)
    return support >= min_support

# Function to calculate brute force execution time
def brute_force_execution_time(transactions, min_support):
    start_time = time.time()
    brute_force_frequent_itemsets(transactions, min_support)
    return time.time() - start_time

# Load Amazon data from CSV file
amazon_df = pd.read_csv('Amazon transactions.csv')
amazon_transactions = [list(amazon_df.iloc[i].dropna()) for i in range(len(amazon_df))]

# Load Best Buy data from CSV file
bestbuy_df = pd.read_csv('Best Buy transactions.csv')
bestbuy_transactions = [list(bestbuy_df.iloc[i].dropna()) for i in range(len(bestbuy_df))]

# Use Amazon data
min_support_amazon = 0.2
brute_force_time_amazon = brute_force_execution_time(amazon_transactions, min_support_amazon)
print(f"Brute Force Execution Time for Amazon: {brute_force_time_amazon} seconds")

# Use Best Buy data
min_support_bestbuy = 0.2
brute_force_time_bestbuy = brute_force_execution_time(bestbuy_transactions, min_support_bestbuy)
print(f"Brute Force Execution Time for Best Buy: {brute_force_time_bestbuy} seconds")


# Calculate and compare execution times
apriori_time = apriori_execution_time(transactions, min_support)
print(f"Apriori Execution Time: {apriori_time} seconds")



Brute Force Execution Time for Amazon: 0.0032672882080078125 seconds
Brute Force Execution Time for Best Buy: 0.011471271514892578 seconds
Apriori Execution Time: 0.001970052719116211 seconds
