### Packages

In [None]:
#standard Python packages
import pandas as pd
import numpy as np
import random

#import package to perform combination calculations
from itertools import combinations
from itertools import permutations

#Apriori packages
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

#package to silence warning messages
import warnings
# Suppress all warnings
warnings.filterwarnings("ignore")

#time package to measure model computation time
import time

#package to analyze the file location and check if the transaction datasets already exist in the local drive
import os

#package to save and read transaction datasets on local drive
import pickle

### Item lists:
list of top selling items by store

In [None]:
#Item lists
#Each list holds the 30 item names for one store. create_db samples random transactions
#from these lists, and the brute force search builds its candidate itemsets from them.

#ShopRite items
shoprite_top_selling = ["Milk","Eggs","Bread","Rice","Pasta","Chicken","Beef","Oranges","Spinach","Cereal","Yogurt","Cheese","Butter","Cooking oil","Beans","Canned tomatoes",
    "Peanuts","Jam","Frozen vegetables","Pizza","Ice cream","Chips","Coffee","Tea","Sugar","Salt","Pepper","Dish soap","Toilet paper","Shampoo"]

#Amazon items
amazon_top_selling = ["Smartphone","Laptop","Headphones","Books","Video games","Fitness tracker","Coffee maker","Bluetooth speaker","Portable charger","Smartwatch",
    "Kitchen knife set","Electric toothbrush","Yoga mat","Air fryer","Wireless earbuds","Instant pot","External hard drive","Resistance bands","Digital camera",
    "Tablet","LED TV","Home security camera","Water bottle","Electric kettle","Printer","Air purifier","Smart thermostat","Dumbbells","Robot vacuum","Car phone mount"]

#Costco items
costco_top_selling_items = ["Toilet paper","Rotisserie chicken","Kirkland Signature batteries","Laundry detergent","Paper towels","Water bottles","Fresh produce",
    "Snack foods","Kitchen appliances","Wine","Frozen foods","Office supplies","Clothing","Electronics","Outdoor furniture","Mattresses","Jewelry","Home appliances",
    "Tools","Books","Pet supplies","Tires","Baby products","Furniture","Health and beauty products","Cookware","Cleaning supplies","Gardening supplies","School supplies",
    "Home decor"]

#Dick's items
dicks_top_selling_items = ["Athletic shoes","Sports apparel","Fitness equipment","Outdoor gear","Hiking boots","Running shoes","Exercise clothing","Basketball",
    "Football","Baseball equipment","Golf clubs","Tennis racquets","Camping gear","Fishing equipment","Bicycles","Swimming gear","Yoga equipment","Soccer equipment",
    "Hunting gear","Skiing equipment","Snowboarding gear","Gym bags","Water bottles","Fitness trackers","Gym accessories","Gymnastics equipment","Skateboarding gear",
    "Inline skates","Scooters","Surfing gear"]

#Target items
target_top_selling_items = ["Household essentials","Groceries","Electronics","Clothing","Home decor","Furniture","Toys","Baby products","Beauty products",
    "Healthcare products","Pet supplies","Outdoor furniture","Kitchen appliances","Bedding","Bath towels","School supplies","Office supplies","Craft supplies",
    "Books","Games","Cookware","Cleaning supplies","Storage solutions","Party supplies","Gardening supplies","Sports equipment","Fitness equipment","Bikes",
    "Skateboards","Scooters"]

### Model Functions & Initialization

In [None]:
"""---------------------------------------------------DATA GENERATION FUNCTIONS------------------------------------------------------------"""
#function to generate random transaction datasets, one per store, with n_transactions transactions each
def create_db(max_items, n_transactions):
    """Generate one random transaction dataset per store and pickle it to disk.

    Nothing is generated when all five pickle files (amazon.pkl, costco.pkl,
    dicks.pkl, target.pkl, shoprite.pkl) already exist in the working
    directory, so relaunching the model keeps working on the same data.

    Args:
        max_items: largest number of items a single transaction may contain
            (inclusive). The size is also capped at the length of the store's
            item list.
        n_transactions: number of transactions generated for each store.

    Raises:
        ValueError: if max_items is smaller than 1.
    """
    # Pickle base name -> item list the store's transactions are sampled from
    store_items = {
        'amazon': amazon_top_selling,
        'costco': costco_top_selling_items,
        'dicks': dicks_top_selling_items,
        'target': target_top_selling_items,
        'shoprite': shoprite_top_selling,
    }

    # Check if the datasets have already been created; if so, stop here
    if all(os.path.exists(f'{store}.pkl') for store in store_items):
        print("Pickle files already exist. Skipping new transactions dataset creation.")
        return

    if max_items < 1:
        raise ValueError("max_items must be at least 1")

    # Generate every dataset before writing anything, so a failure while
    # sampling does not leave a partial set of pickle files behind
    datasets = {}
    for store, items in store_items.items():
        transactions = []
        for _ in range(n_transactions):
            # randint's upper bound is exclusive, hence the +1 so that a
            # transaction can actually contain max_items items
            n_items = min(np.random.randint(1, max_items + 1), len(items))
            # random.sample draws without replacement: no repeated items
            transactions.append(random.sample(items, n_items))
        datasets[store] = transactions

    # Save datasets to pickle files on local drive
    for store, transactions in datasets.items():
        with open(f'{store}.pkl', 'wb') as f:
            pickle.dump(transactions, f)


#function to read datasets from pickle files on local drive
def get_pickle():
    """Load the five pickled transaction datasets from the working directory.

    Returns:
        A tuple (amazon, costco, dicks, target, shoprite) holding the object
        unpickled from each store's '<store>.pkl' file, in that order.
    """
    file_names = ('amazon.pkl', 'costco.pkl', 'dicks.pkl', 'target.pkl', 'shoprite.pkl')

    datasets = []
    for file_name in file_names:
        with open(file_name, 'rb') as handle:
            datasets.append(pickle.load(handle))

    return tuple(datasets)

"""----------------------------------------------------APRIORI ALGO-----------------------------------------------------------"""
#Function for Apriori algorithm
def apriori_function(dataset, min_support, min_confidence):
    """Run the mlxtend Apriori algorithm on a transaction dataset and show the results.

    Args:
        dataset: list of transactions, each transaction being a list of item names.
        min_support: minimum support passed to apriori.
        min_confidence: minimum confidence threshold passed to association_rules.

    Returns:
        A tuple (rules_count, frequent_itemsets_count). When no frequent
        itemset is found the function returns (0, 0), so callers can always
        unpack two values.
    """
    # Transform the dataset into a binary format suitable for Apriori
    te = TransactionEncoder()
    te_ary = te.fit(dataset).transform(dataset)
    df = pd.DataFrame(te_ary, columns=te.columns_)

    # Apply Apriori algorithm to find frequent itemsets
    frequent_itemsets = apriori(df, min_support=min_support, use_colnames=True)
    if len(frequent_itemsets) == 0:
        print("No frequent itemsets found")
        # Without frequent itemsets there cannot be any rules either
        return 0, 0

    # Generate association rules
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_confidence)

    # Display results
    print("Frequent Itemsets:")
    display(frequent_itemsets)

    print("\nAssociation Rules:")
    display(rules)

    return len(rules), len(frequent_itemsets)

"""-----------------------------------------------------BRUTE FORCE FUNCTIONS----------------------------------------------------------"""

# Define a function to take in the store input from the user
def get_store():
    """Ask the user for a store number until a valid one is entered.

    Returns:
        The store key matching the entered number: "amazon", "costco",
        "target", "dicks", "shoprite", or "all" for option 6.
    """
    # Menu number -> store key
    store_by_number = {
        1: "amazon",
        2: "costco",
        3: "target",
        4: "dicks",
        5: "shoprite",
        6: "all",
    }

    while True:
        answer = input("Enter a store number: 1. Amazon, 2. Costco, 3. Target, 4. Dick's, 5. ShopRite, 6. All: ")
        try:
            choice = int(answer)
        except ValueError:
            print("Invalid input. Please enter a valid integer number.")
            continue

        if choice in store_by_number:
            return store_by_number[choice]

        print("Invalid store number. Please enter a number between 1 and 6.")


#function to get the minimum support and minimum confidence values from the user
def _prompt_fraction(prompt):
    """Keep prompting until the user enters a decimal number in the range (0, 1]."""
    while True:
        try:
            value = float(input(prompt))
        except ValueError:
            print("Invalid input. Please enter a valid decimal number.")
            continue

        if 0 < value <= 1:
            return value

        print("Invalid values. Please enter decimal numbers between 0 and 1.")


def get_min_support_confidence():
    """Ask the user for the minimum support and minimum confidence values.

    Each value is validated on its own, so an invalid confidence does not
    force the user to type the support again.

    Returns:
        A tuple (min_support, min_confidence) of floats, both greater than 0
        and less than or equal to 1.
    """
    min_support = _prompt_fraction(
        "Enter a decimal minimum support value greater than 0 and less or equal than 1: ")
    min_confidence = _prompt_fraction(
        "Enter a decimal minimum confidence value greater than 0 and less or equal than 1: ")
    return min_support, min_confidence


#generate itemset of size n for selected store
def itemset_gen(n, store_top_selling):
    """Return every combination of n items from store_top_selling.

    The result is a list of tuples, in the order itertools.combinations
    produces them.
    """
    return list(combinations(store_top_selling, n))

#Support calculation for an itemset of any size
def support_calc(itemset, store_transactions, min_support):
    """Compute the support of candidate itemsets and keep the frequent ones.

    Args:
        itemset: iterable of candidate itemsets, each one a tuple of item names.
        store_transactions: list of transactions, each one a list of item names.
        min_support: a candidate is kept when its support is greater than or
            equal to this value.

    Returns:
        A tuple (support_val_dict, new_frequent_itemset). support_val_dict maps
        each frequent itemset, written as its item names joined with ",", to
        its support (fraction of transactions containing every item).
        new_frequent_itemset is True when at least one candidate was kept.
        An empty transaction list yields ({}, False).
    """
    support_val_dict = {}

    n_transactions = len(store_transactions)
    if n_transactions == 0:
        # No transactions: support is undefined, so nothing can be frequent
        return support_val_dict, False

    # Convert each transaction to a set once so that every containment check
    # below is a set operation instead of a repeated list scan
    transaction_sets = [set(transaction) for transaction in store_transactions]

    for item in itemset:
        # Count the transactions that contain every item of the candidate
        support_count = sum(1 for transaction in transaction_sets if transaction.issuperset(item))
        final_support = support_count / n_transactions
        if final_support >= min_support:
            support_val_dict[",".join(item)] = final_support

    return support_val_dict, bool(support_val_dict)


#function to get the support for the frequent item permutations
#for every frequent itemset, find all possible permutations
def permutation_support(itemset, frequent_itemset):
    """Give every ordering of a frequent itemset the support of that itemset.

    Args:
        itemset: comma-joined item names, used as a key of frequent_itemset.
        frequent_itemset: dictionary mapping comma-joined itemsets to supports.

    Returns:
        A dictionary whose keys are all the permutations of the items
        (comma-joined) and whose values are all frequent_itemset[itemset].
    """
    items = itemset.split(",")
    shared_support = frequent_itemset[itemset]
    return {",".join(ordering): shared_support for ordering in permutations(items)}

#frequent_itemset_w_permutations: get the support value for all frequent itemset permutations
def perm_support_to_freq_itemset_dict(frequent_itemset):
    """Collect the permutation supports of every multi-item frequent itemset.

    Single-item keys are skipped; for every key holding two or more
    comma-separated items, the output of permutation_support is merged into
    the returned dictionary.
    """
    expanded = {}
    for key in frequent_itemset:
        # A key without a comma holds a single item, which has no permutations to add
        if "," not in key:
            continue
        expanded.update(permutation_support(key, frequent_itemset))
    return expanded


# Define a function to sort the values in a row
def sort_row(row):
    """Return the elements of row as a new list sorted in ascending order."""
    ordered = list(row)
    ordered.sort()
    return ordered


#function to extract the association rules
def association_rules_funct(frequent_itemset_w_permut_dict, min_confidence):
    """Extract the association rules that reach the minimum confidence.

    Every key with two or more comma-separated items is split at each position
    into a left side (antecedent) and a right side (consequent). The confidence
    of "left -> right" is support(whole key) / support(left side).

    Because the input holds every ordering of each frequent itemset, the same
    rule shows up several times with its items in a different order. Two rules
    are treated as duplicates when both sides contain the same items; only the
    first occurrence is kept.

    Args:
        frequent_itemset_w_permut_dict: dictionary mapping comma-joined
            itemsets (including their permutations) to their support.
        min_confidence: a rule is kept when its confidence is greater than or
            equal to this value.

    Returns:
        A DataFrame with the columns "Association rule" (formatted as
        "A -> B"), "Support" and "Confidence", the last two rounded to
        3 decimals. The DataFrame is empty when no rule qualifies.

    Raises:
        KeyError: if the left side of a candidate rule is not a key of
            frequent_itemset_w_permut_dict.
    """
    rule_rows = []
    # (sorted left items, sorted right items) of the rules already kept
    seen_rules = set()

    for item, full_support in frequent_itemset_w_permut_dict.items():
        # split the itemset into individual items
        item_split = item.split(",")

        # single-item keys produce an empty range and therefore no rules
        for i in range(1, len(item_split)):
            left_items = item_split[:i]
            right_items = item_split[i:]
            left_side = ",".join(left_items)

            left_side_support = frequent_itemset_w_permut_dict[left_side]
            confidence = full_support / left_side_support

            if confidence >= min_confidence:
                # Compare whole item names, not characters, so that different
                # items spelled with the same letters are never confused
                rule_id = (tuple(sorted(left_items)), tuple(sorted(right_items)))
                if rule_id in seen_rules:
                    continue
                seen_rules.add(rule_id)

                # format the association rule so that it looks like A -> B
                rule_text = left_side + " -> " + ",".join(right_items)
                rule_rows.append([rule_text, round(full_support, 3), round(confidence, 3)])

    # Return the rules as a dataframe so that results can be displayed in a clean format
    return pd.DataFrame(rule_rows, columns=["Association rule", "Support", "Confidence"])


### Master function
functions for the execution of Apriori and Brute force algorithms

In [None]:
#Master function that executes the Apriori algorithm and the brute force algorithm sequentially


def model_run():
  """Run the Apriori and the brute force algorithm for the store(s) chosen by the user.

  The function makes sure the transaction datasets exist, asks the user for a
  store, a minimum support and a minimum confidence, and then, for each
  selected store, runs and times the Apriori algorithm followed by the brute
  force search.

  Returns:
      A tuple (brute_counts, apriori_counts, store_selection, computation_time):
      - brute_counts: one [rules_count, frequent_itemsets_count] per store (brute force).
      - apriori_counts: one [rules_count, frequent_itemsets_count] per store (Apriori).
      - store_selection: list of the store keys that were processed, in order.
      - computation_time: one [apriori_seconds, brute_force_seconds] per store.
  """
  #initialize lists where result counts and computation times will be stored
  apriori_counts = []
  brute_counts = []
  computation_time = []

  """============================DATA CREATION"""
  #create 5 databases with 20 transactions each; new data won't be generated if the pickle files already exist in the local drive
  create_db(max_items = 16, n_transactions=20)

  #retrieve transaction data from pickle files
  amazon, costco, dicks, target, shoprite = get_pickle()

  #Dictionary: store key -> [item list of the store, transaction list of the store]
  store_dictionary = {"amazon": [amazon_top_selling, amazon], "costco": [costco_top_selling_items, costco], "dicks": [dicks_top_selling_items, dicks],
                      "target": [target_top_selling_items, target],"shoprite": [shoprite_top_selling, shoprite]}

  """============================INPUT RETRIEVAL FROM USER"""
  #get the store name from the user
  store_input = get_store()
  print(f"Your selected store is {store_input}")

  #get the minimum support and confidence value from the user
  min_support, min_confidence = get_min_support_confidence()
  print(f"Your selected minimum support is {min_support}")
  print(f"Your selected minimum confidence is {min_confidence}")

  store_selection = []

  #"all" expands to every store; otherwise only the selected store is processed
  if store_input=="all":
    store_selection = ['amazon', 'target', 'costco', 'dicks', 'shoprite']
  else:
     store_selection = [store_input]

  for store_name in store_selection:
    #Get the top selling list for the selected store
    store_top_selling = store_dictionary[store_name][0]

    #Get the transaction list for the selected store
    store_transactions = store_dictionary[store_name][1]

    print(f"STORE = {store_name}=============================APRIORI ALGORITHM EXECUTION============================")
    # Start measuring time (the measured span includes the printing/display done inside apriori_function)
    start_time = time.time()

    #apriori_function is expected to return a (rules count, frequent itemsets count) pair
    apriori_rules_count, apriori_frequent_itemsets_count = apriori_function(store_transactions, min_support, min_confidence)

    #Save Apriori counts for frequent itemsets and association rules
    apriori_counts.append([apriori_rules_count, apriori_frequent_itemsets_count])

    # Stop measuring time
    end_time = time.time()

    # Calculate and display the elapsed time
    elapsed_time_apriori = round(end_time - start_time,3)
    print(f"Code computation time of Apriori algorithm: {elapsed_time_apriori} seconds")

    print(f"\n\n\nSTORE = {store_name}=============================BRUTE FORCE ALGORITHM EXECUTION=========================")
    # Start measuring time
    start_time = time.time()

    itemset = []
    k = 1 #itemset size
    final_frequent_set = {} #all frequent itemsets found so far, mapped to their support
    #This while loop generates the itemsets of size k and checks whether any of them is frequent
    while True:
        print(f"Computing {k} frequent itemsets")
        #generate all itemsets of size k
        itemset = itemset_gen(k, store_top_selling)

        #calculate the support for all k-itemsets
        #frequent_set is a dictionary with the support values of the frequent k-itemsets
        frequent_set, new_frequent_event = support_calc(itemset, store_transactions, min_support)

        #If there are new frequent itemsets, add them to the dictionary and increase the itemset size k,
        #otherwise stop the frequent itemset search
        if new_frequent_event == True:
            final_frequent_set.update(frequent_set)
            k +=1
        else:
            break

    #Show the frequent itemsets as a two column table (Itemset, Support)
    frequent_itemset_df = pd.DataFrame.from_dict(final_frequent_set, orient='index', columns=['Support'])
    frequent_itemset_df.reset_index(inplace=True)
    frequent_itemset_df.rename(columns={"index": "Itemset"}, inplace=True)
    display(frequent_itemset_df)


    #initialize a new dictionary that will contain the frequent itemsets and their support values,
    #as well as the itemset permutations and their support values
    frequent_itemset_w_permut_dict = {}
    frequent_itemset_w_permut_dict.update(final_frequent_set)
    frequent_itemset_w_permut_dict.update(perm_support_to_freq_itemset_dict(final_frequent_set))

    print("calculating associations rules")
    #get the association rules
    results_df = association_rules_funct(frequent_itemset_w_permut_dict, min_confidence)

    if results_df.empty:
      print(f"the {store_name} dataset contains no association rules")
    else:
      print(f"the {store_name} dataset contains {len(results_df)} association rules")
      display(results_df)

    # Stop measuring time
    end_time = time.time()

    #Save Brute force counts for frequent itemsets and association rules
    brute_rules_count = results_df.shape[0]
    brute_frequent_itemsets_count = frequent_itemset_df.shape[0]
    brute_counts.append([brute_rules_count, brute_frequent_itemsets_count])

    # Calculate and display the elapsed time
    elapsed_time_brute = round(end_time - start_time,3)

    #save computation times to list
    computation_time.append([elapsed_time_apriori, elapsed_time_brute])
    print(f"Code computation time of Brute Force algorithm: {elapsed_time_brute} seconds")

  return brute_counts, apriori_counts, store_selection, computation_time


### <font color="orange">Model execution</font>:
Run the <font color="orange">model_run()</font> function to execute the Apriori and Brute force algorithm for the selected store.
When executed, the model will prompt the user to provide the following inputs:
1. <font color="orange">Store number:</font> 1. Amazon, 2. Costco, 3. Target, 4. Dick's, 5. ShopRite, 6. All. Decimal numbers, letters, and numbers greater than 6 or smaller than 1 won't be accepted.
2. <font color="orange">Minimum support:</font> any decimal number greater than zero and less than or equal to 1; numbers outside the acceptable range or letters won't be accepted.
3. <font color="orange">Minimum confidence:</font> any decimal number greater than zero and less than or equal to 1; numbers outside the acceptable range or letters won't be accepted.

When all inputs have been successfully entered, if "All" was selected, the model will output the Apriori and Brute force results for one store at a time, as well as the computation times. If instead a specific store was selected, only that store's output will be provided.

In [None]:
#Run the whole model; the user is prompted for the store, minimum support and minimum confidence
brute_counts, apriori_counts, store_selection, computation_time = model_run()

#Build the side by side comparison of the Apriori and Brute force algorithms (one row per store)
print("\n\n=============================RESULTS COMPARISON=============================")
results_summary_df = pd.DataFrame({
    'Store name': store_selection,
    'Apriori time (sec)': [times[0] for times in computation_time],
    'Brute force time (sec)': [times[1] for times in computation_time],
})
#How many times longer the brute force search took compared to Apriori
results_summary_df['Speed comparison'] = (
    results_summary_df['Brute force time (sec)'] / results_summary_df['Apriori time (sec)']
)
results_summary_df['Apriori association rules'] = [counts[0] for counts in apriori_counts]
results_summary_df['Apriori freq itemsets'] = [counts[1] for counts in apriori_counts]
results_summary_df['Brute force association rules'] = [counts[0] for counts in brute_counts]
results_summary_df['Brute force freq itemsets'] = [counts[1] for counts in brute_counts]
display(results_summary_df)

#Averages over all the processed stores
apriori_mean_time = round(results_summary_df['Apriori time (sec)'].mean(),3)
brute_mean_time = round(results_summary_df['Brute force time (sec)'].mean(),3)
speed_comparison_mean = round(results_summary_df['Speed comparison'].mean(),3)
print(f"Apriori mean time = {apriori_mean_time} | Brute force mean time = {brute_mean_time} | Speed comparison mean = {speed_comparison_mean}")


Pickle files already exist. Skipping new transactions dataset creation.
Your selected store is all
Your selected minimum support is 0.25
Your selected minimum confidence is 0.25
Frequent Itemsets:


Unnamed: 0,support,itemsets
0,0.25,(Air fryer)
1,0.35,(Air purifier)
2,0.25,(Car phone mount)
3,0.3,(Dumbbells)
4,0.3,(Electric kettle)
5,0.25,(Electric toothbrush)
6,0.35,(Headphones)
7,0.25,(Instant pot)
8,0.3,(Kitchen knife set)
9,0.4,(LED TV)



Association Rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Electric kettle),(Resistance bands),0.3,0.35,0.25,0.833333,2.380952,0.145,3.9,0.828571
1,(Resistance bands),(Electric kettle),0.35,0.3,0.25,0.714286,2.380952,0.145,2.45,0.892308


Code computation time of Apriori algorithm: 0.018 seconds



Computing 1 frequent itemsets
Computing 2 frequent itemsets
Computing 3 frequent itemsets


Unnamed: 0,Itemset,Support
0,Headphones,0.35
1,Video games,0.3
2,Portable charger,0.25
3,Smartwatch,0.25
4,Kitchen knife set,0.3
5,Electric toothbrush,0.25
6,Air fryer,0.25
7,Instant pot,0.25
8,Resistance bands,0.35
9,Tablet,0.3


calculating associations rules
the amazon dataset contains 2 association rules


Unnamed: 0,Association rule,Support,Confidence
0,Resistance bands -> Electric kettle,0.25,0.714
1,Electric kettle -> Resistance bands,0.25,0.833


Code computation time of Brute Force algorithm: 0.042 seconds
Frequent Itemsets:


Unnamed: 0,support,itemsets
0,0.3,(Baby products)
1,0.25,(Bedding)
2,0.35,(Bikes)
3,0.3,(Books)
4,0.35,(Craft supplies)
5,0.25,(Electronics)
6,0.25,(Gardening supplies)
7,0.35,(Groceries)
8,0.35,(Healthcare products)
9,0.35,(Kitchen appliances)



Association Rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Party supplies),(Baby products),0.4,0.3,0.25,0.625,2.083333,0.13,1.866667,0.866667
1,(Baby products),(Party supplies),0.3,0.4,0.25,0.833333,2.083333,0.13,3.6,0.742857
2,(Scooters),(Baby products),0.3,0.3,0.25,0.833333,2.777778,0.16,4.2,0.914286
3,(Baby products),(Scooters),0.3,0.3,0.25,0.833333,2.777778,0.16,4.2,0.914286
4,(Books),(Bikes),0.3,0.35,0.25,0.833333,2.380952,0.145,3.9,0.828571
5,(Bikes),(Books),0.35,0.3,0.25,0.714286,2.380952,0.145,2.45,0.892308
6,(Scooters),(Party supplies),0.3,0.4,0.25,0.833333,2.083333,0.13,3.6,0.742857
7,(Party supplies),(Scooters),0.4,0.3,0.25,0.625,2.083333,0.13,1.866667,0.866667


Code computation time of Apriori algorithm: 0.012 seconds



Computing 1 frequent itemsets
Computing 2 frequent itemsets
Computing 3 frequent itemsets


Unnamed: 0,Itemset,Support
0,Groceries,0.35
1,Electronics,0.25
2,Baby products,0.3
3,Healthcare products,0.35
4,Pet supplies,0.3
5,Outdoor furniture,0.35
6,Kitchen appliances,0.35
7,Bedding,0.25
8,School supplies,0.3
9,Office supplies,0.25


calculating associations rules
the target dataset contains 8 association rules


Unnamed: 0,Association rule,Support,Confidence
0,Baby products -> Party supplies,0.25,0.833
1,Baby products -> Scooters,0.25,0.833
2,Books -> Bikes,0.25,0.833
3,Party supplies -> Scooters,0.25,0.625
4,Party supplies -> Baby products,0.25,0.625
5,Scooters -> Baby products,0.25,0.833
6,Bikes -> Books,0.25,0.714
7,Scooters -> Party supplies,0.25,0.833


Code computation time of Brute Force algorithm: 0.033 seconds
Frequent Itemsets:


Unnamed: 0,support,itemsets
0,0.3,(Clothing)
1,0.25,(Cookware)
2,0.4,(Electronics)
3,0.35,(Fresh produce)
4,0.3,(Frozen foods)
5,0.25,(Furniture)
6,0.4,(Gardening supplies)
7,0.3,(Health and beauty products)
8,0.45,(Home appliances)
9,0.35,(Home decor)



Association Rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Kirkland Signature batteries),(Electronics),0.4,0.4,0.3,0.75,1.875,0.14,2.4,0.777778
1,(Electronics),(Kirkland Signature batteries),0.4,0.4,0.3,0.75,1.875,0.14,2.4,0.777778
2,(Toilet paper),(Electronics),0.4,0.4,0.25,0.625,1.5625,0.09,1.6,0.6
3,(Electronics),(Toilet paper),0.4,0.4,0.25,0.625,1.5625,0.09,1.6,0.6
4,(Gardening supplies),(Fresh produce),0.4,0.35,0.25,0.625,1.785714,0.11,1.733333,0.733333
5,(Fresh produce),(Gardening supplies),0.35,0.4,0.25,0.714286,1.785714,0.11,2.1,0.676923
6,(School supplies),(Fresh produce),0.5,0.35,0.25,0.5,1.428571,0.075,1.3,0.6
7,(Fresh produce),(School supplies),0.35,0.5,0.25,0.714286,1.428571,0.075,1.75,0.461538
8,(School supplies),(Gardening supplies),0.5,0.4,0.3,0.6,1.5,0.1,1.5,0.666667
9,(Gardening supplies),(School supplies),0.4,0.5,0.3,0.75,1.5,0.1,2.0,0.555556


Code computation time of Apriori algorithm: 0.019 seconds



Computing 1 frequent itemsets
Computing 2 frequent itemsets
Computing 3 frequent itemsets
Computing 4 frequent itemsets


Unnamed: 0,Itemset,Support
0,Toilet paper,0.4
1,Rotisserie chicken,0.3
2,Kirkland Signature batteries,0.4
3,Paper towels,0.35
4,Water bottles,0.25
5,Fresh produce,0.35
6,Snack foods,0.25
7,Kitchen appliances,0.3
8,Wine,0.25
9,Frozen foods,0.3


calculating associations rules
the costco dataset contains 24 association rules


Unnamed: 0,Association rule,Support,Confidence
0,Toilet paper -> Electronics,0.25,0.625
1,Kirkland Signature batteries -> Electronics,0.3,0.75
2,Fresh produce -> Gardening supplies,0.25,0.714
3,Fresh produce -> School supplies,0.25,0.714
4,Wine -> Gardening supplies,0.25,1.0
5,Home appliances -> School supplies,0.25,0.556
6,Tools -> Home decor,0.25,0.833
7,Gardening supplies -> School supplies,0.3,0.75
8,School supplies -> Home decor,0.25,0.5
9,"Fresh produce -> Gardening supplies,School sup...",0.25,0.714


Code computation time of Brute Force algorithm: 0.208 seconds
Frequent Itemsets:


Unnamed: 0,support,itemsets
0,0.5,(Baseball equipment)
1,0.35,(Basketball)
2,0.3,(Camping gear)
3,0.4,(Exercise clothing)
4,0.4,(Fishing equipment)
5,0.35,(Fitness equipment)
6,0.25,(Fitness trackers)
7,0.45,(Golf clubs)
8,0.4,(Gym accessories)
9,0.35,(Gym bags)



Association Rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Baseball equipment),(Exercise clothing),0.50,0.40,0.25,0.500000,1.250000,0.0500,1.200000,0.400000
1,(Exercise clothing),(Baseball equipment),0.40,0.50,0.25,0.625000,1.250000,0.0500,1.333333,0.333333
2,(Fishing equipment),(Baseball equipment),0.40,0.50,0.30,0.750000,1.500000,0.1000,2.000000,0.555556
3,(Baseball equipment),(Fishing equipment),0.50,0.40,0.30,0.600000,1.500000,0.1000,1.500000,0.666667
4,(Fitness equipment),(Baseball equipment),0.35,0.50,0.25,0.714286,1.428571,0.0750,1.750000,0.461538
...,...,...,...,...,...,...,...,...,...,...
69,"(Gym accessories, Surfing gear)",(Golf clubs),0.25,0.45,0.25,1.000000,2.222222,0.1375,inf,0.733333
70,"(Golf clubs, Surfing gear)",(Gym accessories),0.25,0.40,0.25,1.000000,2.500000,0.1500,inf,0.800000
71,(Gym accessories),"(Golf clubs, Surfing gear)",0.40,0.25,0.25,0.625000,2.500000,0.1500,2.000000,1.000000
72,(Golf clubs),"(Gym accessories, Surfing gear)",0.45,0.25,0.25,0.555556,2.222222,0.1375,1.687500,1.000000


Code computation time of Apriori algorithm: 0.018 seconds



Computing 1 frequent itemsets
Computing 2 frequent itemsets
Computing 3 frequent itemsets
Computing 4 frequent itemsets


Unnamed: 0,Itemset,Support
0,Fitness equipment,0.35
1,Outdoor gear,0.4
2,Hiking boots,0.4
3,Running shoes,0.25
4,Exercise clothing,0.4
5,Basketball,0.35
6,Baseball equipment,0.5
7,Golf clubs,0.45
8,Tennis racquets,0.5
9,Camping gear,0.3


calculating associations rules
the dicks dataset contains 74 association rules


Unnamed: 0,Association rule,Support,Confidence
0,Fitness equipment -> Baseball equipment,0.25,0.714
1,Fitness equipment -> Hunting gear,0.25,0.714
2,Fitness equipment -> Snowboarding gear,0.25,0.714
3,Fitness equipment -> Gym accessories,0.25,0.714
4,Outdoor gear -> Baseball equipment,0.25,0.625
...,...,...,...
69,Surfing gear -> Skateboarding gear,0.25,0.625
70,"Golf clubs,Surfing gear -> Gym accessories",0.25,1.000
71,"Gym accessories -> Golf clubs,Surfing gear",0.25,0.625
72,"Gym accessories,Surfing gear -> Golf clubs",0.25,1.000


Code computation time of Brute Force algorithm: 0.215 seconds
Frequent Itemsets:


Unnamed: 0,support,itemsets
0,0.35,(Bread)
1,0.25,(Butter)
2,0.3,(Canned tomatoes)
3,0.25,(Cereal)
4,0.25,(Cheese)
5,0.4,(Chips)
6,0.3,(Dish soap)
7,0.3,(Eggs)
8,0.3,(Frozen vegetables)
9,0.35,(Ice cream)



Association Rules:


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Bread),(Eggs),0.35,0.3,0.25,0.714286,2.380952,0.145,2.45,0.892308
1,(Eggs),(Bread),0.3,0.35,0.25,0.833333,2.380952,0.145,3.9,0.828571
2,(Bread),(Peanuts),0.35,0.55,0.25,0.714286,1.298701,0.0575,1.575,0.353846
3,(Peanuts),(Bread),0.55,0.35,0.25,0.454545,1.298701,0.0575,1.191667,0.511111
4,(Shampoo),(Bread),0.4,0.35,0.25,0.625,1.785714,0.11,1.733333,0.733333
5,(Bread),(Shampoo),0.35,0.4,0.25,0.714286,1.785714,0.11,2.1,0.676923
6,(Chips),(Peanuts),0.4,0.55,0.3,0.75,1.363636,0.08,1.8,0.444444
7,(Peanuts),(Chips),0.55,0.4,0.3,0.545455,1.363636,0.08,1.32,0.592593
8,(Chips),(Sugar),0.4,0.35,0.25,0.625,1.785714,0.11,1.733333,0.733333
9,(Sugar),(Chips),0.35,0.4,0.25,0.714286,1.785714,0.11,2.1,0.676923


Code computation time of Apriori algorithm: 0.022 seconds



Computing 1 frequent itemsets
Computing 2 frequent itemsets
Computing 3 frequent itemsets
Computing 4 frequent itemsets


Unnamed: 0,Itemset,Support
0,Eggs,0.3
1,Bread,0.35
2,Rice,0.25
3,Pasta,0.5
4,Oranges,0.25
5,Spinach,0.25
6,Cereal,0.25
7,Yogurt,0.3
8,Cheese,0.25
9,Butter,0.25


calculating associations rules
the shoprite dataset contains 34 association rules


Unnamed: 0,Association rule,Support,Confidence
0,Eggs -> Bread,0.25,0.833
1,Eggs -> Peanuts,0.25,0.833
2,Eggs -> Tea,0.25,0.833
3,Bread -> Peanuts,0.25,0.714
4,Bread -> Shampoo,0.25,0.714
5,Pasta -> Peanuts,0.25,0.5
6,Peanuts -> Ice cream,0.25,0.455
7,Peanuts -> Chips,0.3,0.545
8,Peanuts -> Tea,0.25,0.455
9,Peanuts -> Sugar,0.25,0.455


Code computation time of Brute Force algorithm: 0.213 seconds




Unnamed: 0,Store name,Apriori time (sec),Brute force time (sec),Speed comparison,Apriori association rules,Apriori freq itemsets,Brute force association rules,Brute force freq itemsets
0,amazon,0.018,0.042,2.333333,2,17,2,17
1,target,0.012,0.033,2.75,8,22,8,22
2,costco,0.019,0.208,10.947368,24,35,24,35
3,dicks,0.018,0.215,11.944444,74,58,74,58
4,shoprite,0.022,0.213,9.681818,34,38,34,38


Apriori mean time = 0.018 | Brute force mean time = 0.142 | Speed comparison mean = 7.531
