<a href="https://colab.research.google.com/github/Aleena24/ML_Lab/blob/main/lab4_2348503.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import threading
import matplotlib.pyplot as plt
import time
import logging
import queue


In [None]:
# Load the groceries CSV from the Colab working directory into a DataFrame.
df = pd.read_csv('/content/Groceries_dataset.csv')
# Bare last expression: the notebook renders the first rows of df.
df.head()

Data Preparation

In [None]:
# Show the dtype pandas inferred for each column of df.
print(df.dtypes)

In [None]:
def fp_growth(dataset):
  """Convert every transaction to a list of ints, dropping non-integer items.

  Despite its name this is a formatting helper, not the mining algorithm.
  NOTE: a later cell defines another `fp_growth(dataset, min_support,
  confidence)`, which replaces this one once that cell has run.

  Args:
    dataset: iterable of transactions, each an iterable of items.

  Returns:
    A list with one list per transaction, holding `int(item)` for every item
    that converts. Items that do not convert are skipped, so a transaction
    may come back empty.
  """
  fp_growth_format = []
  for transaction in dataset:
    new_transaction = []
    for item in transaction:
      try:
        new_transaction.append(int(item))
      except (TypeError, ValueError):
        # ValueError: text such as "milk". TypeError: None and other objects
        # int() cannot take. Either way the item is not an integer; skip it
        # instead of aborting the whole conversion.
        continue
    fp_growth_format.append(new_transaction)

  return fp_growth_format


In [None]:
# Toy transactions used to try out the formatter defined in the previous cell.
dataset = [["a", "b", "c"], ["b", "c", "d"], ["a", "b"]]

# At this point fp_growth is the int-converting formatter. None of these
# letters parse as integers, so every transaction comes back empty.
fp_growth_format = fp_growth(dataset)

# Prints [[], [], []] for the letter-only sample above.
print(fp_growth_format)

In [None]:
def apriori_format(dataset):
  """Return a copy of `dataset` in which every item is converted with `str`.

  Args:
    dataset: iterable of transactions, each an iterable of items.

  Returns:
    A list of lists of strings, one inner list per transaction, with the
    original item order preserved.
  """
  return [[str(entry) for entry in basket] for basket in dataset]


Multithreading

In [None]:
def my_function_1():
  """Placeholder worker standing in for the FP-growth thread."""
  print("Thread for FP Growth")

def my_function_2():
  """Placeholder worker standing in for the Apriori thread."""
  print("Thread for Apriori Algorithm")

thread_1 = threading.Thread(target=my_function_1, daemon = True)
thread_2 = threading.Thread(target=my_function_2, daemon = True)
# Daemon threads are threads that are not required to finish before the main thread exits.

thread_1.start()
thread_2.start()

# Wait for both workers so their output belongs to this cell. Without the
# joins the prints may show up under a later cell, or be lost entirely if the
# interpreter exits first (daemon threads are abandoned at shutdown).
thread_1.join()
thread_2.join()

Algorithm Implementation

In [None]:
class _FPNode:
  """One FP-tree node: an item, its accumulated count and its tree links."""

  def __init__(self, item, parent):
    self.item = item
    self.count = 0
    self.parent = parent    # None only for the root
    self.children = {}      # item -> _FPNode


class FPTree:
  """Minimal FP-tree miner providing what `fp_growth` calls.

  Args:
    dataset: iterable of transactions, each an iterable of hashable items.
    min_support: minimum fraction of transactions (0..1) an item set must
      appear in to count as frequent.
  """

  def __init__(self, dataset, min_support):
    # Drop duplicate items inside a transaction but keep first-seen order so
    # the mined output is deterministic.
    self.transactions = [tuple(dict.fromkeys(t)) for t in dataset]
    self.min_support = min_support
    self._baskets = [frozenset(t) for t in self.transactions]

  def get_support(self, item_set):
    """Return the fraction of transactions containing every item of `item_set`."""
    if not self._baskets:
      return 0.0
    wanted = frozenset(item_set)
    hits = sum(1 for basket in self._baskets if wanted <= basket)
    return hits / len(self._baskets)

  def get_confidence(self, item_set, item):
    """Return the confidence of the rule (item_set without item) -> item.

    Computed as support(item_set) / support(item_set without item); 0.0 when
    the antecedent never occurs.
    """
    antecedent = frozenset(item_set) - {item}
    base = self.get_support(antecedent)
    if base == 0:
      return 0.0
    return self.get_support(item_set) / base

  def mine(self):
    """Return every frequent item set as a list of items."""
    weighted = [(transaction, 1) for transaction in self.transactions]
    return list(self._grow(weighted, []))

  def _grow(self, weighted, suffix):
    """Yield the frequent item sets that end with `suffix`.

    `weighted` is a list of (items, count) pairs: the conditional pattern base
    for `suffix` (the full transaction list when `suffix` is empty).
    """
    total = len(self.transactions)
    counts = {}
    for items, weight in weighted:
      for item in items:
        counts[item] = counts.get(item, 0) + weight

    frequent = [item for item, count in counts.items()
                if count / total >= self.min_support]
    if not frequent:
      return
    # Most frequent first; the sort is stable, so ties keep first-seen order
    # and items never have to be compared with each other.
    frequent.sort(key=lambda item: -counts[item])
    order = {item: position for position, item in enumerate(frequent)}

    # Build the (conditional) FP-tree plus a header table of nodes per item.
    root = _FPNode(None, None)
    header = {item: [] for item in frequent}
    for items, weight in weighted:
      path = sorted((item for item in items if item in order),
                    key=order.__getitem__)
      node = root
      for item in path:
        child = node.children.get(item)
        if child is None:
          child = _FPNode(item, node)
          node.children[item] = child
          header[item].append(child)
        child.count += weight
        node = child

    # Grow patterns from the least frequent item upwards.
    for item in reversed(frequent):
      pattern = [item] + suffix
      yield pattern
      conditional = []
      for node in header[item]:
        prefix = []
        ancestor = node.parent
        while ancestor.parent is not None:   # stop at the root
          prefix.append(ancestor.item)
          ancestor = ancestor.parent
        if prefix:
          conditional.append((prefix, node.count))
      yield from self._grow(conditional, pattern)


def fp_growth(dataset, min_support, confidence):
  """Mine frequent item sets with FP-growth and apply a confidence filter.

  NOTE: this definition replaces the earlier one-argument `fp_growth`
  formatting helper defined near the top of the notebook.

  Args:
    dataset: iterable of transactions, each an iterable of hashable items.
    min_support: minimum support as a fraction of transactions (0..1).
    confidence: minimum confidence threshold.

  Returns:
    The frequent item sets found by the tree, followed by one `[item]` entry
    for every item of a multi-item set whose rule
    (set without item) -> item reaches `confidence`.
  """
  fp_tree = FPTree(dataset, min_support)
  mined_item_sets = fp_tree.mine()

  # Work on a copy: the original appended to the list it was iterating over.
  frequent_item_sets = list(mined_item_sets)
  for item_set in mined_item_sets:
    if len(item_set) < 2:
      continue
    for item in item_set:
      # Separate name: the original overwrote the `confidence` parameter and
      # then compared it with itself, which is always true.
      rule_confidence = fp_tree.get_confidence(item_set, item)
      if rule_confidence >= confidence:
        frequent_item_sets.append([item])

  return frequent_item_sets


In [None]:
class Apriori:
  """Level-wise (Apriori) frequent item set miner.

  Args:
    min_support: minimum fraction of transactions (0..1) an item set must
      appear in to be kept.
    confidence: confidence threshold; stored on the instance but not used by
      the mining steps of this class.

  Attributes:
    frequent_item_sets: all frequent item sets found by the last `mine` call,
      smallest sets first.
  """

  def __init__(self, min_support, confidence):
    self.min_support = min_support
    self.confidence = confidence
    self.frequent_item_sets = []

  def mine(self, dataset):
    """Return every frequent item set of `dataset` as a list of item lists.

    Args:
      dataset: iterable of transactions, each an iterable of hashable items.
    """
    rows = [list(transaction) for transaction in dataset]
    baskets = [frozenset(row) for row in rows]
    self.frequent_item_sets = []

    # Size-1 candidates come straight from the data, in order of first
    # appearance so that results are deterministic.
    distinct_items = dict.fromkeys(item for row in rows for item in row)
    item_sets = [[item] for item in distinct_items]

    size = 1
    while item_sets:
      frequent = self._mine_item_sets(baskets, item_sets, self.min_support)
      if not frequent:
        break
      self.frequent_item_sets.extend(frequent)
      size += 1
      item_sets = self._generate_candidate_item_sets(size)

    return self.frequent_item_sets

  def _generate_candidate_item_sets(self, k):
    """Build size-`k` candidates from the frequent sets of size `k - 1`.

    Two (k-1)-sets are joined when their union has exactly `k` items. A
    candidate is kept only if all of its (k-1)-subsets are frequent. Returns
    an empty list for `k < 2`, because size-1 candidates need the data itself.
    """
    if k < 2:
      return []

    # Rank items by first appearance in the stored frequent sets so that the
    # items inside each candidate have a stable order.
    rank = {}
    for item_set in self.frequent_item_sets:
      for item in item_set:
        rank.setdefault(item, len(rank))

    previous = [frozenset(item_set) for item_set in self.frequent_item_sets
                if len(item_set) == k - 1]
    known = set(previous)
    seen = set()
    candidate_item_sets = []
    for index, left in enumerate(previous):
      for right in previous[index + 1:]:
        union = left | right
        if len(union) != k or union in seen:
          continue
        seen.add(union)
        if all(union - {item} in known for item in union):
          candidate_item_sets.append(sorted(union, key=rank.__getitem__))

    return candidate_item_sets

  def _get_support(self, dataset, item_set):
    """Return the fraction of transactions in `dataset` containing `item_set`."""
    if not dataset:
      return 0.0
    required = set(item_set)
    hits = sum(1 for transaction in dataset if required.issubset(transaction))
    return hits / len(dataset)

  def _mine_item_sets(self, dataset, item_sets, min_support):
    """Return the members of `item_sets` whose support reaches `min_support`.

    Args:
      dataset: sized collection of transactions.
      item_sets: candidate item sets (lists of items).
      min_support: minimum support as a fraction (0..1).
    """
    frequent_item_sets = []

    for item_set in item_sets:
      support = self._get_support(dataset, item_set)
      if support >= min_support:
        frequent_item_sets.append(item_set)

    return frequent_item_sets


Parallel Execution

In [None]:
def fp_algorithm(df, min_support=0.05, confidence=0.8):
  """Run `fp_growth` once and return how long it took, in seconds.

  Args:
    df: the transactions, passed to `fp_growth` unchanged.
      NOTE(review): callers in this notebook pass a pandas DataFrame; confirm
      that it is already in the transaction format `fp_growth` expects.
    min_support: minimum support forwarded to `fp_growth`.
    confidence: confidence threshold forwarded to `fp_growth`.

  Returns:
    Elapsed time in seconds as a float. The mined item sets are discarded.
  """
  # perf_counter is monotonic and high resolution; time.time() follows the
  # wall clock and can jump while a measurement is running.
  start_time = time.perf_counter()
  fp_growth(df, min_support, confidence)
  return time.perf_counter() - start_time

def apriori_algorithm(df, min_support=0.05, confidence=0.8):
  """Run the `Apriori` miner once and return how long it took, in seconds.

  Args:
    df: the transactions, passed to `Apriori.mine` unchanged.
      NOTE(review): callers in this notebook pass a pandas DataFrame; confirm
      that it is already in the transaction format `Apriori.mine` expects.
    min_support: minimum support given to the `Apriori` constructor.
    confidence: confidence threshold given to the `Apriori` constructor.

  Returns:
    Elapsed time in seconds as a float. The mined item sets are discarded.
  """
  # perf_counter is monotonic, so the measured interval cannot be negative.
  start_time = time.perf_counter()
  # The constructor takes only the thresholds; the data goes to mine().
  # Passing df to the constructor raised
  # "TypeError: Apriori.__init__() takes 3 positional arguments but 4 were given".
  Apriori(min_support, confidence).mine(df)
  return time.perf_counter() - start_time

  # Create a queue to store the execution times of the two algorithms
# Named results_queue so the imported `queue` module is not overwritten;
# rebinding `queue` made this cell fail when it was run a second time.
results_queue = queue.Queue()

def _run_and_report(label, algorithm, dataset):
  """Run `algorithm(dataset)` and put (label, result, error) on results_queue."""
  try:
    elapsed = algorithm(dataset)
  except Exception as error:
    # Report the failure to the main thread instead of losing it in the worker.
    results_queue.put((label, None, error))
  else:
    results_queue.put((label, elapsed, None))

def main(dataset=None):
  """Time FP-growth and Apriori in two parallel threads and print the results.

  Args:
    dataset: data handed to `fp_algorithm` and `apriori_algorithm`. Defaults
      to the notebook-level `df`, which is what the threads were given before.

  Returns:
    A tuple `(fp_execution_time, apriori_execution_time)` in seconds. An entry
    is None when that algorithm raised an exception.
  """
  if dataset is None:
    # The original also re-read the CSV here, but never used the result.
    dataset = df

  # Create threads for the FP and Apriori algorithms
  fp_thread = threading.Thread(
      target=_run_and_report, args=("fp", fp_algorithm, dataset))
  apriori_thread = threading.Thread(
      target=_run_and_report, args=("apriori", apriori_algorithm, dataset))

  # Start the threads
  fp_thread.start()
  apriori_thread.start()

  # Wait for the threads to finish
  fp_thread.join()
  apriori_thread.join()

  # Each worker has put exactly one entry, so two get() calls cannot block.
  # Entries are matched by label because arrival order is not fixed.
  outcomes = {}
  for _ in range(2):
    label, elapsed, error = results_queue.get()
    outcomes[label] = (elapsed, error)
  fp_execution_time, fp_error = outcomes["fp"]
  apriori_execution_time, apriori_error = outcomes["apriori"]

  # Print the execution times
  if fp_error is None:
    print("FP-growth algorithm execution time:", fp_execution_time)
  else:
    print("FP-growth algorithm failed:", repr(fp_error))
  if apriori_error is None:
    print("Apriori algorithm execution time:", apriori_execution_time)
  else:
    print("Apriori algorithm failed:", repr(apriori_error))

  return fp_execution_time, apriori_execution_time

if __name__ == "__main__":
  main()


Exception in thread Thread-12 (fp_algorithm):
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
Exception in thread Thread-13 (apriori_algorithm):
Traceback (most recent call last):
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self._target(*self._args, **self._kwargs)    
  File "<ipython-input-10-8349491317f3>", line 3, in fp_algorithm
self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-10-8349491317f3>", line 10, in apriori_algorithm
TypeError: Apriori.__init__() takes 3 positional arguments but 4 were given
  File "<ipython-input-8-3459a0ffcd2e>", line 3, in fp_growth
NameError: name 'FPTree' is not defined. Did you mean: 'fp_tree'?


Data Visualization

In [None]:
  # Create a bar chart of the execution times
# The timings are measured here because the values printed by main() are
# local to that function and are not visible at notebook level.
fp_execution_time = fp_algorithm(df)
apriori_execution_time = apriori_algorithm(df)

x = ["FP-growth", "Apriori"]
y = [fp_execution_time, apriori_execution_time]
plt.bar(x, y)
plt.xlabel("Algorithm")
plt.ylabel("Execution time (seconds)")
plt.title("Comparison of FP-growth and Apriori execution times")
plt.show()

Recommendation

Bonus

In [None]:
def mine_association_rules(frequent_itemsets, min_support, min_confidence, transactions=None):
  """Derive association rules `antecedent -> consequent` from frequent itemsets.

  Args:
    frequent_itemsets: either a mapping `itemset -> support` (keys are tuples
      or frozensets of items, supports are fractions in 0..1), or an iterable
      of itemsets. With a plain iterable, `transactions` is needed so that
      supports can be counted.
    min_support: itemsets whose support is below this fraction yield no rules.
    min_confidence: minimum confidence a rule needs to be returned.
    transactions: optional iterable of transactions (iterables of items) used
      to count any support the mapping does not already provide.

  Returns:
    A list of `(antecedent, consequent, confidence)` tuples, where antecedent
    and consequent are disjoint frozensets whose union is the itemset and
    confidence is support(itemset) / support(antecedent).

  Raises:
    ValueError: if a needed support is neither in the mapping nor countable
      because `transactions` was not given.
  """
  from itertools import combinations

  baskets = None
  if transactions is not None:
    baskets = [frozenset(transaction) for transaction in transactions]

  supports = {}
  if hasattr(frequent_itemsets, "items"):
    supports = {frozenset(key): value for key, value in frequent_itemsets.items()}

  def support_of(items):
    """Look up, or count and cache, the support of `items`."""
    key = frozenset(items)
    if key not in supports:
      if baskets is None:
        raise ValueError(
            "support of %r is unknown: pass `transactions` or a mapping that contains it"
            % (sorted(key, key=repr),))
      hits = sum(1 for basket in baskets if key <= basket)
      supports[key] = hits / len(baskets) if baskets else 0.0
    return supports[key]

  association_rules = []
  seen = set()
  for raw_itemset in frequent_itemsets:
    itemset = tuple(dict.fromkeys(raw_itemset))   # de-duplicate, keep order
    key = frozenset(itemset)
    # A rule needs at least one item on each side; skip repeated itemsets too.
    if len(itemset) < 2 or key in seen:
      continue
    seen.add(key)

    itemset_support = support_of(itemset)
    if itemset_support < min_support:
      continue

    # Every non-empty proper subset is tried as the antecedent.
    for size in range(1, len(itemset)):
      for antecedent in combinations(itemset, size):
        antecedent_support = support_of(antecedent)
        if antecedent_support == 0:
          continue
        confidence = itemset_support / antecedent_support
        if confidence >= min_confidence:
          consequent = key - frozenset(antecedent)
          association_rules.append((frozenset(antecedent), consequent, confidence))
  return association_rules

def main(data=None):
  """Ask for thresholds, mine itemsets with both algorithms, print the rules.

  NOTE: this definition replaces the earlier `main` that times the two
  algorithms in threads.

  Args:
    data: iterable of transactions. Defaults to the notebook's sample
      `dataset`, because the `read_data()` helper called originally is not
      defined anywhere in the notebook.
      NOTE(review): build real transactions from the groceries data and pass
      them here for a meaningful result.
  """
  if data is None:
    data = dataset
  data = [list(transaction) for transaction in data]

  # Get the minimum support and minimum confidence thresholds from the user
  min_support = float(input("Enter the minimum support threshold: "))
  min_confidence = float(input("Enter the minimum confidence threshold: "))

  # Mine the frequent itemsets with both algorithms. fp_algorithm only
  # returns an execution time and `apriori` does not exist, so the miners are
  # called directly.
  fp_frequent_itemsets = fp_growth(data, min_support, min_confidence)
  apriori_frequent_itemsets = Apriori(min_support, min_confidence).mine(data)

  fp_association_rules = mine_association_rules(
      fp_frequent_itemsets, min_support, min_confidence, transactions=data)
  apriori_association_rules = mine_association_rules(
      apriori_frequent_itemsets, min_support, min_confidence, transactions=data)

  # Print the association rules generated by both algorithms
  print("FP-growth algorithm")
  for rule in fp_association_rules:
    print(rule)

  print("Apriori algorithm")
  for rule in apriori_association_rules:
    print(rule)

if __name__ == "__main__":
  main()
