In [80]:
import pandas as pd
from itertools import combinations

In [81]:
def load_dataset():
  df = pd.read_csv('Groceries_dataset.csv')
  return df.groupby('Member_number')['itemDescription'].apply(list).tolist()

In [82]:
def Support(dataset, itemset):
  return sum(1 for transaction in dataset if set(itemset).issubset(set(transaction))) / len(dataset)

In [83]:
def generate_candidates(itemset, size):
  return [frozenset(set(item1).union(item2)) for item1 in itemset for item2 in itemset if len(set(item1).union(item2)) == size]

In [84]:
def apriori_algorithm(dataset, min_support):
  k = 2
  unique_items = set(item for transaction in dataset for item in transaction)
  frequent_itemsets = []
  candidates = [frozenset({item}) for item in unique_items]
  frequent_itemsets.extend(itemset for itemset in candidates if Support(dataset, itemset) >= min_support)

  while candidates:
    candidates = generate_candidates(set(frequent_itemsets), k)
    frequent_itemsets.extend(itemset for itemset in candidates if Support(dataset, itemset) >= min_support)
    k += 1

  return frequent_itemsets

In [85]:
def Association_rules(frequent_itemsets, min_confidence, dataset):
  rules = []
  for itemset in frequent_itemsets:
    for i in range(1, len(itemset)):
      for left_hand_side in combinations(itemset, i):
        left_hand_side = frozenset(left_hand_side)
        right_hand_side = itemset - left_hand_side
        rule = (left_hand_side, right_hand_side)

        confidence = Support(dataset, itemset) / Support(dataset, left_hand_side)

        if confidence >= min_confidence:
          rules.append({'Left-hand side': left_hand_side, 'Right-hand side': right_hand_side, 'Support': Support(dataset, itemset), 'Confidence': confidence, 'Lift': confidence / Support(dataset, right_hand_side),})

  return rules

In [86]:
def main():
  dataset = load_dataset() #Loading the dataset
  min_support = 0.12 #Mining frequent itemsets with min_support threshold of 0.12
  frequent_itemsets = apriori_algorithm(dataset, min_support)
  min_confidence = 0.1 #Generating association rules with min_confidence threshold of 0.1
  association_rules = Association_rules(frequent_itemsets, min_confidence, dataset)

  df_rules = pd.DataFrame(association_rules)

  print("Frequent Itemsets:")
  print(frequent_itemsets)

  print("Association Rules:")
  print(df_rules)

if __name__ == "__main__":
    main()

Frequent Itemsets:
[frozenset({'fruit/vegetable juice'}), frozenset({'pork'}), frozenset({'root vegetables'}), frozenset({'pip fruit'}), frozenset({'newspapers'}), frozenset({'frankfurter'}), frozenset({'sausage'}), frozenset({'bottled beer'}), frozenset({'tropical fruit'}), frozenset({'yogurt'}), frozenset({'whole milk'}), frozenset({'bottled water'}), frozenset({'domestic eggs'}), frozenset({'whipped/sour cream'}), frozenset({'pastry'}), frozenset({'citrus fruit'}), frozenset({'canned beer'}), frozenset({'shopping bags'}), frozenset({'other vegetables'}), frozenset({'curd'}), frozenset({'soda'}), frozenset({'rolls/buns'}), frozenset({'brown bread'}), frozenset({'butter'}), frozenset({'rolls/buns', 'whole milk'}), frozenset({'rolls/buns', 'other vegetables'}), frozenset({'whole milk', 'rolls/buns'}), frozenset({'whole milk', 'soda'}), frozenset({'whole milk', 'yogurt'}), frozenset({'whole milk', 'other vegetables'}), frozenset({'whole milk', 'soda'}), frozenset({'other vegetables', 's

**Support** - The support of an item x is the ratio of the number of transactions in which the item x is present with respect to the total number of transactions. Support = Transactions containing x / Total transactions
Higher support means that the itemset is frequently occurring, making it a candidate for a "frequent" itemset.

**Confidence** - Confidence is the reliability of an association rule. It is calculated as follows: Support(lhs Union rhs) / Support(lhs).
Higher the confidence means that there is a strong relationship between the lhs and the rhs.

**Lift** - Lift is the likelihood of the item y being purchased when item x is sold. It is the Confidence / Support(rhs).
Lift values greater than 1 indicate that the rule has a positive impact, suggesting that the occurrence of the antecedent increases the likelihood of the consequent.

These metrics help us discover associations between items frequently purchased together.
Support identifies popular items or itemsets, while confidence helps in revealing reliable associations between items. Lift shows us whether the association is meaningful or just a random occurrence.

For example,
If rolls/buns -> whole milk has a support of 0.17(17%), it means that 17% of transactions include whole milk.
If it has a confidence of 0.51(51%), 51% of transactions with rolls/buns also include whole milk.
If it has a lift of 1.1, whole milk is 1.1 times more likely to be bought when rolls/buns are purchased.

The Apriori algorithm, by using these metrics, helps businesses uncover patterns and relationships within transactional data. These patterns are then translated into actionable insights for improving various aspects of business operations, such as product placement, marketing strategies, and customer experience.