# Apriori Algorithm

In [None]:
import pandas as pd
import numpy as np


In [None]:
# Read the basket file; header=None because the first row is already a
# transaction, not a row of column names.
data = pd.read_csv(
    "ANUBHAV JHA - Market_Basket_Optimisation (2).csv",
    header=None,
)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,


In [None]:
# define the transactions
#
# One list of strings per row of `data`. Every cell is passed through str(),
# so missing cells become the literal string "nan" (removed by the cleaning
# step that follows). The row/column counts are taken from the frame itself
# instead of a hard-coded width of 20, and the underlying array is fetched
# once rather than once per cell.
transactions = [[str(cell) for cell in row] for row in data.values]


In [None]:
# clean transaction
# Drop the "nan" placeholders that str() produced for empty cells, keeping
# the remaining items of every basket in their original order.
clean_transactions = [
    [item for item in basket if item != "nan"]
    for basket in transactions
]

transactions = clean_transactions
transactions[-1]

['eggs', 'frozen smoothie', 'yogurt cake', 'low fat yogurt']

In [None]:
def get_support(itemset, transactions):
    """Return the fraction of transactions that contain every item of ``itemset``.

    Args:
        itemset: A ``set`` or ``frozenset`` of items (anything exposing
            ``issubset``).
        transactions: A sized collection of transactions, each an iterable
            of items.

    Returns:
        A float in ``[0, 1]``. ``0.0`` is returned when ``transactions`` is
        empty, instead of raising ``ZeroDivisionError``.
    """
    total = len(transactions)
    if total == 0:
        # No transactions means nothing can support the itemset.
        return 0.0

    matches = sum(1 for transaction in transactions if itemset.issubset(transaction))
    return matches / total

def generate_candidates(frequent_itemsets_k_minus_1, k):
    """Generate candidate itemsets of size ``k`` from frequent itemsets of size ``k - 1``.

    Two steps are performed:

    1. Join: every pair of frequent (k-1)-itemsets whose union has exactly
       ``k`` items yields a candidate.
    2. Prune: a candidate is kept only if each of its (k-1)-item subsets is
       itself in ``frequent_itemsets_k_minus_1``. A k-itemset with an
       infrequent subset cannot be frequent, so dropping it here avoids a
       useless support scan without changing the final frequent itemsets.

    Args:
        frequent_itemsets_k_minus_1: Iterable of itemsets (any iterables of
            hashable items) of size ``k - 1``.
        k: Size of the candidates to build.

    Returns:
        A ``set`` of ``frozenset`` candidates, each of size ``k``.
    """
    # Normalise to frozensets once so membership tests in the prune step work
    # and duplicated inputs are collapsed.
    frequent = {frozenset(itemset) for itemset in frequent_itemsets_k_minus_1}
    ordered = list(frequent)

    candidates = set()
    for index, left in enumerate(ordered):
        for right in ordered[index + 1:]:
            union = left | right
            if len(union) != k:
                continue
            # Prune step: removing any single item must leave a frequent set.
            if all((union - {item}) in frequent for item in union):
                candidates.add(union)
    return candidates

In [None]:
def apriori(transactions, min_support):
    """Find every frequent itemset in ``transactions`` using the Apriori algorithm.

    Args:
        transactions: Iterable of transactions, each an iterable of hashable
            items. An item repeated inside one transaction is counted once
            for that transaction.
        min_support: Minimum support threshold, expressed as a fraction of
            the number of transactions.

    Returns:
        A dict mapping each frequent itemset (``frozenset``) to its support
        (float). An empty dict is returned when there are no transactions.
    """
    # 1. Generate frequent 1-itemsets
    #
    # Support is "fraction of transactions containing the item", so each
    # transaction is reduced to its distinct items before counting.
    # dict.fromkeys keeps first-seen order, so the result order does not
    # depend on set iteration order.
    item_counts = {}
    baskets = []
    for transaction in transactions:
        distinct_items = dict.fromkeys(transaction)
        for item in distinct_items:
            item_counts[item] = item_counts.get(item, 0) + 1
        # Stored as sets so the subset tests in get_support need no
        # per-call conversion.
        baskets.append(frozenset(distinct_items))

    num_transactions = len(baskets)
    if num_transactions == 0:
        # Nothing to mine; also avoids dividing by zero below.
        return {}

    frequent_itemsets_1 = {
        frozenset([item]): count / num_transactions
        for item, count in item_counts.items()
        if count / num_transactions >= min_support
    }

    all_frequent_itemsets = frequent_itemsets_1.copy()
    frequent_itemsets_k_minus_1 = set(frequent_itemsets_1.keys())
    k = 2

    # 2. Generate frequent k-itemsets (k > 1)
    while frequent_itemsets_k_minus_1:
        candidates = generate_candidates(frequent_itemsets_k_minus_1, k)
        frequent_itemsets_k = {}

        for candidate in candidates:
            support = get_support(candidate, baskets)
            if support >= min_support:
                frequent_itemsets_k[candidate] = support

        all_frequent_itemsets.update(frequent_itemsets_k)
        # Advance to the next level; the loop stops once a level yields no
        # frequent itemsets.
        frequent_itemsets_k_minus_1 = set(frequent_itemsets_k.keys())
        k += 1

    return all_frequent_itemsets

In [None]:
# Keep itemsets that occur in at least this fraction of all transactions.
min_support = 0.05

frequent_itemsets = apriori(transactions=transactions, min_support=min_support)

In [None]:
# One record per frequent itemset. The items are sorted before joining
# because a frozenset has no defined iteration order, which would otherwise
# make the label of a multi-item itemset vary between runs.
frequent_itemsets_list = [
    {'Itemset': ', '.join(sorted(itemset)), 'Support': support}
    for itemset, support in frequent_itemsets.items()
]

# Create a pandas DataFrame from the list of dictionaries; the columns are
# named explicitly so they exist even when no itemset is frequent.
frequent_itemsets_df = pd.DataFrame(frequent_itemsets_list, columns=['Itemset', 'Support'])

# Display the DataFrame (rich notebook rendering instead of print)
frequent_itemsets_df

                     Itemset   Support
0                     shrimp  0.071457
1             low fat yogurt  0.076523
2                  green tea  0.132116
3              mineral water  0.238368
4            frozen smoothie  0.063325
5                  olive oil  0.065858
6                    burgers  0.087188
7                       eggs  0.179709
8                     turkey  0.062525
9                       milk  0.129583
10          whole wheat rice  0.058526
11              french fries  0.170911
12                      soup  0.050527
13         frozen vegetables  0.095321
14                 spaghetti  0.174110
15                   cookies  0.080389
16               cooking oil  0.051060
17                 chocolate  0.163978
18                   chicken  0.059992
19                  tomatoes  0.068391
20                  pancakes  0.095054
21             grated cheese  0.052393
22               ground beef  0.098254
23                  escalope  0.079323
24                      c

## Using mlxtend

In [None]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

# Convert the DataFrame to a list of transactions.
# Rows shorter than the widest basket are padded with NaN by read_csv; those
# cells are skipped so that "nan" is not one-hot encoded as if it were a
# product. The remaining items are converted to strings.
transactions = [
    [str(item) for item in row if pd.notna(item)]
    for row in data.values.tolist()
]

# One-hot encode: one boolean column per distinct item, one row per transaction.
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)  # Pass the list of transactions
df = pd.DataFrame(te_ary, columns=te.columns_)
df.head()

Unnamed: 0,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,False,True,True,False,True,False,False,False,False,False,...,False,True,False,False,True,False,False,True,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7496,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7497,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7498,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7499,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [None]:
# Imported under an alias so the notebook's own apriori() defined above is
# not shadowed; otherwise re-running the hand-written section after this
# cell would call the library function with the wrong argument types.
from mlxtend.frequent_patterns import apriori as mlxtend_apriori

# Same threshold as the hand-written run, so the two results are comparable.
mlxtend_apriori(df, min_support=min_support, use_colnames=True)

Unnamed: 0,support,itemsets
0,0.087188,(burgers)
1,0.081056,(cake)
2,0.059992,(chicken)
3,0.163845,(chocolate)
4,0.080389,(cookies)
5,0.05106,(cooking oil)
6,0.179709,(eggs)
7,0.079323,(escalope)
8,0.170911,(french fries)
9,0.063325,(frozen smoothie)


In [None]:
# display item name