**Aim:** To perform the Apriori algorithm on the given dataset using
1. By creating functions.
2. By using NumPy and the `apyori` library.
3. WEKA tool

And validating the results


In [None]:
!pip install apyori

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


---
1. By creating Functions
2. By using libraries

---

In [None]:
# Importing libraries
import itertools
from apyori import apriori
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Define the frequent itemset mining function
def Apriori(transactions, min_support):
    """Mine every frequent itemset from ``transactions`` with the Apriori algorithm.

    Parameters
    ----------
    transactions : iterable of iterables
        Each inner iterable holds the (hashable) items of one transaction.
        Duplicate items inside a transaction are counted once.
    min_support : int or float
        Absolute support threshold: an itemset is kept when the number of
        transactions containing it is ``>= min_support``.

    Returns
    -------
    dict
        Maps ``frozenset`` itemsets of every size to their support count
        (number of transactions that contain the whole itemset).
    """
    # Normalise once: sets give O(1) membership tests, make repeated items in
    # one transaction count a single time, and let us make several passes even
    # when `transactions` is a one-shot iterator.
    baskets = [frozenset(transaction) for transaction in transactions]

    # Pass 1: support count of every individual item.
    counts = {}
    for basket in baskets:
        for item in basket:
            key = frozenset([item])
            counts[key] = counts.get(key, 0) + 1
    current = {itemset: count for itemset, count in counts.items()
               if count >= min_support}
    freq_items = dict(current)

    # Pass k: build size-k candidates from the frequent (k-1)-itemsets only.
    k = 2
    while current:
        candidates = set()
        for left, right in itertools.combinations(list(current), 2):
            union = left | right
            # Join step: the two itemsets must differ in exactly one item.
            if len(union) != k:
                continue
            # Prune step: every (k-1)-subset of a frequent itemset must itself
            # be frequent (the Apriori property).
            if all(union - {item} in current for item in union):
                candidates.add(union)

        # Count how many transactions contain each candidate.
        counts = dict.fromkeys(candidates, 0)
        for basket in baskets:
            for candidate in candidates:
                if candidate.issubset(basket):
                    counts[candidate] += 1

        # Keep the candidates that reach the threshold; they seed the next level.
        current = {itemset: count for itemset, count in counts.items()
                   if count >= min_support}
        freq_items.update(current)
        k += 1

    return freq_items

In [None]:
# Toy market-basket data: one inner list per transaction.
transactions = [
    ['bread', 'milk'],
    ['bread', 'diapers', 'beer', 'eggs'],
    ['milk', 'diapers', 'beer', 'cola'],
    ['bread', 'milk', 'diapers', 'beer'],
    ['bread', 'milk', 'diapers', 'cola'],
]

# Absolute threshold: Apriori compares it against raw occurrence counts.
min_support = 3

freq_items = Apriori(transactions, min_support)

# One row per frequent itemset, with its support count alongside.
df = pd.DataFrame({
    "Items": [list(itemset) for itemset in freq_items],
    "Support": list(freq_items.values()),
})
df

Unnamed: 0,Items,Support
0,[diapers],4
1,[bread],4
2,[beer],3
3,[milk],4
4,"[diapers, bread]",3
5,"[diapers, beer]",3
6,"[diapers, milk]",3
7,"[bread, milk]",3


In [None]:
# Read the basket file, labelling its columns 0-19.
market_df = pd.read_csv(
    "/content/Market_Basket_Optimisation.csv",
    names=list(range(20)),
)
market_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7496,butter,light mayo,fresh bread,,,,,,,,,,,,,,,,,
7497,burgers,frozen vegetables,eggs,french fries,magazines,green tea,,,,,,,,,,,,,,
7498,chicken,,,,,,,,,,,,,,,,,,,
7499,escalope,green tea,,,,,,,,,,,,,,,,,,


In [None]:
# Build one item list per CSV row: duplicates within a row are collapsed by
# the set, and the NaN cells that pad short rows are dropped.
transactions = [
    [item for item in set(row) if str(item) != "nan"]
    for _, row in market_df.iterrows()
]

# print(transactions)

# Output - transaction of each day
# [['burgers', 'meatballs', 'eggs'],
#  ['chutney'],
#  ['turkey', 'avocado'],
#  ['mineral water', 'milk', 'energy bar', 'whole wheat rice', 'green tea'],
#  ['low fat yogurt'],
#  ['whole wheat pasta', 'french fries'], ... ]

In [None]:
# Sanity check: number of transactions built from the CSV rows.
len(transactions)

7501

In [None]:
# Mine association rules between pairs of products with apyori.
rules = apriori(
    transactions=transactions,
    # Relative support floor: a product bought about 3 times a day for a week,
    # i.e. (3 * 7) / 7501 transactions ~= 0.003.
    min_support=0.003,
    min_confidence=0.2,
    # Relevance of the rule; a lift of at least 3 is treated as good here.
    min_lift=3,
    # Restrict the rules to two products, A and B.
    # NOTE(review): apyori appears to read only max_length and to ignore
    # min_length -- confirm against the apyori documentation.
    min_length=2,
    max_length=2,
)

# Materialise the rules so they can be indexed and iterated more than once.
results = list(rules)

In [None]:
def inspect(results):
    """Flatten association-rule records into plain tuples for tabulation.

    Parameters
    ----------
    results : iterable
        Rule records, each indexable as ``record[1]`` (support) and
        ``record[2]`` (a sequence of rule statistics, each indexable as
        ``(lhs_items, rhs_items, confidence, lift)``). Presumably these are
        apyori ``RelationRecord`` objects -- confirm against the caller.
        Any iterable is accepted, including a one-shot generator.

    Returns
    -------
    list of tuple
        One ``(lhs, rhs, support, confidence, lift)`` tuple per record. Only
        the first rule statistic of each record is used, and only the first
        item of its left-hand and right-hand item collections.

    Raises
    ------
    IndexError
        If a record has no rule statistics, or a side of the rule is empty.
    """
    rows = []
    # Single pass: the previous version walked `results` five times, so a
    # generator was exhausted after the first walk and zip() returned [].
    for record in results:
        first_rule = record[2][0]
        lhs = tuple(first_rule[0])[0]
        rhs = tuple(first_rule[1])[0]
        rows.append((lhs, rhs, record[1], first_rule[2], first_rule[3]))
    return rows


    
# Tabulate the flattened rules, one row per rule.
rule_rows = inspect(results)
results_df = pd.DataFrame(
    rule_rows,
    columns=['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'],
)

# Strongest associations (highest lift) first.
results_df.sort_values(by=["Lift"], ascending=False)

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
3,fromage blanc,honey,0.003333,0.245098,5.164271
0,light cream,chicken,0.004533,0.290598,4.843951
2,pasta,escalope,0.005866,0.372881,4.700812
8,pasta,shrimp,0.005066,0.322034,4.506672
7,whole wheat pasta,olive oil,0.007999,0.271493,4.12241
5,tomato sauce,ground beef,0.005333,0.377358,3.840659
1,mushroom cream sauce,escalope,0.005733,0.300699,3.790833
4,herb & pepper,ground beef,0.015998,0.32345,3.291994
6,light cream,olive oil,0.0032,0.205128,3.11471
