# 1. Setup: installing and importing libraries

In [1]:
!pip install apyori



In [2]:
import pandas as pd

# 2. Load and preprocess data

In [3]:
# header=None: every line of the CSV, including the first, is read as data
# and the columns get integer labels instead of names.
raw_data = pd.read_csv("example1.csv", header=None)

In [4]:
#df.info()

In [5]:
# Container for the cleaned transactions: one list of non-NaN items per row of raw_data.
transactions = []

In [6]:
# Turn every row of raw_data into one transaction, keeping only the cells that
# actually hold an item (cells pandas read as missing are NaN and are dropped).
# `transactions` is rebound here instead of appended to, so re-running this
# cell never duplicates the transactions.
# itertuples(index=False, name=None) yields plain tuples of the row values and
# avoids building a Series per row as iterrows() does.
transactions = [
    [item for item in row if pd.notnull(item)]
    for row in raw_data.itertuples(index=False, name=None)
]

In [7]:
# Spot-check one cleaned transaction (built from the second row of raw_data).
transactions[1]

['burgers', 'meatballs', 'eggs']

# 3. Import apyori and apply the Apriori algorithm

In [8]:
from apyori import apriori

In [9]:
# Mine association rules with the Apriori algorithm.
# apyori only reads min_support, min_confidence, min_lift and max_length from
# its keyword arguments; it has no `min_length` option (an unknown keyword is
# silently ignored), so none is passed. Trivial rules with an empty antecedent
# have lift 1 and are therefore already excluded by min_lift=3.
rules = apriori(
    transactions,
    min_support=0.003,   # minimum support threshold
    min_confidence=0.2,  # minimum confidence threshold
    min_lift=3,          # minimum lift threshold
    max_length=2,        # largest item set considered (pairs only)
)

In [10]:
# apriori() hands back a generator, so the rules are produced only when it is iterated.
type(rules)

generator

In [11]:
# Materialize the generator so the rules can be indexed and reused.
# NOTE: a generator can be consumed only once - re-running this cell without
# first re-running the apriori cell leaves `results` as an empty list.
results = list(rules)

In [12]:
# Inspect the raw RelationRecord objects before flattening them into a table.
results

[RelationRecord(items=frozenset({'chicken', 'light cream'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)]),
 RelationRecord(items=frozenset({'mushroom cream sauce', 'escalope'}), support=0.005732568990801226, ordered_statistics=[OrderedStatistic(items_base=frozenset({'mushroom cream sauce'}), items_add=frozenset({'escalope'}), confidence=0.3006993006993007, lift=3.790832696715049)]),
 RelationRecord(items=frozenset({'pasta', 'escalope'}), support=0.005865884548726837, ordered_statistics=[OrderedStatistic(items_base=frozenset({'pasta'}), items_add=frozenset({'escalope'}), confidence=0.3728813559322034, lift=4.700811850163794)]),
 RelationRecord(items=frozenset({'honey', 'fromage blanc'}), support=0.003332888948140248, ordered_statistics=[OrderedStatistic(items_base=frozenset({'fromage blanc'}), items_add=frozenset({'honey'}), confidence=0

In [13]:
# Field 1 of a RelationRecord is the support of its item set.
results[0][1]

0.004532728969470737

In [14]:
# Field 2 of a RelationRecord is its list of ordered statistics; the first
# field of the first statistic is the antecedent item set (items_base).
tuple(results[0][2][0])[0]

frozenset({'light cream'})

# 4. Convert the rules into a readable format

In [15]:
def convert_dataFrame(results):
    """Flatten apyori relation records into one tuple per association rule.

    Parameters
    ----------
    results : iterable
        Records laid out like apyori's ``RelationRecord``:
        ``(items, support, ordered_statistics)``, where each ordered statistic
        is ``(items_base, items_add, confidence, lift)``.

    Returns
    -------
    list of tuple
        ``(lhs, rhs, support, confidence, lift)`` for every ordered statistic
        of every record, in input order. ``lhs`` is the antecedent and ``rhs``
        the consequent; ``support`` is the support of the record's item set.

    Notes
    -----
    * Every ordered statistic of a record is emitted, not only the first one,
      so a pair that yields both ``A -> B`` and ``B -> A`` produces two rows.
    * A single-item side is returned as that item, unchanged. A side with
      several items is rendered as its items sorted and joined by ``", "``
      instead of keeping one arbitrary member. An empty side becomes ``""``
      rather than raising ``IndexError``.
    """
    def _label(items):
        # Keep the lone item as-is so pair rules look exactly as before.
        if len(items) == 1:
            return next(iter(items))
        # Sets are unordered: sort so the label is deterministic.
        return ", ".join(sorted(str(item) for item in items))

    rows = []
    for record in results:
        support = record[1]
        for items_base, items_add, confidence, lift in record[2]:
            rows.append(
                (_label(items_base), _label(items_add), support, confidence, lift)
            )
    return rows

In [16]:
# Tabulate the extracted rule tuples under readable column names.
resultFrame = pd.DataFrame(
    data=convert_dataFrame(results),
    columns=[
        "Left Hand Side",
        "Right Hand Side",
        "Support",
        "Confidence",
        "Lift",
    ],
)

In [17]:
# Rank the rules by lift and show the ten strongest (all of them when there are fewer than ten).
resultFrame.nlargest(n = 10, columns = "Lift")

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
3,fromage blanc,honey,0.003333,0.245098,5.164271
0,light cream,chicken,0.004533,0.290598,4.843951
2,pasta,escalope,0.005866,0.372881,4.700812
8,pasta,shrimp,0.005066,0.322034,4.506672
7,whole wheat pasta,olive oil,0.007999,0.271493,4.12241
5,tomato sauce,ground beef,0.005333,0.377358,3.840659
1,mushroom cream sauce,escalope,0.005733,0.300699,3.790833
4,herb & pepper,ground beef,0.015998,0.32345,3.291994
6,light cream,olive oil,0.0032,0.205128,3.11471
