In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from apyori import apriori

In [2]:
# Load the aisle lookup table from aisles.csv (read relative to the notebook's
# working directory) and preview its first six rows as the cell output.
dfAisles = pd.read_csv('aisles.csv')
print("Aisles:")
dfAisles.head(6)

Aisles:


Unnamed: 0,aisle_id,aisle
0,1,prepared soups salads
1,2,specialty cheeses
2,3,energy granola bars
3,4,instant foods
4,5,marinades meat preparation
5,6,other


In [3]:
# Load the orders table from orders.csv and preview its first six rows.
# ordersList() further down takes the first n 'order_id' values from this frame.
dfOrders = pd.read_csv('orders.csv')
print("Orders:")
dfOrders.head(6)

Orders:


Unnamed: 0,order_id,user_id,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order
0,2539329,1,prior,1,2,8,
1,2398795,1,prior,2,3,7,15.0
2,473747,1,prior,3,3,12,21.0
3,2254736,1,prior,4,4,7,29.0
4,431534,1,prior,5,4,15,28.0
5,3367565,1,prior,6,2,7,19.0


In [4]:
# Load the product catalogue from products.csv and preview its first six rows.
# The association-rule report later maps 'product_id' values back to
# 'product_name' through this frame.
dfProducts = pd.read_csv('products.csv')
print("Products:")
dfProducts.head(6)

Products:


Unnamed: 0,product_id,product_name,aisle_id,department_id
0,1,Chocolate Sandwich Cookies,61,19
1,2,All-Seasons Salt,104,13
2,3,Robust Golden Unsweetened Oolong Tea,94,7
3,4,Smart Ones Classic Favorites Mini Rigatoni Wit...,38,1
4,5,Green Chile Anytime Sauce,5,13
5,6,Dry Nose Oil,11,11


In [5]:
# Load the order/product line items from order_products__prior.csv and preview
# the first six rows. ordersList() selects rows of this frame by 'order_id' and
# collects their 'product_id' values into per-order baskets.
dfOrderProductsPrior = pd.read_csv('order_products__prior.csv')
print("Order Products, Prior:")
dfOrderProductsPrior.head(6)

Order Products, Prior:


Unnamed: 0,order_id,product_id,add_to_cart_order,reordered
0,2,33120,1,1
1,2,28985,2,1
2,2,9327,3,0
3,2,45918,4,1
4,2,30035,5,0
5,2,17794,6,1


In [6]:
# Load the department lookup table from departments.csv and preview its first
# six rows as the cell output.
dfDepartments = pd.read_csv('departments.csv')
print("Departments:")
dfDepartments.head(6)

Departments:


Unnamed: 0,department_id,department
0,1,frozen
1,2,other
2,3,bakery
3,4,produce
4,5,alcohol
5,6,international


In [13]:
def ordersList(n):
    """Return the product baskets for the first ``n`` rows of ``dfOrders``.

    Reads the module-level frames ``dfOrders`` (column ``order_id``) and
    ``dfOrderProductsPrior`` (columns ``order_id`` and ``product_id``).

    Parameters
    ----------
    n : int
        Number of leading rows of ``dfOrders`` to take, with the same
        semantics as ``DataFrame.head(n)``.

    Returns
    -------
    list of list
        One inner list per selected order, in the same order as ``dfOrders``.
        Each inner list holds that order's ``product_id`` values in the row
        order of ``dfOrderProductsPrior``. Orders with no matching rows
        contribute an empty list, so the result always has one entry per
        selected order.
    """
    orderIds = dfOrders['order_id'].head(n)

    # Filter the (large) line-item table once instead of rescanning it for
    # every single order id.
    isSelected = dfOrderProductsPrior['order_id'].isin(orderIds)
    selectedRows = dfOrderProductsPrior.loc[isSelected, ['order_id', 'product_id']]

    # Group the surviving rows by order; row order inside each group is kept,
    # so products stay in their original sequence.
    basketByOrder = {
        orderId: productIds.tolist()
        for orderId, productIds in selectedRows.groupby('order_id', sort=False)['product_id']
    }

    # Emit one fresh list per requested order (copying so that repeated order
    # ids never share the same list object).
    return [list(basketByOrder.get(orderId, ())) for orderId in orderIds]

# Build the transaction list (one basket of product ids per order) from the
# first 1000 rows of the orders table.
orderedTogether = ordersList(1000)

# Run the apriori algorithm on the baskets with the desired minimum support,
# minimum confidence and minimum lift.
# NOTE(review): apyori's apriori() appears to read only min_support,
# min_confidence, min_lift and max_length, so min_length looks to be silently
# ignored - confirm against the installed version. The report below does not
# rely on it: a rule with a one-item antecedent and a non-empty consequent
# always spans at least two products.
associationRules = apriori(orderedTogether, min_support = 0.005, min_confidence=0.1, min_lift=3, min_length=2)
associationResults = list(associationRules)

# Product for which frequently co-purchased products should be reported.
frequentOtherItems = 'Soda'

# Build the id -> name lookup once instead of scanning dfProducts for every
# item of every rule.
productNames = dict(zip(dfProducts['product_id'], dfProducts['product_name']))
targetIds = {productId for productId, name in productNames.items() if name == frequentOtherItems}

alreadyListed = []
for record in associationResults:
    # Each record is (items, support, ordered_statistics), matching the
    # positional access the original code used.
    support = record[1]
    for rule in record[2]:
        # Each ordered statistic is (antecedent, consequent, confidence, lift).
        itemsBase, itemsAdd, confidence = rule[0], rule[1], rule[2]

        # Keep only rules of the form {target product} -> {other products}.
        # The item set itself is unordered, so the rule direction has to be
        # read from the statistic rather than from the position of an item.
        if len(itemsBase) != 1 or not (set(itemsBase) & targetIds):
            continue

        otherNames = sorted(str(productNames.get(productId, productId)) for productId in itemsAdd)
        otherLabel = "' and '".join(otherNames)
        if (frequentOtherItems, otherLabel) in alreadyListed:
            continue

        print("'" + frequentOtherItems + "' is frequently bought with '" + otherLabel + "' with a confidence of " + str(round(confidence * 100,2)) + "%, and support of " + str(round(support * 100,2)) + "%.")
        alreadyListed.append((frequentOtherItems, otherLabel))

'Soda' is frequently bought with 'Hass Avocados' with a confidence of 24.32%, and support of 0.9%.
'Soda' is frequently bought with 'Organic String Cheese' with a confidence of 21.62%, and support of 0.8%.
'Soda' is frequently bought with 'Zero Calorie Cola' with a confidence of 43.24%, and support of 1.6%.


Credit to https://stackabuse.com/association-rule-mining-via-apriori-algorithm-in-python/ for the basic implementation of the Apriori association-rule mining used above.