In [1]:
#Importing necessary libraries
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [2]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [5]:
#Load datasets
orders = pd.read_csv('orders.csv')
order_products_prior = pd.read_csv('order_products__prior.csv')
products = pd.read_csv('products.csv')

In [7]:
# View the first few records of each dataframe
print(orders.head())
print(order_products_prior.head())
print(products.head())

# Check the shape of each dataframe
print(orders.shape)
print(order_products_prior.shape)
print(products.shape)

# Display column names
print(orders.columns)
print(order_products_prior.columns)
print(products.columns)


   order_id  user_id eval_set  order_number  order_dow  order_hour_of_day  \
0   2539329        1    prior             1          2                  8   
1   2398795        1    prior             2          3                  7   
2    473747        1    prior             3          3                 12   
3   2254736        1    prior             4          4                  7   
4    431534        1    prior             5          4                 15   

   days_since_prior_order  
0                     NaN  
1                    15.0  
2                    21.0  
3                    29.0  
4                    28.0  
   order_id  product_id  add_to_cart_order  reordered
0         2       33120                1.0        1.0
1         2       28985                2.0        1.0
2         2        9327                3.0        0.0
3         2       45918                4.0        1.0
4         2       30035                5.0        0.0
   product_id                                

In [15]:
##Apriori Algorithm

from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Randomly sample 10000 orders
data_subset = orders.sample(n=10000, random_state=42)

# Merge order_products_prior with products
merged_data = pd.merge(order_products_prior, products, on='product_id')

# Merge sampled orders data with merged_data to get complete information
complete_data = pd.merge(data_subset, merged_data, on='order_id')

# Discard unnecessary columns and keep order_id and product_name
data = complete_data[['order_id', 'product_name']]

# Create a one-hot encoded dataframe
data_one_hot = data.groupby(['order_id', 'product_name']).size().unstack().fillna(0).astype(bool).astype(int)

# Generate frequent itemsets using Apriori algorithm
frequent_itemsets = apriori(data_one_hot, min_support=0.01, use_colnames=True)

# Generate association rules
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

# Rename the columns
rules = rules.rename(columns={"antecedents": "Item 1", "consequents": "Item 2"})

# Select the necessary columns
rules = rules[["Item 1", "Item 2", "confidence", "support", "lift"]]

# Sort the dataframe by 'lift' and 'Item 1'
rules = rules.sort_values(by=["lift", "Item 1"], ascending=False)

# Display the final dataframe
print(rules)


                      Item 1                    Item 2  confidence   support  \
28             (Large Lemon)                   (Limes)    0.264706  0.012336   
29                   (Limes)             (Large Lemon)    0.263158  0.012336   
37    (Organic Strawberries)     (Organic Raspberries)    0.154799  0.013706   
36     (Organic Raspberries)    (Organic Strawberries)    0.320513  0.013706   
21      (Organic Fuji Apple)                  (Banana)    0.450549  0.011239   
20                  (Banana)      (Organic Fuji Apple)    0.077652  0.011239   
4      (Organic Raspberries)  (Bag of Organic Bananas)    0.352564  0.015077   
5   (Bag of Organic Bananas)     (Organic Raspberries)    0.131894  0.015077   
30    (Organic Hass Avocado)    (Organic Baby Spinach)    0.204082  0.013706   
31    (Organic Baby Spinach)    (Organic Hass Avocado)    0.185874  0.013706   
11                  (Banana)          (Cucumber Kirby)    0.071970  0.010417   
10          (Cucumber Kirby)            

#Business Recommendations based on association rules:


Based on the association rules generated from the recommender system, here are two business recommendations that can help deliver positive outcomes:

Cross-Promotion Strategy: Since there are several strong association rules between different products, one recommendation is to implement a cross-promotion strategy. For example:

Item 1: Large Lemon
Item 2: Limes
Confidence: 0.26
Support: 0.01
Lift: 5.65
Recommendation: Create cross-promotional offers or bundle deals to encourage customers to purchase Large Lemons and Limes together. This strategy can help increase the sales of both items, as customers who buy one item are more likely to buy the other.

Targeted Marketing Campaign: Another recommendation is to leverage the association rules to develop a targeted marketing campaign. For instance:

Item 1: Organic Strawberries
Item 2: Organic Raspberries
Confidence: 0.15
Support: 0.01
Lift: 3.62
Recommendation: Identify customers who frequently purchase Organic Strawberries and target them with personalized promotions or discounts for Organic Raspberries. This targeted marketing campaign can encourage customers to explore complementary products and increase their overall basket size.

By implementing these recommendations, businesses can capitalize on the association rules to optimize cross-selling opportunities, enhance customer satisfaction, and drive revenue growth.