# 3. Recommendation Engine 


In [59]:
# Libraries needed
import pandas as pd
from datetime import datetime
import pickle
import re

## 3.1 Data Frames

* Transactions details
* Market Basket Analysis - Product Level
* Market Basket Analysis - Product Type Level

In [60]:
purchase_data = pd.read_csv('/Users/elizaclapasmac/Desktop/Repositories/Final Project/Final-Project---LHL/Data/2.- Market Basket Analysis/Purchase Data - No Refunds.csv')
print(purchase_data.shape)

# Step 1: Group by Order ID and Product to count occurrences
count_per_order = purchase_data.groupby(['Order ID', 'Product']).size().reset_index(name='Count Per Order')

# Step 2: Merge the count data back into the original DataFrame
df_with_counts = pd.merge(purchase_data, count_per_order, on=['Order ID', 'Product'], how='left')

print(df_with_counts.shape)
df_with_counts.head()

(25089, 15)
(25089, 16)


Unnamed: 0,Sale ID,Date,Order ID,Order,Product,Net sales,Payment type,Credit card,Credit card type,Billing country,Refunds,Net payments,returned_item_rate,product_type,sales_count,Count Per Order
0,13087920095372,2022-06-01 09:55:27+00:00,4089814286476,#19176,buttertarts,22.5,Shopify Payments,Mastercard,Standard,Canada,0.0,147.15,0.02301,pastry,368,3
1,13087920160908,2022-06-01 09:55:27+00:00,4089814286476,#19176,buttertarts,22.5,Shopify Payments,Mastercard,Standard,Canada,0.0,147.15,0.02301,pastry,368,3
2,13087920193676,2022-06-01 09:55:27+00:00,4089814286476,#19176,baked fruit tart,20.95,Shopify Payments,Mastercard,Standard,Canada,0.0,147.15,0.02238,pie,275,2
3,13087920029836,2022-06-01 09:55:27+00:00,4089814286476,#19176,baked fruit tart,13.5,Shopify Payments,Mastercard,Standard,Canada,0.0,147.15,0.02238,pie,275,2
4,13087920062604,2022-06-01 09:55:27+00:00,4089814286476,#19176,breakfast pastry platter,40.0,Shopify Payments,Mastercard,Standard,Canada,0.0,147.15,0.00724,breakfast,119,1


In [61]:
df_with_counts.to_csv('/Users/elizaclapasmac/Desktop/Repositories/Final Project/Final-Project---LHL/Data/2.- Market Basket Analysis/Purchase Data with Counts Per Order.csv', index=False)

In [62]:
rules_products = pd.read_csv('/Users/elizaclapasmac/Desktop/Repositories/Final Project/Final-Project---LHL/Data/2.- Market Basket Analysis/Rules and Product Types and Sales Counts.csv')
rules_product_types = pd.read_csv('/Users/elizaclapasmac/Desktop/Repositories/Final Project/Final-Project---LHL/Data/2.- Market Basket Analysis/New Rules and Product Types and Sales Counts.csv')

print('Products rules shape:', rules_products.shape)
print('\nProducts rules Data types and columns:', rules_products.dtypes)
print('\nProduct Types rules shape:', rules_product_types.shape)
print('\nProduct Types Data types and columns:', rules_product_types.dtypes)
print('\nTransactions information shape:', df_with_counts.shape)
print('\nTransactions information Data types and columns:', df_with_counts.dtypes)

Products rules shape: (18, 15)

Products rules Data types and columns: Unnamed: 0                   int64
antecedents                 object
consequents                 object
antecedent support         float64
consequent support         float64
support                    float64
confidence                 float64
lift                       float64
leverage                   float64
conviction                 float64
zhangs_metric              float64
antecedent_sales_count       int64
antecedent_product_type     object
consequent_sales_count       int64
consequent_product_type     object
dtype: object

Product Types rules shape: (120, 13)

Product Types Data types and columns: Unnamed: 0                        int64
antecedents                      object
consequents                      object
antecedent support              float64
consequent support              float64
support                         float64
confidence                      float64
lift                            f

## 3.2 Recommendation Engine - Product Level

The recommendation engine at the Product Level is quite limited because the Market Basket Analysis did not generate enough association rules. As a result, it will only make recommendations for a small number of products. 

To enhance the recommendation engine's effectiveness, a hybrid approach is needed. In sections 3.3 and 3.4, the engine is expanded to include recommendations at the Product Type level when no product-level recommendations are found. 

Additionally, it will suggest popular products within the same product type that are typically bought around the same date. If no recommendations can be made at the product or product type level, the engine will then recommend the most popular items in general.

In [63]:
# Convert antecedents and consequents to frozensets containing entire product names
rules_products['antecedents'] = rules_products['antecedents'].apply(lambda x: frozenset([x]))
rules_products['consequents'] = rules_products['consequents'].apply(lambda x: frozenset([x]))


In [64]:
def recommend_products(input_products, rules_df, top_n=5):
    recommendations = []
    
    # Convert input_products to a frozenset to match the format in the rules dataframe
    input_frozenset = frozenset(input_products)
    
    # Filter the rules where the antecedents match the input products
    matching_rules = rules_df[rules_df['antecedents'] == input_frozenset]
    
    # Sort the matching rules by lift, confidence, or another metric to get the top N recommendations
    sorted_rules = matching_rules.sort_values(by='lift', ascending=False).head(top_n)
    
    # Extract the recommended products (consequents) from the sorted rules
    for _, rule in sorted_rules.iterrows():
        recommendations.append((rule['consequents'], rule['lift'], rule['confidence']))
    
    return recommendations

# Example usage
input_products = ['apple pie']  # Input from the user or current shopping cart
recommendations = recommend_products(input_products, rules_products, top_n=5)
for rec in recommendations:
    print(f"Recommend: {rec[0]} with Lift: {rec[1]} and Confidence: {rec[2]}")


Recommend: frozenset({'9inch pumpkin pie'}) with Lift: 5.421807423300446 and Confidence: 0.2640306122448979
Recommend: frozenset({'blueberry pie'}) with Lift: 4.46937059577194 and Confidence: 0.0803571428571428
Recommend: frozenset({'12 dinner rolls'}) with Lift: 3.096471239741448 and Confidence: 0.1403061224489795
Recommend: frozenset({'raspberry pie'}) with Lift: 2.760857736446379 and Confidence: 0.0803571428571428


In [65]:
input_products = ['assorted holiday cookies']  # Input from the user or current shopping cart
recommendations = recommend_products(input_products, rules_products, top_n=5)
for rec in recommendations:
    print(f"Recommend: {rec[0]} with Lift: {rec[1]} and Confidence: {rec[2]}")

Recommend: frozenset({'scottish shortbread'}) with Lift: 10.81038940915562 and Confidence: 0.1472995090016366
Recommend: frozenset({'12 dinner rolls'}) with Lift: 2.528408594509904 and Confidence: 0.1145662847790507


## 3.3 Recommendation Engine Combined - Product Level & Product Type Level

The recommendation engine provides product type-based recommendations when no product-level association is found. Here's a brief summary of the recommendations for the product "buttertarts" based on its product type "pastry":

1. **Pastry → Cookies**: 
   - **Lift**: 2.49
   - **Confidence**: 37.44%
   - **Analysis**: Customers who purchase pastries are 2.49 times more likely to also purchase cookies compared to random chance. This suggests that offering cookies alongside pastries could enhance cross-selling opportunities.

2. **Pastry → Savoury Items**: 
   - **Lift**: 1.88
   - **Confidence**: 27.25%
   - **Analysis**: There is a moderate association between pastries and savory items, indicating that customers may prefer to balance sweet and savory flavors in their orders.

3. **Pastry → Bread**: 
   - **Lift**: 1.64
   - **Confidence**: 21.09%
   - **Analysis**: Bread is another item that customers frequently purchase with pastries. This association could be leveraged for meal deals or breakfast promotions.

4. **Pastry → Pie**: 
   - **Lift**: 1.44
   - **Confidence**: 25.59%
   - **Analysis**: There is a significant likelihood that customers who buy pastries also purchase pies, highlighting an opportunity to promote these items together, especially during seasonal events.

5. **Pastry → Cakes**: 
   - **Lift**: 0.33
   - **Confidence**: 15.88%
   - **Analysis**: While the association is weaker, there is still a notable connection between pastries and cakes. This could be a strategic pairing for upselling during special occasions or events.

This output demonstrates that when a direct product-level association is not available, the recommendation engine successfully falls back on product type-level associations, offering relevant suggestions that can help increase sales through cross-promotion.

In [66]:
# Convert antecedents and consequents to frozensets
rules_product_types['antecedents'] = rules_product_types['antecedents'].apply(lambda x: frozenset(eval(x)) if isinstance(x, str) else x)
rules_product_types['consequents'] = rules_product_types['consequents'].apply(lambda x: frozenset(eval(x)) if isinstance(x, str) else x)

# Check if conversion was successful
print(rules_product_types['antecedents'].head())
rules_product_types.head()

0    (holiday)
1    (savoury)
2     (muffin)
3      (bread)
4    (holiday)
Name: antecedents, dtype: object


Unnamed: 0.1,Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric,antecedent_transaction_count,consequent_transaction_count
0,60,(holiday),(savoury),0.013626,0.145126,0.006611,0.485207,3.343346,0.004634,1.660617,0.710581,"(169,)","(1800,)"
1,61,(savoury),(holiday),0.145126,0.013626,0.006611,0.045556,3.343346,0.004634,1.033454,0.819885,"(1800,)","(169,)"
2,12,(muffin),(bread),0.013868,0.128598,0.005563,0.401163,3.119512,0.00378,1.455157,0.688992,"(172,)","(1595,)"
3,13,(bread),(muffin),0.128598,0.013868,0.005563,0.04326,3.119512,0.00378,1.030722,0.779706,"(1595,)","(172,)"
4,9,(holiday),(bread),0.013626,0.128598,0.005402,0.39645,3.082862,0.00365,1.443794,0.684959,"(169,)","(1595,)"


In [67]:
def recommend_products(input_products, rules_products_df, rules_product_types_df, transactions_df, top_n=5):
    recommendations = []
    
    # Convert input_products to a frozenset to match the format in the rules dataframe
    input_frozenset = frozenset(input_products)
    
    # Step 1: Try to find product-level associations
    matching_rules = rules_products_df[rules_products_df['antecedents'] == input_frozenset]
    
    if matching_rules.empty:
        # Step 2: If no product-level associations, fallback to product type associations
        
        # Extract the product type of the input products
        input_product_types = transactions_df[transactions_df['Product'].isin(input_products)]['product_type'].unique()
        
        if len(input_product_types) == 0:
            return recommendations  # No valid product types found
        
        # Convert product types to frozensets
        input_product_types_frozenset = frozenset(input_product_types)
        
        # Find matching product type-level rules
        matching_type_rules = rules_product_types_df[rules_product_types_df['antecedents'] == input_product_types_frozenset]
        
        if not matching_type_rules.empty:
            # Sort the matching product type rules by lift, confidence, or another metric to get the top N recommendations
            sorted_type_rules = matching_type_rules.sort_values(by='lift', ascending=False).head(top_n)
            
            # Extract the recommended product types (consequents) from the sorted rules
            for _, rule in sorted_type_rules.iterrows():
                recommendations.append((rule['consequents'], rule['lift'], rule['confidence']))
    
    else:
        # Sort the matching product-level rules by lift, confidence, or another metric to get the top N recommendations
        sorted_rules = matching_rules.sort_values(by='lift', ascending=False).head(top_n)
        
        # Extract the recommended products (consequents) from the sorted rules
        for _, rule in sorted_rules.iterrows():
            recommendations.append((rule['consequents'], rule['lift'], rule['confidence']))
    
    return recommendations

# Example usage
input_products = ['buttertarts']  # Input from the user or current shopping cart
recommendations = recommend_products(input_products, rules_products, rules_product_types, df_with_counts, top_n=5)
for rec in recommendations:
    print(f"Recommend: {rec[0]} with Lift: {rec[1]} and Confidence: {rec[2]}")


Recommend: frozenset({'cookies'}) with Lift: 1.5190661264869665 and Confidence: 0.1997578692493946
Recommend: frozenset({'savoury'}) with Lift: 1.3430743879472693 and Confidence: 0.1949152542372881
Recommend: frozenset({'bread'}) with Lift: 1.2991673434689217 and Confidence: 0.1670702179176755
Recommend: frozenset({'pie'}) with Lift: 0.937418407983444 and Confidence: 0.1997578692493946
Recommend: frozenset({'cakes'}) with Lift: 0.2447815101893866 and Confidence: 0.1198547215496368


## 3.4 Recommendation Engine Enhanced - Product Level, Product Type Level & Popular Products per Date 

**Recommendation Logic:** 

If product-level associations are not found, and product-type associations are found, the code now recommends 3 popular products within the same product type. If no associations are found at all, the code falls back to recommending the top 5 popular products overall.
This approach tailors the recommendations to offer a mix of associated items and popular products that are seasonally relevant, enhancing the customer's shopping experience.

In [68]:
# Convert antecedents and consequents to frozensets containing entire product names
rules_products['antecedents'] = rules_products['antecedents'].apply(lambda x: frozenset([x]))
rules_products['consequents'] = rules_products['consequents'].apply(lambda x: frozenset([x]))

# Convert antecedents and consequents to frozensets
rules_product_types['antecedents'] = rules_product_types['antecedents'].apply(lambda x: frozenset(eval(x)) if isinstance(x, str) else x)
rules_product_types['consequents'] = rules_product_types['consequents'].apply(lambda x: frozenset(eval(x)) if isinstance(x, str) else x)

# Defining the Recommendation Engine
def preprocess_product_name(product_name):
    """Convert product names to lowercase, replace special characters, and normalize format."""
    # Convert to lowercase
    product_name = product_name.lower()
    
    # Replace inch symbol with the word "inch"
    product_name = re.sub(r'(\d+)"', r'\1inch', product_name)
    
    # Replace other special characters
    product_name = re.sub(r'[^a-zA-Z0-9\s]', '', product_name)
    
    return product_name

def recommend_products(input_products, rules_products_df, rules_product_types_df, transactions_df, top_n=5, popular_n=3):
    recommendations = []
    
    # Preprocess input products
    preprocessed_input_products = [preprocess_product_name(product) for product in input_products]
    
    # Convert preprocessed input_products to a frozenset to match the format in the rules dataframe
    input_frozenset = frozenset(preprocessed_input_products)
    
    # Step 1: Try to find product-level associations
    matching_rules = rules_products_df[rules_products_df['antecedents'] == input_frozenset]
    
    if matching_rules.empty:
        # Step 2: If no product-level associations, fallback to product type associations
        
        # Extract the product type of the input products
        input_product_types = transactions_df[transactions_df['Product'].apply(preprocess_product_name).isin(preprocessed_input_products)]['product_type'].unique()
        
        if len(input_product_types) == 0:
            return recommendations  # No valid product types found
        
        # Convert product types to frozensets
        input_product_types_frozenset = frozenset(input_product_types)
        
        # Find matching product type-level rules
        matching_type_rules = rules_product_types_df[rules_product_types_df['antecedents'] == input_product_types_frozenset]
        
        if not matching_type_rules.empty:
            # Sort the matching product type rules by lift, confidence, or another metric to get the top N recommendations
            sorted_type_rules = matching_type_rules.sort_values(by='lift', ascending=False).head(top_n)
            
            # Extract the recommended product types (consequents) from the sorted rules
            for _, rule in sorted_type_rules.iterrows():
                recommendations.append((rule['consequents'], rule['lift'], rule['confidence']))
            
            # Step 3: Recommend popular products within the same product type bought around the same date
            for consequent in sorted_type_rules['consequents']:
                popular_products_within_type = recommend_popular_products_within_type(consequent, transactions_df, popular_n)
                recommendations.extend(popular_products_within_type)
    
        else:
            # If no matching product type-level rules, fallback to just recommending the most popular products
            recommendations.extend(recommend_popular_products(transactions_df, top_n))
    
    else:
        # Sort the matching product-level rules by lift, confidence, or another metric to get the top N recommendations
        sorted_rules = matching_rules.sort_values(by='lift', confidence=False).head(top_n)
        
        # Extract the recommended products (consequents) from the sorted rules
        for _, rule in sorted_rules.iterrows():
            recommendations.append((rule['consequents'], rule['lift'], rule['confidence']))
    
    return recommendations

def recommend_popular_products(transactions_df, top_n=5):
    """ Recommend the most popular products bought around the current date. """
    today = datetime.today()
    current_month = today.month
    
    # Filter transactions for the current month
    transactions_df['Date'] = pd.to_datetime(transactions_df['Date'])
    filtered_transactions = transactions_df[transactions_df['Date'].dt.month == current_month]
    
    # Group by product and get the count of purchases
    popular_products = filtered_transactions.groupby('Product').size().reset_index(name='count')
    
    # Sort by the most popular and get the top N
    popular_products = popular_products.sort_values(by='count', ascending=False).head(top_n)
    
    # Return as a list of tuples (product, None, None) since these are not association-based recommendations
    return [(frozenset([product]), None, None) for product in popular_products['Product']]

def recommend_popular_products_within_type(product_type_frozenset, transactions_df, popular_n=3):
    """ Recommend the most popular products within a given product type that are bought around the current date. """
    today = datetime.today()
    current_month = today.month
    
    # Filter transactions for the current month and product type
    transactions_df['Date'] = pd.to_datetime(transactions_df['Date'])
    filtered_transactions = transactions_df[(transactions_df['Date'].dt.month == current_month) &
                                            (transactions_df['product_type'].isin(product_type_frozenset))]
    
    # Group by product and get the count of purchases
    popular_products_within_type = filtered_transactions.groupby('Product').size().reset_index(name='count')
    
    # Sort by the most popular and get the top N
    popular_products_within_type = popular_products_within_type.sort_values(by='count', ascending=False).head(popular_n)
    
    # Return as a list of tuples (product, None, None) since these are not association-based recommendations
    return [(frozenset([product]), None, None) for product in popular_products_within_type['Product']]

# Example usage
input_products = ['Buttertarts']  # Input from the user or current shopping cart
recommendations = recommend_products(input_products, rules_products, rules_product_types, df_with_counts, top_n=5, popular_n=3)
for rec in recommendations:
    if rec[1] is not None:
        print(f"Recommend: {rec[0]} with Lift: {rec[1]} and Confidence: {rec[2]}")
    else:
        print(f"Recommend: {rec[0]} as a popular item")


Recommend: frozenset({'cookies'}) with Lift: 1.5190661264869665 and Confidence: 0.1997578692493946
Recommend: frozenset({'savoury'}) with Lift: 1.3430743879472693 and Confidence: 0.1949152542372881
Recommend: frozenset({'bread'}) with Lift: 1.2991673434689217 and Confidence: 0.1670702179176755
Recommend: frozenset({'pie'}) with Lift: 0.937418407983444 and Confidence: 0.1997578692493946
Recommend: frozenset({'cakes'}) with Lift: 0.2447815101893866 and Confidence: 0.1198547215496368
Recommend: frozenset({'6 peanut butter cookies'}) as a popular item
Recommend: frozenset({'6 oatmeal raisin cookies'}) as a popular item
Recommend: frozenset({'cookie trays  individually wrapped'}) as a popular item
Recommend: frozenset({'quiche'}) as a popular item
Recommend: frozenset({'chicken pot pie'}) as a popular item
Recommend: frozenset({'2lb mac  cheese'}) as a popular item
Recommend: frozenset({'multigrain sourdough'}) as a popular item
Recommend: frozenset({'sourdough bread'}) as a popular item
Re

In [69]:
input_products = ['8inch Lemon Meringue Pie']  # Input from the user or current shopping cart
recommendations = recommend_products(input_products, rules_products, rules_product_types, df_with_counts, top_n=5, popular_n=3)
for rec in recommendations:
    if rec[1] is not None:
        print(f"Recommend: {rec[0]} with Lift: {rec[1]} and Confidence: {rec[2]}")
    else:
        print(f"Recommend: {rec[0]} as a popular item")

Recommend: frozenset({'cookies', 'bread'}) with Lift: 1.4365632745720307 and Confidence: 0.039727582292849
Recommend: frozenset({'savoury', 'bread'}) with Lift: 1.4145223173382713 and Confidence: 0.0559969731365872
Recommend: frozenset({'bread'}) with Lift: 1.4034187425944444 and Confidence: 0.1804767309875142
Recommend: frozenset({'squares'}) with Lift: 1.0989405978055238 and Confidence: 0.0279984865682936
Recommend: frozenset({'scones'}) with Lift: 1.088605733249359 and Confidence: 0.0279984865682936
Recommend: frozenset({'multigrain sourdough'}) as a popular item
Recommend: frozenset({'sourdough bread'}) as a popular item
Recommend: frozenset({'whole wheat bread'}) as a popular item
Recommend: frozenset({'quiche'}) as a popular item
Recommend: frozenset({'multigrain sourdough'}) as a popular item
Recommend: frozenset({'sourdough bread'}) as a popular item
Recommend: frozenset({'multigrain sourdough'}) as a popular item
Recommend: frozenset({'sourdough bread'}) as a popular item
Reco

In [71]:
# Save the recommendation engine
import pickle

# Define the path where you want to save the recommendation engine
save_path = '/Users/elizaclapasmac/Desktop/Repositories/Final Project/Final-Project---LHL/Data/3. Recommendation Engines/hybrid_recommendation_engine.pkl'

# Save the recommendation engine
with open(save_path, 'wb') as file:
    pickle.dump(recommend_products, file)

In [72]:
# Save DataFrames used
rules_products.to_csv('/Users/elizaclapasmac/Desktop/Repositories/Final Project/Final-Project---LHL/Data/3. Recommendation Engines/Rules Products.csv', index=False)
rules_product_types.to_csv('/Users/elizaclapasmac/Desktop/Repositories/Final Project/Final-Project---LHL/Data/3. Recommendation Engines/Rules Product Types.csv', index=False)
df_with_counts.to_csv('/Users/elizaclapasmac/Desktop/Repositories/Final Project/Final-Project---LHL/Data/3. Recommendation Engines/Transactions Details.csv', index=False)

In [73]:
input_products = ['8" Lemon Meringue Pie']  # Input from the user or current shopping cart
recommendations = recommend_products(input_products, rules_products, rules_product_types, df_with_counts, top_n=5, popular_n=3)
for rec in recommendations:
    if rec[1] is not None:
        print(f"Recommend: {rec[0]} with Lift: {rec[1]} and Confidence: {rec[2]}")
    else:
        print(f"Recommend: {rec[0]} as a popular item")

Recommend: frozenset({'cookies', 'bread'}) with Lift: 1.4365632745720307 and Confidence: 0.039727582292849
Recommend: frozenset({'savoury', 'bread'}) with Lift: 1.4145223173382713 and Confidence: 0.0559969731365872
Recommend: frozenset({'bread'}) with Lift: 1.4034187425944444 and Confidence: 0.1804767309875142
Recommend: frozenset({'squares'}) with Lift: 1.0989405978055238 and Confidence: 0.0279984865682936
Recommend: frozenset({'scones'}) with Lift: 1.088605733249359 and Confidence: 0.0279984865682936
Recommend: frozenset({'multigrain sourdough'}) as a popular item
Recommend: frozenset({'sourdough bread'}) as a popular item
Recommend: frozenset({'whole wheat bread'}) as a popular item
Recommend: frozenset({'quiche'}) as a popular item
Recommend: frozenset({'multigrain sourdough'}) as a popular item
Recommend: frozenset({'sourdough bread'}) as a popular item
Recommend: frozenset({'multigrain sourdough'}) as a popular item
Recommend: frozenset({'sourdough bread'}) as a popular item
Reco