In [9]:
import pandas as pd
from datetime import datetime
import numpy as np
import os
import gc
# Set pandas' display.max_columns option to 100 so that wide DataFrames show
# up to 100 columns when printed/displayed.
pd.set_option('display.max_columns', 100)
from scipy import stats
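# Product-based features computed by get_product_features (one row per product):
#   1. p_total_purchases   : number of times the product was purchased
#   2. p_reorder_ratio     : how frequently the product has been reordered (mean of `reordered`)
#   3. p_avg_cart_position : mean add-to-cart position of the product
#   4. p_unique_user_count : user base of the product (number of distinct users)
#   5. p_recency_order     : mean order_number
#   6. p_recency_order_rev : mean order_number_reverse
#   7. p_recency_date      : mean date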
def get_product_features(df_prior_final,df_products_aisle_dep):
    '''
        Computes product-level features from the users' prior order history.

        Args:
            df_prior_final : Users prior order history, one row per
                (order, product). Columns read here: product_id, order_id,
                user_id, reordered, add_to_cart_order, order_number,
                order_number_reverse, date.
            df_products_aisle_dep : DF of product,aisle,department info combined.
                Columns read here: product_id, product_name, aisle_id,
                department_id.
        Returns:
                product feature DF with one row per product_id and the columns
                (in this order): product_id, p_total_purchases, p_reorder_ratio,
                p_avg_cart_position, p_unique_user_count, p_recency_order,
                p_recency_order_rev, p_recency_date, product_name, aisle_id,
                department_id, p_size, p_trend_rt, p_trend_diff, p_freq_days,
                p_freq_order.

        Neither input frame is modified.
    '''
    # ---- 1. Plain per-product aggregates --------------------------------
    # 'nunique' replaces a per-group Python lambda (x.unique().shape[0]); the
    # only difference is that missing user_id values are not counted.
    product = df_prior_final.groupby('product_id').agg({
        'order_id': 'count',
        'reordered': 'mean',
        'add_to_cart_order': 'mean',
        'user_id': 'nunique',
        'order_number': 'mean',
        'order_number_reverse': 'mean',
        'date': 'mean',
    }).reset_index()
    # Rename by name (not by position) so the result does not depend on the
    # column order produced by the aggregation.
    product = product.rename(columns={
        'order_id': 'p_total_purchases',
        'reordered': 'p_reorder_ratio',
        'add_to_cart_order': 'p_avg_cart_position',
        'user_id': 'p_unique_user_count',
        'order_number': 'p_recency_order',
        'order_number_reverse': 'p_recency_order_rev',
        'date': 'p_recency_date',
    })
    product = product.merge(
        df_products_aisle_dep[['product_id', 'product_name', 'aisle_id', 'department_id']],
        on='product_id', how='left')

    # ---- 2. Product trend over the last two orders across users ---------
    # p_size       : no of times product ordered in last order (order_number_reverse == 1)
    # p_trend_rt   : p_size / no of times product ordered in 2nd last order
    # p_trend_diff : p_size - no of times product ordered in 2nd last order
    recent = df_prior_final[df_prior_final['order_number_reverse'] < 3]
    products_trend = (recent.groupby(['product_id', 'order_number_reverse'])
                            .size()
                            .rename('p_size')
                            .reset_index())

    # groupby sorts by (product_id, order_number_reverse), so the row that
    # follows a product's last-order row is its 2nd-last-order row, if any.
    next_size = products_trend['p_size'].shift(-1)
    # If the following row belongs to another product (or there is none), the
    # product was not ordered in a 2nd last order: leave both trends as NaN.
    same_product = products_trend['product_id'] == products_trend['product_id'].shift(-1)
    products_trend['p_trend_rt'] = (products_trend['p_size'] / next_size).where(same_product)
    products_trend['p_trend_diff'] = (products_trend['p_size'] - next_size).where(same_product)

    # Keep only the last-order rows. `columns=` is used because passing the
    # axis positionally (drop(label, 1)) is rejected by pandas >= 2.0.
    is_last_order = products_trend['order_number_reverse'] == 1
    products_trend = products_trend[is_last_order].drop(columns='order_number_reverse')
    product = product.merge(products_trend, how='left', on='product_id')

    # ---- 3. Reorder frequency --------------------------------------------
    # p_freq_days  : Mean days before the product is reordered
    # p_freq_order : Mean number of orders before the product is reordered
    # Only the needed columns are taken (no copy of the full history); the
    # selection + sort already yields a new frame, so the input is untouched.
    history = df_prior_final[['user_id', 'product_id', 'order_number', 'date', 'reordered']]
    history = history.sort_values(['user_id', 'product_id', 'order_number'])
    # After sorting, the previous row of a reordered line is assumed to be the
    # same user's previous purchase of the same product (this holds when the
    # history is complete; rows with reordered != 1 are discarded below).
    # NOTE(review): p_freq_days is "previous date - current date", which is
    # positive only if `date` decreases as order_number grows - confirm how
    # `date` is built upstream.
    history = history.assign(
        p_freq_days=history['date'].shift() - history['date'],
        p_freq_order=history['order_number'] - history['order_number'].shift(),
    )
    product_freq = (history[history['reordered'] == 1]
                    .groupby('product_id')[['p_freq_days', 'p_freq_order']]
                    .mean()
                    .reset_index())

    product = product.merge(product_freq, how='left', on='product_id')
    return product