In [15]:
import pandas as pd
from datetime import datetime
import numpy as np
import os
import gc
pd.set_option('display.max_columns', 100)
from scipy import stats

# Here we calculate user x product features such as:
#   1. number of times a user has bought a product
#   2. how frequently a user has reordered a particular product
#   3. user's mean add_to_cart_order for a particular product
#   4. user's mean add_to_cart_order_relative for a particular product
#   5. user's mean add_to_cart_order_inverted for a particular product
#   6. user's first and last order_number for a particular product
#   7. user's first and last date for a particular product
def get_user_product_features(df_prior_final, user, product):
    """
    Compute user x product features from the prior order history.

    Args:
        df_prior_final : prior order lines. The code reads the columns
            user_id, product_id, order_id, order_number, reordered,
            add_to_cart_order, add_to_cart_order_relative,
            add_to_cart_order_inverted, order_number_reverse, date,
            uxp_date_strike and uxp_order_strike.
            NOTE: a helper column `order_number_back` is written onto this
            frame in place (1 = the user's highest order_number).
        user : DF of user features, keyed by user_id.
        product : DF of product features, keyed by product_id.

    Returns:
        (uxp, user, product)
            uxp     : one row per (user_id, product_id) pair with the
                      aggregated features plus `uxp_bought_last5`.
            user    : input `user` left-merged with
                      u_reorder_ratio_bool (share of the user's products
                      bought more than once) and u_tot_active_prod (number
                      of distinct products the user bought).
            product : input `product` left-merged with
                      p_reorder_ratio_bool (share of the product's buyers
                      who bought it more than once) and p_tot_active_usr
                      (number of distinct users who bought the product).
    """
    pair_keys = ['user_id', 'product_id']

    # One row per (user, product). The flat column names assigned below must
    # follow the order of this dict; list-valued entries expand to one output
    # column per function, in the order given.
    uxp = df_prior_final.groupby(pair_keys).agg({
        'order_id': 'count',
        'reordered': 'mean',
        'add_to_cart_order': 'mean',
        'add_to_cart_order_relative': 'mean',
        'add_to_cart_order_inverted': 'mean',
        'order_number_reverse': ['min', 'max'],
        'date': ['min', 'max'],
        'uxp_date_strike': 'sum',
        'uxp_order_strike': 'sum',
    }).reset_index()
    # NOTE(review): the 'min' aggregates are named "last" and the 'max'
    # aggregates "first" - presumably because order_number_reverse and date
    # count backwards from the most recent order; confirm against the code
    # that builds those columns.
    uxp.columns = [
        'user_id', 'product_id',
        'uxp_total_bought', 'uxp_reorder_ratio', 'uxp_avg_cart_position',
        'uxp_add_to_cart_order_relative_mean',
        'uxp_add_to_cart_order_inverted_mean',
        'uxp_last_order_number', 'uxp_first_order_number',
        'up_last_order_date', 'up_first_order_date',
        'uxp_date_strike', 'uxp_order_strike',
    ]

    # 1 when the pair was bought more than once, else 0. Kept as a standalone
    # Series so no temporary column has to be added to (and dropped from) uxp.
    bought_again = (uxp['uxp_total_bought'] > 1).astype('int')

    # Per-user roll-up: mean of the flag and number of (user, product) rows.
    users_fe1 = (bought_again.groupby(uxp['user_id'])
                 .agg(['mean', 'size'])
                 .reset_index()
                 .rename(columns={'mean': 'u_reorder_ratio_bool',
                                  'size': 'u_tot_active_prod'}))
    user = pd.merge(user, users_fe1, on='user_id', how='left')

    # Per-product roll-up: mean of the flag and number of (user, product) rows.
    product_fe1 = (bought_again.groupby(uxp['product_id'])
                   .agg(['mean', 'size'])
                   .reset_index()
                   .rename(columns={'mean': 'p_reorder_ratio_bool',
                                    'size': 'p_tot_active_usr'}))
    product = pd.merge(product, product_fe1, on='product_id', how='left')
    del users_fe1, product_fe1, bought_again

    # Recency: how many times the pair appears in the user's last five orders.
    # The string alias 'max' is used instead of the builtin `max`, which newer
    # pandas releases deprecate as a transform argument.
    latest_order = df_prior_final.groupby('user_id')['order_number'].transform('max')
    df_prior_final['order_number_back'] = (
        latest_order - df_prior_final['order_number'] + 1
    )
    recent_lines = df_prior_final[df_prior_final['order_number_back'] <= 5]
    last_five = (recent_lines.groupby(pair_keys)['order_id']
                 .count()
                 .rename('uxp_bought_last5')
                 .reset_index())
    uxp = uxp.merge(last_five, on=pair_keys, how='left')
    del latest_order, recent_lines, last_five
    gc.collect()

    # Pairs absent from the last five orders have no match in the merge above;
    # for them the count is 0. Only this column is filled so that genuine
    # missing values in the other features are not silently turned into 0.
    uxp['uxp_bought_last5'] = uxp['uxp_bought_last5'].fillna(0)
    return uxp, user, product