# In [3]:
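# We add two per-user features to every order:
#   1. date                 : reverse cumulative sum of days_since_prior_order, i.e. the number of days
#                             between an order and the user's latest order (recent orders get low values)
#   2. order_number_reverse : max(order_number) - order_number, so the user's latest order is 0 and
#                             earlier orders count upwards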


def add_fe_to_orders(group):
    '''
        Adds date and order_number_reverse info to one user's orders.

        Args:
            group : DataFrame with the orders of a single user; must contain the
                    columns "days_since_prior_order" and "order_number".
                    The "date" computation walks the rows in their current order,
                    so rows are assumed to be sorted by ascending order_number
                    (assumption - not checked here).
        Returns:
            A new DataFrame (the input is left untouched) with two extra columns:
                date                 : sum of days_since_prior_order over all rows after
                                       the current one, 0 for the last row
                order_number_reverse : max(order_number) - order_number
    '''
    # Walk the gaps from the last row backwards and accumulate them, then flip back.
    # At this point each row holds the gap sum *including* its own gap.
    gaps_from_end = group["days_since_prior_order"].iloc[::-1].cumsum().iloc[::-1]
    # A row's own gap lies before it, so take the value of the next row instead;
    # the last row has no successor and gets 0.
    days_to_latest = gaps_from_end.shift(-1).fillna(0)
    orders_back = group["order_number"].max() - group["order_number"]
    # assign() returns a new frame: pandas does not support functions that mutate
    # the object handed to them by groupby(...).apply(...).
    return group.assign(date=days_to_latest, order_number_reverse=orders_back)
def get_feature_engineering(df_prior_final,df_orders,df_products_aisle_dep):
    '''
        Computes featurized DF
        Args:
            df_prior_final :  Users prior order history; the code reads the columns
                              order_id, user_id and add_to_cart_order
            df_orders : Order history; grouped by user_id and passed to add_fe_to_orders
            df_products_aisle_dep : DF of product,aisle,department info combined
        Returns:
                Featurized DF: user x product features merged with the user, product,
                user x aisle and user x department features, then passed through
                product_W2V.get_prod_W2V
    '''
    # group_keys=False keeps the original index and row order of df_orders.
    # Without it, pandas >= 2.0 prepends user_id as an index level while it is also
    # still a column, which makes later groupby/merge calls on user_id ambiguous.
    orders = df_orders.groupby("user_id", group_keys=False).apply(add_fe_to_orders)
    df_prior_final = df_prior_final.merge(
        orders[['date', 'order_number_reverse', 'order_id']], on='order_id', how='left')

    # order_size : basket size in each order
    # add_to_cart_order_inverted : Inverse position of the product in the cart
    # add_to_cart_order_relative :  Relative position of the product in the cart
    # last_basket_size : basket size of the previous order
    order_stat = df_prior_final.groupby('order_id').size().rename('order_size').reset_index()
    df_prior_final = pd.merge(df_prior_final, order_stat, on='order_id')
    df_prior_final['add_to_cart_order_inverted'] = df_prior_final.order_size - df_prior_final.add_to_cart_order
    df_prior_final['add_to_cart_order_relative'] = df_prior_final.add_to_cart_order / df_prior_final.order_size
    del order_stat

    # last_basket_size to orders
    last_basket_size = df_prior_final.groupby(["user_id", "order_number_reverse"]).size(). \
        rename("last_basket_size").reset_index()
    # Shift the key by one so that the merge below attaches to every order the basket
    # size of the order with order_number_reverse + 1, i.e. the one placed just before it.
    # Orders without such a predecessor get NaN from the left join.
    last_basket_size["order_number_reverse"] = last_basket_size["order_number_reverse"] - 1

    df_prior_final = pd.merge(df_prior_final, last_basket_size, how="left", on=["user_id", "order_number_reverse"])

    del last_basket_size

    # "Strike" weights: every purchase is weighted by 1 / 2**k, where k measures how far
    # back the order lies, so recent purchases dominate once the weights are summed up.
    # Idea, for a user with 5 orders (NA means the product was not bought yet):
    # 0 0 0 0 1 = 1/2**5 = 0.03125 a product bought the first time and never purchased again
    # 1 1 NA NA NA = 1/2**1 + 1/2**2 = 0.75 a product purchased the last two times
    # 1 0 0 1 NA = 1/2**1 + 1/2**4 = 0.5625
    # It helps to focus on products the user recently started reordering.
    # uxp_date_strike uses date / 7 as exponent (weight halves per 7 days),
    # uxp_order_strike uses order_number_reverse (weight halves per order).
    df_prior_final["uxp_date_strike"] = 1 / 2 ** (df_prior_final["date"] / 7)
    df_prior_final["uxp_order_strike"] = 1 / 2 ** (df_prior_final["order_number_reverse"])

    user = user_features.get_user_features(df_prior_final,orders)
    product = product_features.get_product_features(df_prior_final,df_products_aisle_dep)
    # get_user_product_features also hands back (possibly updated) user and product frames
    uxp,user,product = user_product.get_user_product_features(df_prior_final,user,product)
    uxa = aisle.get_aisle_features(df_prior_final,user)
    uxd = department.get_department_features(df_prior_final,user)

    # Merge uxp with the user, product, uxa and uxd DataFrames so that the user's global
    # features (user), the product's global features (product) and the user's purchase
    # behavior towards the product (uxp) are all in one place.
    data = uxp.merge(user, on='user_id', how='left').merge(product, on='product_id', how='left')\
                .merge(uxa,on=['user_id', 'aisle_id'],how='left').merge(uxd,on=['user_id', 'department_id'],how='left')
    del user, product, uxp,uxa,uxd
    gc.collect()

    # NOTE(review): df_products and df_train_final are not parameters of this function;
    # they are looked up in the enclosing (module / notebook) scope and must exist there
    # before the call - confirm whether they should be passed in explicitly.
    data = product_W2V.get_prod_W2V(df_products,df_prior_final,df_train_final,data)
    return data