# In [3]:
import pandas as pd
from datetime import datetime
import numpy as np
import os
import gc
pd.set_option('display.max_columns', 100)
from scipy import stats
def get_aisle_features(df_prior_final, user):
    '''
        Computes user x aisle features.

        Args:
            df_prior_final : Users prior order history. Must contain the
                columns user_id, order_id, product_id, aisle_id, reordered
                and add_to_cart_order (one row per ordered product).
            user : DF of user features. Must contain the columns user_id
                and u_tot_active_prod.
        Returns:
            User x aisle feature DF with one row per (user_id, aisle_id)
            pair and the columns:
                uxa_total_unique_products : distinct products the user bought
                    in the aisle
                uxa_total_ordered : order lines the user has in the aisle
                uxa_unique_products_ratio : uxa_total_unique_products divided
                    by the user's u_tot_active_prod
                a_total_purchases : order lines in the aisle
                a_reorder_ratio : mean of `reordered` over the aisle
                a_avg_cart_position : mean add_to_cart_order over the aisle
                a_unique_user_count : distinct users that bought in the aisle
                    (missing user_id values are not counted)
                a_reorder_ratio_bool : share of the aisle's users that have
                    more than one order line in the aisle
                a_tot_active_user : number of users with at least one order
                    line in the aisle
    '''
    # Aisle-level statistics computed over every prior order line.
    aisle = (
        df_prior_final.groupby('aisle_id')
        .agg({'order_id': 'count',
              'reordered': 'mean',
              'add_to_cart_order': 'mean',
              'user_id': 'nunique'})
        .reset_index()
        .rename(columns={'order_id': 'a_total_purchases',
                         'reordered': 'a_reorder_ratio',
                         'add_to_cart_order': 'a_avg_cart_position',
                         'user_id': 'a_unique_user_count'})
    )

    # User x aisle counts.
    uxa = (
        df_prior_final.groupby(['user_id', 'aisle_id'])
        .agg({'product_id': 'nunique', 'order_id': 'size'})
        .reset_index()
        .rename(columns={'product_id': 'uxa_total_unique_products',
                         'order_id': 'uxa_total_ordered'})
    )

    # Share of the user's active products that fall in this aisle. The user
    # total is only needed for the ratio, so it is dropped again afterwards.
    uxa = pd.merge(uxa, user[["user_id", "u_tot_active_prod"]],
                   how="left", on="user_id")
    uxa["uxa_unique_products_ratio"] = (
        uxa["uxa_total_unique_products"] / uxa["u_tot_active_prod"]
    )
    uxa = uxa.drop("u_tot_active_prod", axis=1)

    # Per-user flag: the user has more than one order line in the aisle.
    # Kept as a local Series so it never leaks into the returned frame.
    bought_again = (uxa["uxa_total_ordered"] > 1).astype("int")
    aisles_fe = (
        bought_again.groupby(uxa["aisle_id"])
        .agg(["mean", "size"])
        .reset_index()
        .rename(columns={"mean": "a_reorder_ratio_bool",
                         "size": "a_tot_active_user"})
    )

    # Combine both sets of aisle-level features and attach them to every
    # user x aisle row. Merging the combined frame (rather than `aisle`
    # alone) is what keeps a_reorder_ratio_bool / a_tot_active_user in the
    # output.
    aisles = pd.merge(aisle, aisles_fe, how="left", on="aisle_id")
    return uxa.merge(aisles, on='aisle_id')