# In [9]:
import pandas as pd
from datetime import datetime
import numpy as np
import os
import gc
pd.set_option('display.max_columns', 100)
from scipy import stats
def get_department_features(df_prior_final, user):
    """Build user x department features from the prior order history.

    Args:
        df_prior_final: Prior order lines, one row per purchased item. The
            columns read here are ``user_id``, ``department_id``,
            ``product_id``, ``order_id``, ``reordered`` and
            ``add_to_cart_order``.
        user: User-level feature frame. Only ``user_id`` and
            ``u_tot_active_prod`` are read (presumably the user's total
            number of distinct products -- confirm against the code that
            builds it).

    Returns:
        A new DataFrame with one row per (user_id, department_id) pair and
        the columns, in this order:

        * ``uxd_total_unique_products``: distinct products the user bought
          in the department.
        * ``uxd_total_ordered``: number of order lines the user has in the
          department.
        * ``uxd_unique_products_ratio``: ``uxd_total_unique_products``
          divided by the user's ``u_tot_active_prod``.
        * ``d_total_purchases``: non-null ``order_id`` rows in the
          department.
        * ``d_reorder_ratio``: mean of ``reordered`` in the department.
        * ``d_avg_cart_position``: mean of ``add_to_cart_order`` in the
          department.
        * ``d_unique_user_count``: distinct (non-missing) users in the
          department.
        * ``a_reorder_ratio_bool``: share of the department's users that
          have more than one order line in it.
        * ``a_tot_active_user``: number of users with at least one order
          line in the department.

        Neither input frame is modified.
    """
    # Department-level aggregates over every prior order line. Columns are
    # renamed by explicit mapping so the labels cannot drift if the
    # aggregation spec is reordered.
    department = (
        df_prior_final.groupby('department_id')
        .agg({
            'order_id': 'count',
            'reordered': 'mean',
            'add_to_cart_order': 'mean',
            # 'nunique' ignores missing user ids, matching the groupby
            # below, which also drops rows whose key is missing.
            'user_id': 'nunique',
        })
        .reset_index()
        .rename(columns={
            'order_id': 'd_total_purchases',
            'reordered': 'd_reorder_ratio',
            'add_to_cart_order': 'd_avg_cart_position',
            'user_id': 'd_unique_user_count',
        })
    )

    # Per (user, department) counts.
    uxd = (
        df_prior_final.groupby(['user_id', 'department_id'])
        .agg({'product_id': 'nunique', 'order_id': 'size'})
        .reset_index()
        .rename(columns={
            'product_id': 'uxd_total_unique_products',
            'order_id': 'uxd_total_ordered',
        })
    )

    # uxd_unique_products_ratio: share of the user's products that fall in
    # this department. The user total is only needed for the division, so
    # it is dropped again afterwards.
    uxd = pd.merge(
        uxd, user[["user_id", "u_tot_active_prod"]], how="left", on="user_id"
    )
    uxd["uxd_unique_products_ratio"] = (
        uxd["uxd_total_unique_products"] / uxd["u_tot_active_prod"]
    )
    uxd = uxd.drop("u_tot_active_prod", axis=1)

    # Flag users with more than one order line in the department, then
    # summarise per department: the mean of the flag is the share of such
    # users, the group size is the number of users seen in the department.
    # The flag is kept as a standalone Series so uxd never carries a
    # temporary column.
    bought_more_than_once = (uxd["uxd_total_ordered"] > 1).astype("int")
    departments_fe = (
        bought_more_than_once.groupby(uxd["department_id"])
        .agg(["mean", "size"])
        .reset_index()
        .rename(columns={
            "mean": "a_reorder_ratio_bool",
            "size": "a_tot_active_user",
        })
    )

    department = pd.merge(
        department, departments_fe, how="left", on="department_id"
    )

    # Attach the department-level features to every (user, department) row.
    return uxd.merge(department, on='department_id')