# 0. Contents

1. Average department orders

2. Creating max_order columns

3. Creating loyalty_flag

# 1. Importing Libraries and Data

In [1]:
# importing libraries
import pandas as pd
import numpy as np
import os

In [2]:
# creating main path (project root folder)
# NOTE: this is an absolute, machine-specific path; adjust it when running on another computer
path = r'C:\Users\muffi\OneDrive\CareerFoundry\Instacart Basket Analysis 2023'

In [3]:
# loading the merged orders/products dataframe from the prepared-data folder
# NOTE: unpickling can execute arbitrary code, so only load pickle files you created yourself
ords_prods_merge = pd.read_pickle(
    os.path.join(path, '02 Data', 'Prepared Data', 'orders_products_merged_v2.pkl')
)

In [None]:
# previewing the first rows of the imported df to confirm it loaded as expected
ords_prods_merge.head()

In [4]:
# working subset: the first 1,000,000 rows (by position) of the main df,
# small enough to experiment with groupby/agg quickly
df = ords_prods_merge.iloc[:1_000_000]

In [None]:
# checking sub df dimensions as (rows, columns)
df.shape

In [None]:
# previewing the first rows of the sub df
df.head()

# 2. Grouping Data with pandas

In [None]:
# .groupby() on its own only returns a DataFrameGroupBy object describing how the rows are split
# nothing is calculated (or displayed) until an aggregation such as .mean() or .agg() is applied to it
df.groupby('product_name')

When using .groupby(), always: 1. split the data into groups based on some criteria, 2. apply a function to each group separately, and 3. store the results somewhere.

# 3. Aggregating Data with agg()

In [5]:
# average order_number per department
# groupby() splits the rows by department_id (similar to the row field of a pivot table);
# agg() then applies the listed function(s) to the chosen column of each group
df.groupby('department_id').agg(
    {'order_number': ['mean']}
)
# shorter alternatives that give the same means (returned as a Series):
#   df.groupby('department_id')['order_number'].mean()
#   df.groupby('department_id').order_number.mean()  # attribute-style column access is less explicit than brackets

Unnamed: 0_level_0,order_number
Unnamed: 0_level_1,mean
department_id,Unnamed: 1_level_2
1,15.577493
2,17.320781
3,16.084944
4,17.530458
5,14.763075
6,16.658449
7,17.03159
8,15.076662
9,15.44758
10,18.681852


In [None]:
# several aggregations at once: mean, min and max of order_number for each department
df.groupby('department_id').agg(
    {'order_number': ['mean', 'min', 'max']}
)

# 4. Aggregating Data with transform()

If the max number of orders a user has is over 40, they are a "Loyal customer"

If 10 < max orders <= 40, they are a "Regular customer"

If max orders <= 10, they are a "New customer"

In [None]:
# adding a per-user max_order col: each row receives the highest order_number of its user_id
# transform() returns a result aligned with the original rows, so it can be assigned directly as a column
# the string alias 'max' is used instead of np.max: pandas >= 2.1 emits a FutureWarning when a
# NumPy reduction callable is passed to a groupby transform/agg and recommends the string form
ords_prods_merge['max_order'] = ords_prods_merge.groupby(['user_id'])['order_number'].transform('max')

In [None]:
# checking that the new max_order col was added
ords_prods_merge.head()

In [None]:
# checking more rows (first 100)
ords_prods_merge.head(100)
# if the notebook truncates the output, set pd.options.display.max_rows = None to display all rows

# 5. Deriving Columns with loc()

In [None]:
# 'Loyal customer': rows whose user has more than 40 orders (max_order > 40)
ords_prods_merge.loc[ords_prods_merge['max_order'].gt(40), 'loyalty_flag'] = 'Loyal customer'

In [None]:
# 'Regular customer': rows whose user has more than 10 and at most 40 orders (10 < max_order <= 40)
ords_prods_merge.loc[
    ords_prods_merge['max_order'].gt(10) & ords_prods_merge['max_order'].le(40),
    'loyalty_flag',
] = 'Regular customer'

In [None]:
# 'New customer': rows whose user has at most 10 orders (max_order <= 10)
ords_prods_merge.loc[ords_prods_merge['max_order'].le(10), 'loyalty_flag'] = 'New customer'

In [None]:
# counting rows per loyalty flag; dropna = False also counts rows that received no flag (NaN)
ords_prods_merge['loyalty_flag'].value_counts(dropna = False)

In [None]:
# checking the new loyalty_flag col next to user_id and order_number (first 60 rows)
ords_prods_merge[['user_id', 'loyalty_flag','order_number']].head(60)