# Table of contents

#### 1- Importing libraries/dataframe
#### 2- Sorting products by price range: 
                                   01- If statements using loc
                                   02- If statements with for loops
#### 3- Editing column to make it busiest days and least busy days
#### 4- Creating busiest periods column
#### 5- Exporting dataframe

## 1 Importing libraries/dataframe

In [33]:
#importing libraries
import pandas as pd
import numpy as np
import os

In [34]:
# Root folder of the Instacart project; the data files read and written
# below are located relative to it.
path = r'/Users/Amaikuru/Desktop/08-06-2022 Instacart Basket Analysis.nosync'

In [35]:
# Load the merged orders/products table from <path>/02 Data/Prepared.
merged_pickle = os.path.join(path, '02 Data', 'Prepared', 'orders_products_merged.pkl')
ords_prods_merged = pd.read_pickle(merged_pickle)

In [36]:
# Work on the first 1,000,000 rows as a quick test subset.
# .copy() makes df an independent frame, so the column assignments made on it
# below do not raise SettingWithCopyWarning or depend on whether the slice is
# a view of ords_prods_merged.
df = ords_prods_merged[:1000000].copy()

## 2 Sorting products by price range

### 01 If statements using loc

In [41]:
# Label every product priced above 15 as a high-range product.
df.loc[df['prices'].gt(15), 'price_range_loc'] = 'High-range product'

In [42]:
# Label products priced above 5 and up to (and including) 15 as mid-range.
df.loc[df['prices'].gt(5) & df['prices'].le(15), 'price_range_loc'] = 'Mid-range product'

In [43]:
# Label products priced at 5 or below as low-range.
df.loc[df['prices'].le(5), 'price_range_loc'] = 'Low-range product'

In [44]:
# Frequency of each price label. dropna=False also lists rows that received no
# label (a missing price matches none of the three price conditions), in line
# with the other frequency checks in this notebook.
df['price_range_loc'].value_counts(dropna=False)

Mid-range product     652638
Low-range product     338018
High-range product      9344
Name: price_range_loc, dtype: int64

In [45]:
# Put the price-label frequency table on the clipboard.
price_label_counts = df['price_range_loc'].value_counts()
price_label_counts.to_clipboard()

In [46]:
# Same labelling on the full dataset: prices above 15 are high-range.
ords_prods_merged.loc[ords_prods_merged['prices'].gt(15), 'price_range_loc'] = 'High-range product'

In [47]:
# Full dataset: prices above 5 and up to (and including) 15 are mid-range.
ords_prods_merged.loc[
    ords_prods_merged['prices'].gt(5) & ords_prods_merged['prices'].le(15),
    'price_range_loc',
] = 'Mid-range product'

In [48]:
# Full dataset: prices of 5 or below are low-range.
ords_prods_merged.loc[ords_prods_merged['prices'].le(5), 'price_range_loc'] = 'Low-range product'

### 02 If statements with for loops

In [49]:
# Label each row by its day-of-week code: 0 -> "Busiest day", 4 -> "Least busy",
# every other value (including a missing one) -> "Regularly busy".
# Series.map looks up the whole column at once instead of running a
# Python-level loop over every row; codes absent from the dict come back as
# NaN and are then filled with the default label.
day_labels = {0: "Busiest day", 4: "Least busy"}
result = (
    ords_prods_merged["orders_day_of_week"]
    .map(day_labels)
    .fillna("Regularly busy")
    .to_numpy()  # plain positional array, so assigning it as a column ignores the index
)

In [51]:
# Store the labels held in `result` as the new 'busiest_day' column;
# the values are matched to the rows by position.
ords_prods_merged['busiest_day']=result

In [62]:
# Put the 'busiest_day' frequency table on the clipboard.
busiest_day_counts = ords_prods_merged['busiest_day'].value_counts()
busiest_day_counts.to_clipboard()

## 3 Editing column to make it busiest days and least busy days

In [52]:
# Count rows per day-of-week code, keeping missing values as their own entry.
day_codes = ords_prods_merged["orders_day_of_week"]
day_codes.value_counts(dropna=False)

0    6204182
1    5660230
6    4496490
2    4213830
5    4205791
3    3840534
4    3783802
Name: orders_day_of_week, dtype: int64

#### Saturday and Sunday (days 0 and 1) are the busiest days; Tuesday and Wednesday (days 3 and 4, counting from 0 = Saturday) are the least busy days

In [56]:
# Label each row by its day-of-week code: 0 or 1 -> "Busiest days",
# 3 or 4 -> "Least busy days", every other value (including a missing one)
# -> "Regularly busy".
# Series.map looks up the whole column at once instead of running a
# Python-level loop over every row; codes absent from the dict come back as
# NaN and are then filled with the default label.
day_group_labels = {
    0: "Busiest days",
    1: "Busiest days",
    3: "Least busy days",
    4: "Least busy days",
}
result = (
    ords_prods_merged["orders_day_of_week"]
    .map(day_group_labels)
    .fillna("Regularly busy")
    .to_numpy()  # plain positional array, so assigning it as a column ignores the index
)

In [57]:
# Store the labels held in `result` as the new 'busiest_days' column;
# the values are matched to the rows by position.
ords_prods_merged['busiest_days']=result

In [58]:
# Preview the first five rows to confirm the new column looks right.
ords_prods_merged.iloc[:5]

Unnamed: 0,product_id,product_name,aisle_id,department_id,prices,order_id,user_id,order_number,orders_day_of_week,order_hour_of_day,days_since_prior_order,add_to_cart_order,reordered,_merge,price_range_loc,busiest_day,busiest_days
0,1,Chocolate Sandwich Cookies,61,19,5.8,3139998,138,28,6,11,3.0,5,0,both,Mid-range product,Regularly busy,Regularly busy
1,1,Chocolate Sandwich Cookies,61,19,5.8,1977647,138,30,6,17,20.0,1,1,both,Mid-range product,Regularly busy,Regularly busy
2,1,Chocolate Sandwich Cookies,61,19,5.8,389851,709,2,0,21,6.0,20,0,both,Mid-range product,Busiest day,Busiest days
3,1,Chocolate Sandwich Cookies,61,19,5.8,652770,764,1,3,13,,10,0,both,Mid-range product,Regularly busy,Least busy days
4,1,Chocolate Sandwich Cookies,61,19,5.8,1813452,764,3,4,17,9.0,11,1,both,Mid-range product,Least busy,Least busy days


In [59]:
# Count rows per 'busiest_days' label, keeping missing values as their own entry.
day_group_column = ords_prods_merged['busiest_days']
day_group_column.value_counts(dropna=False)

Regularly busy     12916111
Busiest days       11864412
Least busy days     7624336
Name: busiest_days, dtype: int64

In [63]:
# Put the 'busiest_days' frequency table (missing values included) on the clipboard.
busiest_days_counts = ords_prods_merged['busiest_days'].value_counts(dropna=False)
busiest_days_counts.to_clipboard()

#### The results are now aggregates of the original frequencies

## 4 Creating busiest periods column

In [64]:
# Count rows per hour of day, keeping missing values as their own entry.
order_hours = ords_prods_merged['order_hour_of_day']
order_hours.value_counts(dropna=False)

10    2761760
11    2736140
14    2689136
15    2662144
13    2660954
12    2618532
16    2535202
9     2454203
17    2087654
8     1718118
18    1636502
19    1258305
20     976156
7      891054
21     795637
22     634225
23     402316
6      290493
0      218769
1      115700
5       87961
2       69375
4       53242
3       51281
Name: order_hour_of_day, dtype: int64

#### The fewest orders occur in the early morning, from hours 0-6
#### The most orders occur in the daytime, from hours 9-16
#### Average orders occur in the morning from hours 7-8 and in the evening/night from hours 17-23



In [65]:
# Hour-of-day groups used to label order volume:
# hours 0 through 6 form the "fewest orders" group, hours 9 through 16 the
# "most orders" group.
fewest_orders = list(range(7))
most_orders = list(range(9, 17))


In [66]:
# Label each row by its order hour: hours in fewest_orders -> 'Fewest orders',
# hours in most_orders -> 'Most orders', anything else (including a missing
# hour) -> 'Average orders'.
# A dict lookup through Series.map replaces the per-row list-membership test
# in a Python-level loop. The fewest_orders entries are merged last so they
# take precedence if an hour were ever listed in both groups.
hour_labels = {
    **dict.fromkeys(most_orders, 'Most orders'),
    **dict.fromkeys(fewest_orders, 'Fewest orders'),
}
result = (
    ords_prods_merged['order_hour_of_day']
    .map(hour_labels)  # hours not in the dict become NaN
    .fillna('Average orders')
    .to_numpy()  # plain positional array, so assigning it as a column ignores the index
)
    


In [67]:
# Store the labels held in `result` as the new 'busiest_periods' column;
# the values are matched to the rows by position.
ords_prods_merged['busiest_periods']=result

In [68]:
# Preview the first five rows, now including 'busiest_periods'.
ords_prods_merged.iloc[:5]

Unnamed: 0,product_id,product_name,aisle_id,department_id,prices,order_id,user_id,order_number,orders_day_of_week,order_hour_of_day,days_since_prior_order,add_to_cart_order,reordered,_merge,price_range_loc,busiest_day,busiest_days,busiest_periods
0,1,Chocolate Sandwich Cookies,61,19,5.8,3139998,138,28,6,11,3.0,5,0,both,Mid-range product,Regularly busy,Regularly busy,Most orders
1,1,Chocolate Sandwich Cookies,61,19,5.8,1977647,138,30,6,17,20.0,1,1,both,Mid-range product,Regularly busy,Regularly busy,Average orders
2,1,Chocolate Sandwich Cookies,61,19,5.8,389851,709,2,0,21,6.0,20,0,both,Mid-range product,Busiest day,Busiest days,Average orders
3,1,Chocolate Sandwich Cookies,61,19,5.8,652770,764,1,3,13,,10,0,both,Mid-range product,Regularly busy,Least busy days,Most orders
4,1,Chocolate Sandwich Cookies,61,19,5.8,1813452,764,3,4,17,9.0,11,1,both,Mid-range product,Least busy,Least busy days,Average orders


In [69]:
# Count rows per 'busiest_periods' label, keeping missing values as their own entry.
period_column = ords_prods_merged['busiest_periods']
period_column.value_counts(dropna=False)

Most orders       21118071
Average orders    10399967
Fewest orders       886821
Name: busiest_periods, dtype: int64

In [72]:
# Put the 'busiest_periods' frequency table (missing values included) on the clipboard.
busiest_periods_counts = ords_prods_merged['busiest_periods'].value_counts(dropna=False)
busiest_periods_counts.to_clipboard()

## 5 Exporting dataframe

In [73]:
# Save the frame, including the new label columns, as a pickle under
# <path>/02 Data/Prepared.
export_path = os.path.join(path, '02 Data', 'Prepared', 'orders_products_merged_newcols.pkl')
ords_prods_merged.to_pickle(export_path)