In [76]:
import pandas as pd
import numpy as np

In [77]:
# Reload `final_merge` from IPython's %store persistence; it must have been saved
# beforehand with `%store final_merge` (presumably in another notebook — confirm).
%store -r final_merge

In [78]:
# Preview the first five rows of the restored frame
final_merge.head(n=5)

Unnamed: 0,order_id,order_date,state,region,ship_mode,ready_to_ship_date,pickup_date,arrival_scan_date,order_started_day
0,CA-2017-100006,2017-09-07,New York,East,Standard Class,NaT,NaT,NaT,Thursday
1,CA-2017-100090,2017-07-08,California,West,Standard Class,NaT,NaT,NaT,Saturday
2,CA-2017-100293,2017-03-14,Florida,South,Standard Class,NaT,NaT,NaT,Tuesday
3,CA-2017-100328,2017-01-28,New York,East,Standard Class,NaT,NaT,NaT,Saturday
4,CA-2017-100363,2017-04-08,Arizona,West,Standard Class,NaT,NaT,NaT,Saturday


In [79]:
# Work on an independent deep copy so the restored `final_merge` is left untouched
order_start_day = final_merge.copy(deep=True)

In [80]:
# Narrow the frame to the two date columns used in the rest of this analysis
order_start_day = order_start_day.filter(
    items=['order_date', 'ready_to_ship_date'],
    axis='columns',
)
order_start_day.head(n=2)

Unnamed: 0,order_date,ready_to_ship_date
0,2017-09-07,NaT
1,2017-07-08,NaT


In [81]:
# (rows, columns) of the two-column frame, before rows with missing dates are dropped
order_start_day.shape

(5010, 2)

In [82]:
# Count the missing values in each column of the frame
order_start_day.isna().sum()

order_date               0
ready_to_ship_date    4806
dtype: int64

In [83]:
# Discard every order that has no ready-to-ship date recorded (modifies the frame in place)
order_start_day.dropna(axis='index', subset=['ready_to_ship_date'], inplace=True)

In [84]:
# Add the weekday number of each order date (Monday = 0 ... Sunday = 6)
order_start_day['order_day'] = order_start_day['order_date'].dt.weekday
order_start_day.head(n=2)

Unnamed: 0,order_date,ready_to_ship_date,order_day
1703,2019-09-20,2019-09-24,4
1711,2019-09-24,2019-09-30,1


In [85]:
# Add the weekday name of each order date (e.g. "Friday") alongside the numeric order_day
order_start_day['order_day_name'] = order_start_day['order_date'].dt.day_name()
order_start_day.head(n=2)

Unnamed: 0,order_date,ready_to_ship_date,order_day,order_day_name
1703,2019-09-20,2019-09-24,4,Friday
1711,2019-09-24,2019-09-30,1,Tuesday


In [86]:
# Calculate the processing date: weekend orders roll forward to the following Monday
# (Saturday + 2 days, Sunday + 1 day); Monday-Friday orders keep their order_date.
# One weekday -> offset mapping is used instead of two consecutive np.where assignments,
# because the second assignment rebuilt the column from order_date and therefore
# discarded the Saturday adjustment made by the first.
order_start_day['order_processing'] = order_start_day['order_date'] + pd.to_timedelta(
    order_start_day['order_date'].dt.dayofweek.map({5: 2, 6: 1}).fillna(0),  # days to add; 0 for Mon-Fri
    unit='D',
)
order_start_day



Unnamed: 0,order_date,ready_to_ship_date,order_day,order_day_name,order_processing
1703,2019-09-20,2019-09-24,4,Friday,2019-09-20
1711,2019-09-24,2019-09-30,1,Tuesday,2019-09-24
1740,2019-09-11,2019-09-16,2,Wednesday,2019-09-11
1747,2019-09-19,2019-09-23,3,Thursday,2019-09-19
1748,2019-09-06,2019-09-09,4,Friday,2019-09-06
...,...,...,...,...,...
4985,2020-11-02,2020-11-04,0,Monday,2020-11-02
4988,2020-11-13,2020-11-16,4,Friday,2020-11-13
4990,2020-11-30,2020-12-03,0,Monday,2020-11-30
5002,2020-11-04,2020-11-04,2,Wednesday,2020-11-04


In [92]:
# Set the processing date to the next business day after the order date:
# Friday + 3 days and Saturday + 2 days both land on Monday; every other day
# (Sunday included) moves forward by one day.
# Computed for the whole column at once instead of writing row by row via iterrows()/.loc,
# and derived only from order_date, so the result does not depend on any previous
# value of order_processing.
order_start_day['order_processing'] = order_start_day['order_date'] + pd.to_timedelta(
    order_start_day['order_date'].dt.dayofweek.map({4: 3, 5: 2}).fillna(1),  # days to add; 1 unless Fri/Sat
    unit='D',
)
order_start_day

Unnamed: 0,order_date,ready_to_ship_date,order_day,order_day_name,order_processing
1703,2019-09-20,2019-09-24,4,Friday,2019-09-23
1711,2019-09-24,2019-09-30,1,Tuesday,2019-09-25
1740,2019-09-11,2019-09-16,2,Wednesday,2019-09-12
1747,2019-09-19,2019-09-23,3,Thursday,2019-09-20
1748,2019-09-06,2019-09-09,4,Friday,2019-09-09
...,...,...,...,...,...
4985,2020-11-02,2020-11-04,0,Monday,2020-11-03
4988,2020-11-13,2020-11-16,4,Friday,2020-11-16
4990,2020-11-30,2020-12-03,0,Monday,2020-12-01
5002,2020-11-04,2020-11-04,2,Wednesday,2020-11-05


In [88]:
# Keep only the orders placed on a Sunday (dayofweek 6)
sunday_orders = order_start_day.loc[order_start_day['order_date'].dt.dayofweek.eq(6)]
sunday_orders

Unnamed: 0,order_date,ready_to_ship_date,order_day,order_day_name,order_processing
2896,2020-11-01,2020-11-05,6,Sunday,2020-11-02
2987,2020-11-15,2020-11-23,6,Sunday,2020-11-16
3910,2020-11-08,2020-11-16,6,Sunday,2020-11-09


In [89]:
# Calculate the processing date: orders placed on Friday, Saturday, or Sunday are moved
# to the following Monday; Monday-Thursday orders keep their order_date.
# The offset depends on the weekday (Friday + 3, Saturday + 2, Sunday + 1). A flat
# + 3 days for all three would put Saturday orders on Tuesday and Sunday orders on Wednesday.
order_start_day['order_processing'] = order_start_day['order_date'] + pd.to_timedelta(
    order_start_day['order_date'].dt.dayofweek.map({4: 3, 5: 2, 6: 1}).fillna(0),  # days to add; 0 for Mon-Thu
    unit='D',
)
order_start_day


Unnamed: 0,order_date,ready_to_ship_date,order_day,order_day_name,order_processing
1703,2019-09-20,2019-09-24,4,Friday,2019-09-23
1711,2019-09-24,2019-09-30,1,Tuesday,2019-09-24
1740,2019-09-11,2019-09-16,2,Wednesday,2019-09-11
1747,2019-09-19,2019-09-23,3,Thursday,2019-09-19
1748,2019-09-06,2019-09-09,4,Friday,2019-09-09
...,...,...,...,...,...
4985,2020-11-02,2020-11-04,0,Monday,2020-11-02
4988,2020-11-13,2020-11-16,4,Friday,2020-11-16
4990,2020-11-30,2020-12-03,0,Monday,2020-11-30
5002,2020-11-04,2020-11-04,2,Wednesday,2020-11-04
