In [22]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [23]:
# Floats (decimal numbers) should be displayed rounded with 2 decimal places
pd.options.display.float_format = "{:,.2f}".format
# Set style for plots
plt.style.use('fivethirtyeight')

In [24]:
# read in csv file and display first 5 rows of datset
df = pd.read_csv("data/Orders.csv")
df.head()

Unnamed: 0,Index,Order ID,Order Date,Ship Mode,Customer ID,Customer Name,Origin Channel,Country/Region,City,State,Postal Code,Region,Category,Sub-Category,Product ID,Sales,Quantity,Discount,Profit
0,27,CA-2019-121755,16/1/2019,Second Class,EH-13945,Eric Hoffmann,Email,United States,Los Angeles,California,90049.0,West,Special Projects Muesil,Gluten Free,TEC-AC-10003027,90.57,3,0.0,11.77
1,45,CA-2019-118255,11/3/2019,First Class,ON-18715,Odella Nelson,Sales,United States,Eagan,Minnesota,55122.0,Central,Special Projects Muesil,Gluten Free,TEC-AC-10000171,45.98,2,0.0,19.77
2,48,CA-2019-169194,20/6/2019,Standard Class,LH-16900,Lena Hernandez,Email,United States,Dover,Delaware,19901.0,East,Special Projects Muesil,Gluten Free,TEC-AC-10002167,45.0,3,0.0,4.95
3,60,CA-2019-111682,17/6/2019,First Class,TB-21055,Ted Butterfield,Email,United States,Troy,New York,12180.0,East,Special Projects Muesil,Gluten Free,TEC-AC-10002167,30.0,2,0.0,3.3
4,63,CA-2018-135545,24/11/2018,Standard Class,KM-16720,Kunst Miller,Email,United States,Los Angeles,California,90004.0,West,Special Projects Muesil,Gluten Free,TEC-AC-10004633,13.98,2,0.0,6.15


In [25]:
df.columns

Index(['Index', 'Order ID', 'Order Date', 'Ship Mode', 'Customer ID',
       'Customer Name', 'Origin Channel', 'Country/Region', 'City', 'State',
       'Postal Code', 'Region', 'Category', 'Sub-Category', 'Product ID',
       'Sales', 'Quantity', 'Discount', 'Profit'],
      dtype='object')

In [26]:
df.shape

(9994, 19)

In [27]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9994 entries, 0 to 9993
Data columns (total 19 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Index           9994 non-null   int64  
 1   Order ID        9994 non-null   object 
 2   Order Date      9994 non-null   object 
 3   Ship Mode       9994 non-null   object 
 4   Customer ID     9994 non-null   object 
 5   Customer Name   9994 non-null   object 
 6   Origin Channel  9994 non-null   object 
 7   Country/Region  9994 non-null   object 
 8   City            9994 non-null   object 
 9   State           9994 non-null   object 
 10  Postal Code     9983 non-null   float64
 11  Region          9994 non-null   object 
 12  Category        9994 non-null   object 
 13  Sub-Category    9994 non-null   object 
 14  Product ID      9994 non-null   object 
 15  Sales           9994 non-null   float64
 16  Quantity        9994 non-null   int64  
 17  Discount        9994 non-null   f

In [28]:
df.drop(["Country/Region"], axis=1, inplace=True)
df.head()

Unnamed: 0,Index,Order ID,Order Date,Ship Mode,Customer ID,Customer Name,Origin Channel,City,State,Postal Code,Region,Category,Sub-Category,Product ID,Sales,Quantity,Discount,Profit
0,27,CA-2019-121755,16/1/2019,Second Class,EH-13945,Eric Hoffmann,Email,Los Angeles,California,90049.0,West,Special Projects Muesil,Gluten Free,TEC-AC-10003027,90.57,3,0.0,11.77
1,45,CA-2019-118255,11/3/2019,First Class,ON-18715,Odella Nelson,Sales,Eagan,Minnesota,55122.0,Central,Special Projects Muesil,Gluten Free,TEC-AC-10000171,45.98,2,0.0,19.77
2,48,CA-2019-169194,20/6/2019,Standard Class,LH-16900,Lena Hernandez,Email,Dover,Delaware,19901.0,East,Special Projects Muesil,Gluten Free,TEC-AC-10002167,45.0,3,0.0,4.95
3,60,CA-2019-111682,17/6/2019,First Class,TB-21055,Ted Butterfield,Email,Troy,New York,12180.0,East,Special Projects Muesil,Gluten Free,TEC-AC-10002167,30.0,2,0.0,3.3
4,63,CA-2018-135545,24/11/2018,Standard Class,KM-16720,Kunst Miller,Email,Los Angeles,California,90004.0,West,Special Projects Muesil,Gluten Free,TEC-AC-10004633,13.98,2,0.0,6.15


Check for duplicates in original data (sheets file): there are no data to signify an individual dataset (row) as order_ids and product_ids are assigned several times, e.g. order of two different products by same customer gets same order_id.

In [29]:
df["Order Date"]= df["Order Date"].str.replace("/", ".")
df.head()

Unnamed: 0,Index,Order ID,Order Date,Ship Mode,Customer ID,Customer Name,Origin Channel,City,State,Postal Code,Region,Category,Sub-Category,Product ID,Sales,Quantity,Discount,Profit
0,27,CA-2019-121755,16.1.2019,Second Class,EH-13945,Eric Hoffmann,Email,Los Angeles,California,90049.0,West,Special Projects Muesil,Gluten Free,TEC-AC-10003027,90.57,3,0.0,11.77
1,45,CA-2019-118255,11.3.2019,First Class,ON-18715,Odella Nelson,Sales,Eagan,Minnesota,55122.0,Central,Special Projects Muesil,Gluten Free,TEC-AC-10000171,45.98,2,0.0,19.77
2,48,CA-2019-169194,20.6.2019,Standard Class,LH-16900,Lena Hernandez,Email,Dover,Delaware,19901.0,East,Special Projects Muesil,Gluten Free,TEC-AC-10002167,45.0,3,0.0,4.95
3,60,CA-2019-111682,17.6.2019,First Class,TB-21055,Ted Butterfield,Email,Troy,New York,12180.0,East,Special Projects Muesil,Gluten Free,TEC-AC-10002167,30.0,2,0.0,3.3
4,63,CA-2018-135545,24.11.2018,Standard Class,KM-16720,Kunst Miller,Email,Los Angeles,California,90004.0,West,Special Projects Muesil,Gluten Free,TEC-AC-10004633,13.98,2,0.0,6.15


In [38]:
df["Order Date"]=pd.to_datetime(df["Order Date"], format='%d.%m.%Y')

df["Order Date"]=pd.to_datetime(df["Order Date"].dt.date)
df.head()

Unnamed: 0,Index,Order ID,Order Date,Ship Mode,Customer ID,Customer Name,Origin Channel,City,State,Postal Code,Region,Category,Sub-Category,Product ID,Sales,Quantity,Discount,Profit
0,27,CA-2019-121755,2019-01-16,Second Class,EH-13945,Eric Hoffmann,Email,Los Angeles,California,90049.0,West,Special Projects Muesil,Gluten Free,TEC-AC-10003027,90.57,3,0.0,11.77
1,45,CA-2019-118255,2019-03-11,First Class,ON-18715,Odella Nelson,Sales,Eagan,Minnesota,55122.0,Central,Special Projects Muesil,Gluten Free,TEC-AC-10000171,45.98,2,0.0,19.77
2,48,CA-2019-169194,2019-06-20,Standard Class,LH-16900,Lena Hernandez,Email,Dover,Delaware,19901.0,East,Special Projects Muesil,Gluten Free,TEC-AC-10002167,45.0,3,0.0,4.95
3,60,CA-2019-111682,2019-06-17,First Class,TB-21055,Ted Butterfield,Email,Troy,New York,12180.0,East,Special Projects Muesil,Gluten Free,TEC-AC-10002167,30.0,2,0.0,3.3
4,63,CA-2018-135545,2018-11-24,Standard Class,KM-16720,Kunst Miller,Email,Los Angeles,California,90004.0,West,Special Projects Muesil,Gluten Free,TEC-AC-10004633,13.98,2,0.0,6.15


In [31]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9994 entries, 0 to 9993
Data columns (total 18 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Index           9994 non-null   int64         
 1   Order ID        9994 non-null   object        
 2   Order Date      9994 non-null   datetime64[ns]
 3   Ship Mode       9994 non-null   object        
 4   Customer ID     9994 non-null   object        
 5   Customer Name   9994 non-null   object        
 6   Origin Channel  9994 non-null   object        
 7   City            9994 non-null   object        
 8   State           9994 non-null   object        
 9   Postal Code     9983 non-null   float64       
 10  Region          9994 non-null   object        
 11  Category        9994 non-null   object        
 12  Sub-Category    9994 non-null   object        
 13  Product ID      9994 non-null   object        
 14  Sales           9994 non-null   float64       
 15  Quan

In [32]:
# export Dataframe into csv File on my Computer
df.to_csv('/Users/christian/neuefische/da-EDA_Project_Ulla-Christian/orders_cleaned.csv')

In [33]:
# make copy of df and name as df_orders for better distinguishabliity
#df_orders=pd.DataFrame(df)

In [44]:
df_opd = pd.read_csv("data/OrderProcessData.csv")
df_opd.head()

Unnamed: 0,Row ID,Order ID,Order Date,On Truck Scan Date,Ship Mode
0,3074,CA-2019-125206,3/1/2019,07/01/2019,Express
1,4919,CA-2019-160304,2/1/2019,09/01/2019,Standard Processing
2,4920,CA-2019-160304,2/1/2019,09/01/2019,Standard Processing
3,8604,US-2019-116365,3/1/2019,09/01/2019,Standard Processing
4,8605,US-2019-116365,3/1/2019,09/01/2019,Standard Processing


In [45]:
df_opd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5899 entries, 0 to 5898
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Row ID              5899 non-null   int64 
 1   Order ID            5899 non-null   object
 2   Order Date          5899 non-null   object
 3   On Truck Scan Date  5899 non-null   object
 4   Ship Mode           5899 non-null   object
dtypes: int64(1), object(4)
memory usage: 230.6+ KB


In [46]:
# check for duplicates
df_opd["Row ID"].duplicated().value_counts()# convert trending_date and publish_time into datetime and extract date part from publish_time
# format specifies the present form of our argument we pass into the function
#df['trending_date'] = pd.to_datetime(df['trending_date'], format='%y.%d.%m')
#df['publish_time'] = pd.to_datetime(df['publish_time'], format='%Y-%m-%dT%H:%M:%S.%fZ')
#df['publish_date'] = pd.to_datetime(df['publish_time'].dt.date)


False    5899
Name: Row ID, dtype: int64

In [48]:
df_opd["Order Date"]= df["Order Date"].str.replace("/", ".")

AttributeError: Can only use .str accessor with string values!

In [49]:
# date time for Order Date
df_opd["Order Date"]=pd.to_datetime(df_opd["Order Date"], format='%d.%m.%Y')
df_opd["Order Date"]=pd.to_datetime(df_opd["Order Date"].dt.date)
df_opd.head()

ValueError: time data '3/1/2019' does not match format '%d.%m.%Y' (match)

In [50]:
# date time for On Truck Scan 
df_opd["On Truck Scan Date"]= df_opd["On Truck Scan Date"].str.replace("/", ".")
df_opd["On Truck Scan Date"]=pd.to_datetime(df_opd["On Truck Scan Date"], format='%d.%m.%Y')
df_opd["On Truck Scan Date"]=pd.to_datetime(df_opd["On Truck Scan Date"].dt.date)
df_opd.head()


Unnamed: 0,Row ID,Order ID,Order Date,On Truck Scan Date,Ship Mode
0,3074,CA-2019-125206,3/1/2019,2019-01-07,Express
1,4919,CA-2019-160304,2/1/2019,2019-01-09,Standard Processing
2,4920,CA-2019-160304,2/1/2019,2019-01-09,Standard Processing
3,8604,US-2019-116365,3/1/2019,2019-01-09,Standard Processing
4,8605,US-2019-116365,3/1/2019,2019-01-09,Standard Processing


In [51]:
# date time for On Truck Scan 
df_opd["Order Date"]= df_opd["Order Date"].str.replace("/", ".")
df_opd["Order Date"]=pd.to_datetime(df_opd["Order Date"], format='%d.%m.%Y')
df_opd["Order Date"]=pd.to_datetime(df_opd["Order Date"].dt.date)
df_opd.head()



Unnamed: 0,Row ID,Order ID,Order Date,On Truck Scan Date,Ship Mode
0,3074,CA-2019-125206,2019-01-03,2019-01-07,Express
1,4919,CA-2019-160304,2019-01-02,2019-01-09,Standard Processing
2,4920,CA-2019-160304,2019-01-02,2019-01-09,Standard Processing
3,8604,US-2019-116365,2019-01-03,2019-01-09,Standard Processing
4,8605,US-2019-116365,2019-01-03,2019-01-09,Standard Processing


In [52]:
# read Intern Data Study
df_ids = pd.read_csv("data/InternDataStudy.csv")
df_ids.head()

Unnamed: 0,Order ID,Ready to Ship Date,Pickup Date
0,CA-2019-116540,02/09/2019,03/09/2019
1,CA-2019-116540,02/09/2019,03/09/2019
2,CA-2019-129847,04/09/2019,04/09/2019
3,CA-2019-129630,04/09/2019,04/09/2019
4,CA-2019-106278,05/09/2019,06/09/2019


In [53]:
df_ids.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 290 entries, 0 to 289
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Order ID            290 non-null    object
 1   Ready to Ship Date  290 non-null    object
 2   Pickup Date         290 non-null    object
dtypes: object(3)
memory usage: 6.9+ KB


In [54]:
# check for duplicates
df_ids["Order ID"].duplicated().value_counts()


False    204
True      86
Name: Order ID, dtype: int64

Check if the same order ID is always on the same truck or not. If not, then no duplicate!

In [55]:
# Datetime for Ready to Ship Date
df_ids["Ready to Ship Date"]= df_ids["Ready to Ship Date"].str.replace("/", ".")
df_ids["Ready to Ship Date"]=pd.to_datetime(df_ids["Ready to Ship Date"], format='%d.%m.%Y')
df_ids["Ready to Ship Date"]=pd.to_datetime(df_ids["Ready to Ship Date"].dt.date)
df_ids.head()

Unnamed: 0,Order ID,Ready to Ship Date,Pickup Date
0,CA-2019-116540,2019-09-02,03/09/2019
1,CA-2019-116540,2019-09-02,03/09/2019
2,CA-2019-129847,2019-09-04,04/09/2019
3,CA-2019-129630,2019-09-04,04/09/2019
4,CA-2019-106278,2019-09-05,06/09/2019


In [56]:
# Datetime for Pickup Date
df_ids["Pickup Date"]= df_ids["Pickup Date"].str.replace("/", ".")
df_ids["Pickup Date"]=pd.to_datetime(df_ids["Pickup Date"], format='%d.%m.%Y')
df_ids["Pickup Date"]=pd.to_datetime(df_ids["Pickup Date"].dt.date)
df_ids.head()

Unnamed: 0,Order ID,Ready to Ship Date,Pickup Date
0,CA-2019-116540,2019-09-02,2019-09-03
1,CA-2019-116540,2019-09-02,2019-09-03
2,CA-2019-129847,2019-09-04,2019-09-04
3,CA-2019-129630,2019-09-04,2019-09-04
4,CA-2019-106278,2019-09-05,2019-09-06


In [57]:
# read Campaign Data
df_cd = pd.read_csv("data/Campaign Data.csv")
df_cd.head()

Unnamed: 0,Order ID,Arrival Scan Date,Customer Name
0,CA-2019-109666,03/05/2019,Kunst Miller
1,CA-2019-138933,03/05/2019,Jack Lebron
2,CA-2019-130001,03/05/2019,Heather Kirkland
3,CA-2019-113061,06/05/2019,Ed Ludwig
4,CA-2019-162138,06/05/2019,Grace Kelly


In [58]:
df_cd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 333 entries, 0 to 332
Data columns (total 3 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Order ID           333 non-null    object
 1   Arrival Scan Date  333 non-null    object
 2   Customer Name      333 non-null    object
dtypes: object(3)
memory usage: 7.9+ KB


In [59]:
# Datetime for Arrival Scan Date
df_cd["Arrival Scan Date"]= df_cd["Arrival Scan Date"].str.replace("/", ".")
df_cd["Arrival Scan Date"]=pd.to_datetime(df_cd["Arrival Scan Date"], format='%d.%m.%Y')
df_cd["Arrival Scan Date"]=pd.to_datetime(df_cd["Arrival Scan Date"].dt.date)
df_cd.head()

Unnamed: 0,Order ID,Arrival Scan Date,Customer Name
0,CA-2019-109666,2019-05-03,Kunst Miller
1,CA-2019-138933,2019-05-03,Jack Lebron
2,CA-2019-130001,2019-05-03,Heather Kirkland
3,CA-2019-113061,2019-05-06,Ed Ludwig
4,CA-2019-162138,2019-05-06,Grace Kelly


In [60]:
# import Dataframe from csv File on my Computer
df_hypo = pd.read_csv("/Users/christian/neuefische/da-EDA_Project_Ulla-Christian/orders_cleaned.csv")

In [61]:
df_hypo.head()

Unnamed: 0.1,Unnamed: 0,Index,Order ID,Order Date,Ship Mode,Customer ID,Customer Name,Origin Channel,City,State,Postal Code,Region,Category,Sub-Category,Product ID,Sales,Quantity,Discount,Profit
0,0,27,CA-2019-121755,2019-01-16,Second Class,EH-13945,Eric Hoffmann,Email,Los Angeles,California,90049.0,West,Special Projects Muesil,Gluten Free,TEC-AC-10003027,90.57,3,0.0,11.77
1,1,45,CA-2019-118255,2019-03-11,First Class,ON-18715,Odella Nelson,Sales,Eagan,Minnesota,55122.0,Central,Special Projects Muesil,Gluten Free,TEC-AC-10000171,45.98,2,0.0,19.77
2,2,48,CA-2019-169194,2019-06-20,Standard Class,LH-16900,Lena Hernandez,Email,Dover,Delaware,19901.0,East,Special Projects Muesil,Gluten Free,TEC-AC-10002167,45.0,3,0.0,4.95
3,3,60,CA-2019-111682,2019-06-17,First Class,TB-21055,Ted Butterfield,Email,Troy,New York,12180.0,East,Special Projects Muesil,Gluten Free,TEC-AC-10002167,30.0,2,0.0,3.3
4,4,63,CA-2018-135545,2018-11-24,Standard Class,KM-16720,Kunst Miller,Email,Los Angeles,California,90004.0,West,Special Projects Muesil,Gluten Free,TEC-AC-10004633,13.98,2,0.0,6.15


In [62]:
inner_join = pd.merge(df_hypo, 
                      df_opd, 
                      on ='Order ID', 
                      how ='inner')
inner_join

Unnamed: 0.1,Unnamed: 0,Index,Order ID,Order Date_x,Ship Mode_x,Customer ID,Customer Name,Origin Channel,City,State,...,Sub-Category,Product ID,Sales,Quantity,Discount,Profit,Row ID,Order Date_y,On Truck Scan Date,Ship Mode_y
0,0,27,CA-2019-121755,2019-01-16,Second Class,EH-13945,Eric Hoffmann,Email,Los Angeles,California,...,Gluten Free,TEC-AC-10003027,90.57,3,0.00,11.77,26,2019-01-16,2019-01-23,Standard Processing
1,0,27,CA-2019-121755,2019-01-16,Second Class,EH-13945,Eric Hoffmann,Email,Los Angeles,California,...,Gluten Free,TEC-AC-10003027,90.57,3,0.00,11.77,27,2019-01-16,2019-01-23,Standard Processing
2,5558,26,CA-2019-121755,2019-01-16,Second Class,EH-13945,Eric Hoffmann,Email,Los Angeles,California,...,Super Fibre Boost,OFF-BI-10001634,11.65,2,0.20,4.22,26,2019-01-16,2019-01-23,Standard Processing
3,5558,26,CA-2019-121755,2019-01-16,Second Class,EH-13945,Eric Hoffmann,Email,Los Angeles,California,...,Super Fibre Boost,OFF-BI-10001634,11.65,2,0.20,4.22,27,2019-01-16,2019-01-23,Standard Processing
4,1,45,CA-2019-118255,2019-03-11,First Class,ON-18715,Odella Nelson,Sales,Eagan,Minnesota,...,Gluten Free,TEC-AC-10000171,45.98,2,0.00,19.77,45,2019-03-11,2019-03-13,Express
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17520,9963,9463,US-2020-109610,2020-11-25,Second Class,BS-11590,Brendan Sweed,Sales,Louisville,Kentucky,...,With Nuts,FUR-CH-10001854,701.96,2,0.00,168.47,9463,2020-11-25,2020-12-02,Standard Processing
17521,9979,9760,CA-2019-146913,2019-10-31,Standard Class,SF-20965,Sylvia Foulston,Sales,San Francisco,California,...,With Nuts,FUR-CH-10001854,1403.92,5,0.20,70.20,9760,2019-10-31,2019-11-06,Standard Processing
17522,9984,9808,CA-2020-107209,2020-07-27,Second Class,JW-15955,Joni Wasserman,Email,Raleigh,North Carolina,...,With Nuts,FUR-CH-10001146,194.85,4,0.20,12.18,9808,2020-07-27,2020-08-05,Standard Processing
17523,9986,9830,US-2020-152842,2020-07-16,Standard Class,NF-18385,Natalie Fritzler,Email,Charlotte,North Carolina,...,With Nuts,FUR-CH-10004218,242.35,3,0.20,15.15,9830,2020-07-16,2020-07-24,Standard Processing


In [63]:
# check if second class and standard class were unified in one class standard processing and first class renamed in Express in df_opd
inner_join[['Index', 'Order ID', 'Order Date_x', 'Order Date_y', 'Ship Mode_x','Ship Mode_y']]

Unnamed: 0,Index,Order ID,Order Date_x,Order Date_y,Ship Mode_x,Ship Mode_y
0,27,CA-2019-121755,2019-01-16,2019-01-16,Second Class,Standard Processing
1,27,CA-2019-121755,2019-01-16,2019-01-16,Second Class,Standard Processing
2,26,CA-2019-121755,2019-01-16,2019-01-16,Second Class,Standard Processing
3,26,CA-2019-121755,2019-01-16,2019-01-16,Second Class,Standard Processing
4,45,CA-2019-118255,2019-03-11,2019-03-11,First Class,Express
...,...,...,...,...,...,...
17520,9463,US-2020-109610,2020-11-25,2020-11-25,Second Class,Standard Processing
17521,9760,CA-2019-146913,2019-10-31,2019-10-31,Standard Class,Standard Processing
17522,9808,CA-2020-107209,2020-07-27,2020-07-27,Second Class,Standard Processing
17523,9830,US-2020-152842,2020-07-16,2020-07-16,Standard Class,Standard Processing


In [64]:
# rename ship modes in df_hypo

df_hypo.loc[df_hypo['Ship Mode'] == 'Standard Class', 'Ship Mode'] = "Standard Processing"
df_hypo.loc[df_hypo['Ship Mode'] == 'Second Class', 'Ship Mode'] = "Standard Processing"
df_hypo.loc[df_hypo['Ship Mode'] == 'First Class', 'Ship Mode'] = "Express"
df_hypo['Unnamed: 0'].drop
df_hypo.head()



Unnamed: 0.1,Unnamed: 0,Index,Order ID,Order Date,Ship Mode,Customer ID,Customer Name,Origin Channel,City,State,Postal Code,Region,Category,Sub-Category,Product ID,Sales,Quantity,Discount,Profit
0,0,27,CA-2019-121755,2019-01-16,Standard Processing,EH-13945,Eric Hoffmann,Email,Los Angeles,California,90049.0,West,Special Projects Muesil,Gluten Free,TEC-AC-10003027,90.57,3,0.0,11.77
1,1,45,CA-2019-118255,2019-03-11,Express,ON-18715,Odella Nelson,Sales,Eagan,Minnesota,55122.0,Central,Special Projects Muesil,Gluten Free,TEC-AC-10000171,45.98,2,0.0,19.77
2,2,48,CA-2019-169194,2019-06-20,Standard Processing,LH-16900,Lena Hernandez,Email,Dover,Delaware,19901.0,East,Special Projects Muesil,Gluten Free,TEC-AC-10002167,45.0,3,0.0,4.95
3,3,60,CA-2019-111682,2019-06-17,Express,TB-21055,Ted Butterfield,Email,Troy,New York,12180.0,East,Special Projects Muesil,Gluten Free,TEC-AC-10002167,30.0,2,0.0,3.3
4,4,63,CA-2018-135545,2018-11-24,Standard Processing,KM-16720,Kunst Miller,Email,Los Angeles,California,90004.0,West,Special Projects Muesil,Gluten Free,TEC-AC-10004633,13.98,2,0.0,6.15


In [65]:
# check if process time is 2 days for standard processing formula = ready to ship - order date
check_process_time = pd.merge(df_hypo, 
                      df_ids, 
                      on ='Order ID', 
                      how ='inner')
check_process_time

Unnamed: 0.1,Unnamed: 0,Index,Order ID,Order Date,Ship Mode,Customer ID,Customer Name,Origin Channel,City,State,...,Region,Category,Sub-Category,Product ID,Sales,Quantity,Discount,Profit,Ready to Ship Date,Pickup Date
0,8,107,CA-2020-119004,2020-11-23,Standard Processing,JM-15250,Janet Martin,Email,Charlotte,North Carolina,...,South,Special Projects Muesil,Gluten Free,TEC-AC-10003499,74.11,8,0.20,17.60,2020-11-30,2020-12-02
1,8,107,CA-2020-119004,2020-11-23,Standard Processing,JM-15250,Janet Martin,Email,Charlotte,North Carolina,...,South,Special Projects Muesil,Gluten Free,TEC-AC-10003499,74.11,8,0.20,17.60,2020-11-30,2020-12-02
2,3507,108,CA-2020-119004,2020-11-23,Standard Processing,JM-15250,Janet Martin,Email,Charlotte,North Carolina,...,South,Special Projects Muesil,Only Oats,TEC-PH-10002844,27.99,1,0.20,2.10,2020-11-30,2020-12-02
3,3507,108,CA-2020-119004,2020-11-23,Standard Processing,JM-15250,Janet Martin,Email,Charlotte,North Carolina,...,South,Special Projects Muesil,Only Oats,TEC-PH-10002844,27.99,1,0.20,2.10,2020-11-30,2020-12-02
4,7086,109,CA-2020-119004,2020-11-23,Standard Processing,JM-15250,Janet Martin,Email,Charlotte,North Carolina,...,South,Power Muesli,Super Mega Protein,OFF-AR-10000390,3.30,1,0.20,1.07,2020-11-30,2020-12-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
748,9431,961,CA-2020-152142,2020-11-14,Standard Processing,LW-16990,Lindsay Williams,Sales,San Francisco,California,...,West,Toasted Muesli,With Nuts,FUR-CH-10002965,321.57,2,0.20,28.14,2020-11-16,2020-11-18
749,9453,1314,US-2019-134488,2019-09-24,Standard Processing,PK-19075,Pete Kriz,Email,Columbus,Ohio,...,East,Toasted Muesli,With Nuts,FUR-CH-10003199,155.37,2,0.30,-13.32,2019-10-01,2019-10-02
750,9661,4753,CA-2020-103499,2020-11-20,Standard Processing,ES-14020,Erica Smith,Email,Jackson,Tennessee,...,South,Toasted Muesli,With Nuts,FUR-CH-10001482,209.57,2,0.20,-23.58,2020-11-24,2020-11-25
751,9948,9289,US-2020-165456,2020-11-30,Express,TB-21625,Trudy Brown,Email,Philadelphia,Pennsylvania,...,East,Toasted Muesli,With Nuts,FUR-CH-10003981,1079.32,6,0.30,-15.42,2020-12-03,2020-12-04


In [66]:
check_process_time[['Order ID', 'Order Date','Ready to Ship Date', 'Ship Mode']]

Unnamed: 0,Order ID,Order Date,Ready to Ship Date,Ship Mode
0,CA-2020-119004,2020-11-23,2020-11-30,Standard Processing
1,CA-2020-119004,2020-11-23,2020-11-30,Standard Processing
2,CA-2020-119004,2020-11-23,2020-11-30,Standard Processing
3,CA-2020-119004,2020-11-23,2020-11-30,Standard Processing
4,CA-2020-119004,2020-11-23,2020-11-30,Standard Processing
...,...,...,...,...
748,CA-2020-152142,2020-11-14,2020-11-16,Standard Processing
749,US-2019-134488,2019-09-24,2019-10-01,Standard Processing
750,CA-2020-103499,2020-11-20,2020-11-24,Standard Processing
751,US-2020-165456,2020-11-30,2020-12-03,Express


In [67]:
check_process_time.columns = check_process_time.columns.str.lower()
check_process_time.columns = check_process_time.columns.str.replace(' ','_')
check_process_time

Unnamed: 0,unnamed:_0,index,order_id,order_date,ship_mode,customer_id,customer_name,origin_channel,city,state,...,region,category,sub-category,product_id,sales,quantity,discount,profit,ready_to_ship_date,pickup_date
0,8,107,CA-2020-119004,2020-11-23,Standard Processing,JM-15250,Janet Martin,Email,Charlotte,North Carolina,...,South,Special Projects Muesil,Gluten Free,TEC-AC-10003499,74.11,8,0.20,17.60,2020-11-30,2020-12-02
1,8,107,CA-2020-119004,2020-11-23,Standard Processing,JM-15250,Janet Martin,Email,Charlotte,North Carolina,...,South,Special Projects Muesil,Gluten Free,TEC-AC-10003499,74.11,8,0.20,17.60,2020-11-30,2020-12-02
2,3507,108,CA-2020-119004,2020-11-23,Standard Processing,JM-15250,Janet Martin,Email,Charlotte,North Carolina,...,South,Special Projects Muesil,Only Oats,TEC-PH-10002844,27.99,1,0.20,2.10,2020-11-30,2020-12-02
3,3507,108,CA-2020-119004,2020-11-23,Standard Processing,JM-15250,Janet Martin,Email,Charlotte,North Carolina,...,South,Special Projects Muesil,Only Oats,TEC-PH-10002844,27.99,1,0.20,2.10,2020-11-30,2020-12-02
4,7086,109,CA-2020-119004,2020-11-23,Standard Processing,JM-15250,Janet Martin,Email,Charlotte,North Carolina,...,South,Power Muesli,Super Mega Protein,OFF-AR-10000390,3.30,1,0.20,1.07,2020-11-30,2020-12-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
748,9431,961,CA-2020-152142,2020-11-14,Standard Processing,LW-16990,Lindsay Williams,Sales,San Francisco,California,...,West,Toasted Muesli,With Nuts,FUR-CH-10002965,321.57,2,0.20,28.14,2020-11-16,2020-11-18
749,9453,1314,US-2019-134488,2019-09-24,Standard Processing,PK-19075,Pete Kriz,Email,Columbus,Ohio,...,East,Toasted Muesli,With Nuts,FUR-CH-10003199,155.37,2,0.30,-13.32,2019-10-01,2019-10-02
750,9661,4753,CA-2020-103499,2020-11-20,Standard Processing,ES-14020,Erica Smith,Email,Jackson,Tennessee,...,South,Toasted Muesli,With Nuts,FUR-CH-10001482,209.57,2,0.20,-23.58,2020-11-24,2020-11-25
751,9948,9289,US-2020-165456,2020-11-30,Express,TB-21625,Trudy Brown,Email,Philadelphia,Pennsylvania,...,East,Toasted Muesli,With Nuts,FUR-CH-10003981,1079.32,6,0.30,-15.42,2020-12-03,2020-12-04


In [86]:
check_process_time_standard = check_process_time.iloc[: , [1, 2,3,4,19,20]].copy()
check_process_time_standard["order_date"]=pd.to_datetime(check_process_time_standard["order_date"], format='%Y.%m.%d')
check_process_time_standard["order_date"]=pd.to_datetime(check_process_time_standard["order_date"].dt.date)
check_process_time_standard.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 753 entries, 0 to 752
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   index               753 non-null    int64         
 1   order_id            753 non-null    object        
 2   order_date          753 non-null    datetime64[ns]
 3   ship_mode           753 non-null    object        
 4   ready_to_ship_date  753 non-null    datetime64[ns]
 5   pickup_date         753 non-null    datetime64[ns]
dtypes: datetime64[ns](3), int64(1), object(2)
memory usage: 41.2+ KB


In [73]:
#check_process_time_standard.query('ready_to_ship_date - order_date').dt.days
#check_process_time_standard['process_time']=(check_process_time["ready_to_ship_date"]-check_process_time["order_date"]).dt.days
#check_process_time_standard['trending_days_difference']=(check_process_time_standard["trending_date"]-check_process_time_standard["publish_date"]).dt.days
#check_process_time_standard['process_time']=(check_process_time_standard["ready_to_ship_date"]-check_process_time_standard["order_date"]).dt.days
check_process_time_standard

Unnamed: 0,index,order_id,order_date,ship_mode,ready_to_ship_date,process_time
0,107,CA-2020-119004,2020-11-23,Standard Processing,2020-11-30,7
1,107,CA-2020-119004,2020-11-23,Standard Processing,2020-11-30,7
2,108,CA-2020-119004,2020-11-23,Standard Processing,2020-11-30,7
3,108,CA-2020-119004,2020-11-23,Standard Processing,2020-11-30,7
4,109,CA-2020-119004,2020-11-23,Standard Processing,2020-11-30,7
...,...,...,...,...,...,...
748,961,CA-2020-152142,2020-11-14,Standard Processing,2020-11-16,2
749,1314,US-2019-134488,2019-09-24,Standard Processing,2019-10-01,7
750,4753,CA-2020-103499,2020-11-20,Standard Processing,2020-11-24,4
751,9289,US-2020-165456,2020-11-30,Express,2020-12-03,3


In [81]:
#check_process_time_standard.describe()
#check_process_time_standard.query('ship_mode' == 'Standard Processing')
check_process_time_standard.loc[check_process_time_standard['ship_mode'] == 'Standard Processing'].describe()





Unnamed: 0,index,process_time
count,602.0,602.0
mean,5525.51,4.88
std,2776.21,1.26
min,97.0,2.0
25%,3139.0,4.0
50%,5506.0,5.0
75%,8170.0,6.0
max,9973.0,8.0


mean process time for standard processing is almost 5 days and the median is exactly 5 days. Müsli Comapanies assumption of 2 days processing time has to be rejected in this sample

In [82]:
check_process_time_standard.loc[check_process_time_standard['ship_mode'] == 'Express'].describe()


Unnamed: 0,index,process_time
count,151.0,151.0
mean,4598.68,2.08
std,3082.02,1.82
min,511.0,0.0
25%,1654.0,0.0
50%,4012.0,2.0
75%,7293.5,3.0
max,9930.0,5.0


mean process time for standard processing is almost 2 days and the median is exactly 2 days. Müsli Comapanies assumption of 2 days processing time for express processing is correct in this sample

In [87]:
check_process_time_standard['pick_up']=(check_process_time_standard["pickup_date"]-check_process_time_standard["order_date"]).dt.days
check_process_time_standard.describe()

Unnamed: 0,index,order_id,order_date,ship_mode,ready_to_ship_date,pickup_date,pick_up
0,107,CA-2020-119004,2020-11-23,Standard Processing,2020-11-30,2020-12-02,9
1,107,CA-2020-119004,2020-11-23,Standard Processing,2020-11-30,2020-12-02,9
2,108,CA-2020-119004,2020-11-23,Standard Processing,2020-11-30,2020-12-02,9
3,108,CA-2020-119004,2020-11-23,Standard Processing,2020-11-30,2020-12-02,9
4,109,CA-2020-119004,2020-11-23,Standard Processing,2020-11-30,2020-12-02,9
...,...,...,...,...,...,...,...
748,961,CA-2020-152142,2020-11-14,Standard Processing,2020-11-16,2020-11-18,4
749,1314,US-2019-134488,2019-09-24,Standard Processing,2019-10-01,2019-10-02,8
750,4753,CA-2020-103499,2020-11-20,Standard Processing,2020-11-24,2020-11-25,5
751,9289,US-2020-165456,2020-11-30,Express,2020-12-03,2020-12-04,4


In [88]:
check_process_time_standard.describe()

Unnamed: 0,index,pick_up
count,753.0,753.0
mean,5339.65,6.07
std,2862.22,2.36
min,97.0,0.0
25%,2973.0,5.0
50%,5333.0,7.0
75%,8046.0,8.0
max,9973.0,10.0


In [90]:
check_process_time_standard.loc[check_process_time_standard['ship_mode'] == 'Standard Processing'].describe()

Unnamed: 0,index,pick_up
count,602.0,602.0
mean,5525.51,6.98
std,2776.21,1.45
min,97.0,3.0
25%,3139.0,6.0
50%,5506.0,7.0
75%,8170.0,8.0
max,9973.0,10.0


In [91]:
check_process_time_standard.loc[check_process_time_standard['ship_mode'] == 'Express'].describe()

Unnamed: 0,index,pick_up
count,151.0,151.0
mean,4598.68,2.46
std,3082.02,1.75
min,511.0,0.0
25%,1654.0,1.0
50%,4012.0,2.0
75%,7293.5,4.0
max,9930.0,6.0


In [96]:
df_cd.columns = df_cd.columns.str.lower()
df_cd.columns = df_cd.columns.str.replace(' ','_')
df_cd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 333 entries, 0 to 332
Data columns (total 3 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   order_id           333 non-null    object        
 1   arrival_scan_date  333 non-null    datetime64[ns]
 2   customer_name      333 non-null    object        
dtypes: datetime64[ns](1), object(2)
memory usage: 7.9+ KB


In [97]:
check_process_time = pd.merge(check_process_time, 
                      df_cd, 
                      on ='order_id', 
                      how ='inner')
check_process_time


Unnamed: 0,unnamed:_0,index,order_id,order_date,ship_mode,customer_id,customer_name_x,origin_channel,city,state,...,sub-category,product_id,sales,quantity,discount,profit,ready_to_ship_date,pickup_date,arrival_scan_date,customer_name_y
0,17,252,CA-2019-145625,2019-09-11,Standard Processing,KC-16540,Kelly Collister,Email,San Diego,California,...,Gluten Free,TEC-AC-10003832,3347.37,13,0.00,636.00,2019-09-17,2019-09-18,2019-09-23,Kelly Collister
1,17,252,CA-2019-145625,2019-09-11,Standard Processing,KC-16540,Kelly Collister,Email,San Diego,California,...,Gluten Free,TEC-AC-10003832,3347.37,13,0.00,636.00,2019-09-17,2019-09-18,2019-09-23,Kelly Collister
2,2157,251,CA-2019-145625,2019-09-11,Standard Processing,KC-16540,Kelly Collister,Email,San Diego,California,...,Nuts and more,OFF-PA-10004569,7.61,1,0.00,3.58,2019-09-17,2019-09-18,2019-09-23,Kelly Collister
3,2157,251,CA-2019-145625,2019-09-11,Standard Processing,KC-16540,Kelly Collister,Email,San Diego,California,...,Nuts and more,OFF-PA-10004569,7.61,1,0.00,3.58,2019-09-17,2019-09-18,2019-09-23,Kelly Collister
4,85,1042,CA-2019-102981,2019-09-06,Standard Processing,MO-17500,Mary O'Rourke,Email,New York City,New York,...,Gluten Free,TEC-AC-10004761,31.86,2,0.00,11.15,2019-09-09,2019-09-11,2019-09-16,Mary O'Rourke
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79,5697,992,CA-2019-110023,2019-09-09,Express,TS-21610,Troy Staebel,Email,New York City,New York,...,Super Fibre Boost,OFF-BI-10001036,14.62,2,0.20,5.48,2019-09-11,2019-09-11,2019-09-16,Troy Staebel
80,6389,5399,CA-2019-163202,2019-09-27,Standard Processing,BM-11650,Brian Moss,Sales,Lancaster,Ohio,...,Super Fibre Boost,OFF-BI-10002764,2.91,3,0.70,-2.03,2019-10-02,2019-10-04,2019-10-07,Brian Moss
81,6835,8468,CA-2019-168557,2019-09-19,Standard Processing,FH-14275,Frank Hawley,Sales,San Francisco,California,...,Super Fibre Boost,OFF-BI-10002309,8.93,2,0.20,3.12,2019-09-23,2019-09-25,2019-09-30,Frank Hawley
82,8403,9281,CA-2019-166772,2019-09-20,Standard Processing,HJ-14875,Heather Jas,Facebook,Seattle,Washington,...,With Fruit,FUR-BO-10002853,163.88,2,0.00,40.97,2019-09-24,2019-09-25,2019-09-30,Heather Jas


In [107]:
check_process_time_standard = check_process_time.iloc[: , [1, 2,3,4,19,20,21]].copy()
check_process_time_standard["order_date"]=pd.to_datetime(check_process_time_standard["order_date"], format='%Y.%m.%d')
check_process_time_standard["order_date"]=pd.to_datetime(check_process_time_standard["order_date"].dt.date)
check_process_time_standard.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 84 entries, 0 to 83
Data columns (total 7 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   index               84 non-null     int64         
 1   order_id            84 non-null     object        
 2   order_date          84 non-null     datetime64[ns]
 3   ship_mode           84 non-null     object        
 4   ready_to_ship_date  84 non-null     datetime64[ns]
 5   pickup_date         84 non-null     datetime64[ns]
 6   arrival_scan_date   84 non-null     datetime64[ns]
dtypes: datetime64[ns](4), int64(1), object(2)
memory usage: 5.2+ KB


In [108]:
check_process_time_standard['arrival']=(check_process_time_standard["arrival_scan_date"]-check_process_time_standard["order_date"]).dt.days

check_process_time_standard

Unnamed: 0,index,order_id,order_date,ship_mode,ready_to_ship_date,pickup_date,arrival_scan_date,arrival
0,252,CA-2019-145625,2019-09-11,Standard Processing,2019-09-17,2019-09-18,2019-09-23,12
1,252,CA-2019-145625,2019-09-11,Standard Processing,2019-09-17,2019-09-18,2019-09-23,12
2,251,CA-2019-145625,2019-09-11,Standard Processing,2019-09-17,2019-09-18,2019-09-23,12
3,251,CA-2019-145625,2019-09-11,Standard Processing,2019-09-17,2019-09-18,2019-09-23,12
4,1042,CA-2019-102981,2019-09-06,Standard Processing,2019-09-09,2019-09-11,2019-09-16,10
...,...,...,...,...,...,...,...,...
79,992,CA-2019-110023,2019-09-09,Express,2019-09-11,2019-09-11,2019-09-16,7
80,5399,CA-2019-163202,2019-09-27,Standard Processing,2019-10-02,2019-10-04,2019-10-07,10
81,8468,CA-2019-168557,2019-09-19,Standard Processing,2019-09-23,2019-09-25,2019-09-30,11
82,9281,CA-2019-166772,2019-09-20,Standard Processing,2019-09-24,2019-09-25,2019-09-30,10


In [111]:
check_process_time_standard.loc[check_process_time_standard['ship_mode'] == 'Standard Processing'].describe()

Unnamed: 0,index,arrival
count,76.0,76.0
mean,3889.18,10.76
std,2393.04,1.47
min,99.0,8.0
25%,2638.25,10.0
50%,3413.0,10.0
75%,5333.0,12.0
max,9973.0,13.0


mean time between order and arrival for standard processing is almost 11 days and the median is exactly 10 days. Müsli Comapanies assumption of about 7 days between order and arrival is not correct for the sample (76)

In [110]:
check_process_time_standard.loc[check_process_time_standard['ship_mode'] == 'Express'].describe()

Unnamed: 0,index,arrival
count,8.0,8.0
mean,3908.75,7.12
std,4261.13,1.36
min,870.0,5.0
25%,870.75,6.5
50%,931.5,8.0
75%,7685.0,8.0
max,9930.0,8.0


mean time between order and arrival for express is about 7 days and the median is exactly 8 days. Müsli Comapanies assumption of about 7 days between order and arrival is almost correct for the sample (8)

In [117]:
df_opd.columns = df_opd.columns.str.lower()
df_opd.columns = df_opd.columns.str.replace(' ','_')
df_opd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5899 entries, 0 to 5898
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   row_id              5899 non-null   int64         
 1   order_id            5899 non-null   object        
 2   order_date          5899 non-null   datetime64[ns]
 3   on_truck_scan_date  5899 non-null   datetime64[ns]
 4   ship_mode           5899 non-null   object        
dtypes: datetime64[ns](2), int64(1), object(2)
memory usage: 230.6+ KB


In [120]:
check_process_time = pd.merge(check_process_time, 
                      df_opd, 
                      on ='order_id', 
                      how ='inner')
check_process_time

Unnamed: 0,unnamed:_0,index,order_id,order_date_x,ship_mode_x,customer_id,customer_name_x,origin_channel,city,state,...,discount,profit,ready_to_ship_date,pickup_date,arrival_scan_date,customer_name_y,row_id,order_date_y,on_truck_scan_date,ship_mode_y
0,17,252,CA-2019-145625,2019-09-11,Standard Processing,KC-16540,Kelly Collister,Email,San Diego,California,...,0.00,636.00,2019-09-17,2019-09-18,2019-09-23,Kelly Collister,251,2019-09-11,2019-09-18,Standard Processing
1,17,252,CA-2019-145625,2019-09-11,Standard Processing,KC-16540,Kelly Collister,Email,San Diego,California,...,0.00,636.00,2019-09-17,2019-09-18,2019-09-23,Kelly Collister,252,2019-09-11,2019-09-18,Standard Processing
2,17,252,CA-2019-145625,2019-09-11,Standard Processing,KC-16540,Kelly Collister,Email,San Diego,California,...,0.00,636.00,2019-09-17,2019-09-18,2019-09-23,Kelly Collister,251,2019-09-11,2019-09-18,Standard Processing
3,17,252,CA-2019-145625,2019-09-11,Standard Processing,KC-16540,Kelly Collister,Email,San Diego,California,...,0.00,636.00,2019-09-17,2019-09-18,2019-09-23,Kelly Collister,252,2019-09-11,2019-09-18,Standard Processing
4,2157,251,CA-2019-145625,2019-09-11,Standard Processing,KC-16540,Kelly Collister,Email,San Diego,California,...,0.00,3.58,2019-09-17,2019-09-18,2019-09-23,Kelly Collister,251,2019-09-11,2019-09-18,Standard Processing
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,5697,992,CA-2019-110023,2019-09-09,Express,TS-21610,Troy Staebel,Email,New York City,New York,...,0.20,5.48,2019-09-11,2019-09-11,2019-09-16,Troy Staebel,992,2019-09-09,2019-09-11,Express
246,6389,5399,CA-2019-163202,2019-09-27,Standard Processing,BM-11650,Brian Moss,Sales,Lancaster,Ohio,...,0.70,-2.03,2019-10-02,2019-10-04,2019-10-07,Brian Moss,5399,2019-09-27,2019-10-04,Standard Processing
247,6835,8468,CA-2019-168557,2019-09-19,Standard Processing,FH-14275,Frank Hawley,Sales,San Francisco,California,...,0.20,3.12,2019-09-23,2019-09-25,2019-09-30,Frank Hawley,8468,2019-09-19,2019-09-25,Standard Processing
248,8403,9281,CA-2019-166772,2019-09-20,Standard Processing,HJ-14875,Heather Jas,Facebook,Seattle,Washington,...,0.00,40.97,2019-09-24,2019-09-25,2019-09-30,Heather Jas,9281,2019-09-20,2019-09-25,Standard Processing


In [None]:
# letzer stand scan on truck zu arrival

In [124]:
check_process_time= check_process_time.iloc[: , [1, 2,3,4,19,20,21,22]].copy()
check_process_time["order_date"]=pd.to_datetime(check_process_time["order_date"], format='%Y.%m.%d')
check_process_time["order_date"]=pd.to_datetime(check_process_time["order_date"].dt.date)
check_process_time

KeyError: 'order_date'