## <a id='0'>Contents </a>

- <a href='#0'>Contents</a>
- <a href='#1'>1. Import Packages</a>
- <a href='#2'>2. Set Root Directory</a>
- <a href='#3'>3. Read Data file(s)</a>
- <a href='#4'>4. Data Overview</a>
- <a href='#5'>5. Data Type Conversion</a>
    - <a href='#5.1'>5.1 Deriving New Features</a>
    - <a href='#5.2'>5.2 Creating Order Level Data Frame</a>
    - <a href='#5.3'>5.3 Summarizing Numerical Values to Order Level</a>
- <a href='#6'>6. Writing the file to the disk</a>



## <a id='1'>1. Import Packages</a> 
<a href='#0'>Go to top </a>

In [1]:
import os
from datetime import datetime, date

import numpy as np
import pandas as pd
import seaborn as sns
import pandas_profiling
import matplotlib.pyplot as plt

%matplotlib inline
# Render floats with two decimals in every DataFrame display
pd.options.display.float_format = '{:.2f}'.format
# Never elide columns when a wide frame is displayed
pd.set_option('display.max_columns', None)
import warnings
# NOTE(review): this silences every warning for the whole session, including
# library deprecation warnings — consider narrowing the filter.
warnings.filterwarnings('ignore')

## <a id='2'>2. Set Root Directory</a>
<a href='#0'>Go to top </a>

In [2]:
# Root of the project tree. This is the only setting that should differ between
# machines. It defaults to the original author's location; set the
# SUPPLYCHAIN_PROJECT_ROOT environment variable to point the notebook at a
# different checkout without editing this cell. An unset or empty variable
# falls back to the default.
PROJECT_ROOT_FOLDER = (
    os.environ.get('SUPPLYCHAIN_PROJECT_ROOT')
    or 'C:/Users/Kishan/Desktop/SupplyChain-SAP/'
)

# The sub-folders below are built by plain string concatenation, so make sure
# the root always ends with exactly one separator.
PROJECT_ROOT_FOLDER = PROJECT_ROOT_FOLDER.rstrip('/\\') + '/'

# ----------------------------------------------------------------
# These variables shouldn't have to be updated on each dev machine
RAW_DATA_FOLDER       = PROJECT_ROOT_FOLDER + 'data/rawdata/'
PROCESSED_DATA_FOLDER = PROJECT_ROOT_FOLDER + 'data/processeddata/'
OUTPUT_FOLDER         = PROJECT_ROOT_FOLDER + 'output/'

#SPRINT_DATA_FOLDER    = PROCESSED_DATA_FOLDER + 'sprint1/'
#SPRINT_OUTPUT_FOLDER  = OUTPUT_FOLDER + 'sprint1/'

## <a id='3'>3.Read Data file(s)</a>
<a href='#0'>Go to top </a>

In [3]:
# Load the raw DataCo supply-chain CSV from the raw-data folder.
# NOTE(review): per the pandas docs, parse_dates=True only attempts to parse the
# index; the order/shipping date columns are converted explicitly in section 5.
df = pd.read_csv(
    RAW_DATA_FOLDER + 'DataCoSupplyChainDataset.csv',
    encoding="latin-1",
    parse_dates=True,
)

In [4]:
#Adding Target Variable. Reducing from a multi-class classification problem to binary classification

#df['late_delivery'] = np.where(df['delivery_status']=='Late delivery',1,0)

## <a id='4'>4. Data Overview </a>
<a href='#0'>Go to top </a>

In [5]:
# (rows, columns) of the frame as loaded from the CSV

df.shape

(180519, 56)

In [6]:
# Peek at the first two records.
# NOTE(review): the rendered rows include customer names and street addresses —
# consider clearing this output before sharing the notebook.

df.head(2)

Unnamed: 0,type,shipping_days_actual,shipping_days_scheduled,benefit_per_order,sales_per_customer,delivery_status,late_delivery_risk,category_id,category_name,customer_city,customer_country,customer_email,customer_firstname,customer_id,customer_lastname,customer_password,customer_segment,customer_state,customer_street,customer_zip,dept_id,dept_name,latitude,longitude,market,order_city,order_country,order_cust_id,order_date,order_id,order_item_cardprod_id,order_item_discount,order_item_discount_rate,order_item_id,order_item_product_price,order_item_profit_ratio,order_item_qty,sales,order_item_total,order_profit_per_order,order_region,order_state,order_status,order_zip,product_card_id,product_category_id,product_desc,product_image,product_name,product_price,product_status,shipping_date,shipping_mode,num_days_late,store_city,store_state
0,DEBIT,3,4,91.25,314.64,Advance shipping,0,73,Sporting Goods,Caguas,Puerto Rico,XXXXXXXXX,Cally,20755,Holloway,XXXXXXXXX,Consumer,PR,5365 Noble Nectar Island,725.0,2,Fitness,18.25,-66.04,Pacific Asia,Bekasi,Indonesia,20755,1/31/2018 22:56,77202,1360,13.11,0.04,180517,327.75,0.29,1,327.75,314.64,91.25,Southeast Asia,Java Occidental,COMPLETE,,1360,73,,http://images.acmesports.sports/Smart+watch,Smart watch,327.75,0,2/3/2018 22:56,Standard Class,-1,Caguas,Puerto Rico
1,TRANSFER,5,4,-249.09,311.36,Late delivery,1,73,Sporting Goods,Caguas,Puerto Rico,XXXXXXXXX,Irene,19492,Luna,XXXXXXXXX,Consumer,PR,2679 Rustic Loop,725.0,2,Fitness,18.28,-66.04,Pacific Asia,Bikaner,India,19492,1/13/2018 12:27,75939,1360,16.39,0.05,179254,327.75,-0.8,1,327.75,311.36,-249.09,South Asia,RajastÃÂÃÂ¡n,PENDING,,1360,73,,http://images.acmesports.sports/Smart+watch,Smart watch,327.75,0,1/18/2018 12:27,Standard Class,1,Caguas,Puerto Rico


In [7]:
# Column dtypes as inferred by read_csv

df.dtypes

type                         object
shipping_days_actual          int64
shipping_days_scheduled       int64
benefit_per_order           float64
sales_per_customer          float64
delivery_status              object
late_delivery_risk            int64
category_id                   int64
category_name                object
customer_city                object
customer_country             object
customer_email               object
customer_firstname           object
customer_id                   int64
customer_lastname            object
customer_password            object
customer_segment             object
customer_state               object
customer_street              object
customer_zip                float64
dept_id                       int64
dept_name                    object
latitude                    float64
longitude                   float64
market                       object
order_city                   object
order_country                object
order_cust_id               

In [8]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns
df.describe()

Unnamed: 0,shipping_days_actual,shipping_days_scheduled,benefit_per_order,sales_per_customer,late_delivery_risk,category_id,customer_id,customer_zip,dept_id,latitude,longitude,order_cust_id,order_id,order_item_cardprod_id,order_item_discount,order_item_discount_rate,order_item_id,order_item_product_price,order_item_profit_ratio,order_item_qty,sales,order_item_total,order_profit_per_order,order_zip,product_card_id,product_category_id,product_desc,product_price,product_status,num_days_late
count,180519.0,180519.0,180519.0,180519.0,180519.0,180519.0,180519.0,180516.0,180519.0,180519.0,180519.0,180519.0,180519.0,180519.0,180519.0,180519.0,180519.0,180519.0,180519.0,180519.0,180519.0,180519.0,180519.0,24840.0,180519.0,180519.0,0.0,180519.0,180519.0,180519.0
mean,3.5,2.93,21.97,183.11,0.55,31.85,6691.38,35921.13,5.44,29.72,-84.92,6691.38,36221.89,692.51,20.66,0.1,90260.0,141.23,0.12,2.13,203.77,183.11,21.97,55426.13,692.51,31.85,,141.23,0.0,0.57
std,1.62,1.37,104.43,120.04,0.5,15.64,4162.92,37542.46,1.63,9.81,21.43,4162.92,21045.38,336.45,21.8,0.07,52111.49,139.73,0.47,1.45,132.27,120.04,104.43,31919.28,336.45,15.64,,139.73,0.0,1.49
min,0.0,0.0,-4274.98,7.49,0.0,2.0,1.0,603.0,2.0,-33.94,-158.03,1.0,1.0,19.0,0.0,0.0,1.0,9.99,-2.75,1.0,9.99,7.49,-4274.98,1040.0,19.0,2.0,,9.99,0.0,-2.0
25%,2.0,2.0,7.0,104.38,0.0,18.0,3258.5,725.0,4.0,18.27,-98.45,3258.5,18057.0,403.0,5.4,0.04,45130.5,50.0,0.08,1.0,119.98,104.38,7.0,23464.0,403.0,18.0,,50.0,0.0,0.0
50%,3.0,4.0,31.52,163.99,1.0,29.0,6457.0,19380.0,5.0,33.14,-76.85,6457.0,36140.0,627.0,14.0,0.1,90260.0,59.99,0.27,1.0,199.92,163.99,31.52,59405.0,627.0,29.0,,59.99,0.0,1.0
75%,5.0,4.0,64.8,247.4,1.0,45.0,9779.0,78207.0,7.0,39.28,-66.37,9779.0,54144.0,1004.0,29.99,0.16,135389.5,199.99,0.36,3.0,299.95,247.4,64.8,90008.0,1004.0,45.0,,199.99,0.0,1.0
max,6.0,4.0,911.8,1939.99,1.0,76.0,20757.0,99205.0,12.0,48.78,115.26,20757.0,77204.0,1363.0,500.0,0.25,180519.0,1999.99,0.5,5.0,1999.99,1939.99,911.8,99301.0,1363.0,76.0,,1999.99,0.0,4.0


## <a id='5'>5.Data Type Conversion</a>
<a href='#0'>Go to top </a>

In [9]:
# Converting String columns with categorical values to Categorical data type

#df['type'] = df['type'].astype('object')

# LoyaltyCustomerFlag is converted to category data type later in the process
#df['LoyaltyCustomerFlag'] = df['LoyaltyCustomerFlag'].astype('category')   

# Converting numeric ID columns to Object data type
#df['customer_id'] = df['customer_id'].astype('object')
#df['dept_id'] = df['dept_id'].astype('object')
#df['order_cust_id'] = df['order_cust_id'].astype('object')
#df['order_id'] = df['order_id'].astype('object')
#df['order_item_cardprod_id'] = df['order_item_cardprod_id'].astype('object')
#df['order_item_id'] = df['order_item_id'].astype('object')

In [10]:
# Turn the order/shipping timestamp strings into datetime64 columns.
for date_col in ('order_date', 'shipping_date'):
    df[date_col] = pd.to_datetime(df[date_col])

### <a id='5.1'>5.1 Deriving New Features</a>
<a href='#0'>Go to top </a>

In [11]:
# Stores whose state is 'Puerto Rico' get that as their country; every other
# store_state value is labelled 'United States'.
df['store_country'] = np.where(
    df['store_state'].eq('Puerto Rico'),
    'Puerto Rico',
    'United States',
)

In [12]:
# Calendar breakdown of both timestamps. The loops add the columns in the same
# order as before: year and month for order, then for shipping, then the two
# 'YYYY-MM' labels.
for prefix in ('order', 'shipping'):
    stamps = df[prefix + '_date'].dt
    df[prefix + '_dt_year'] = stamps.year
    df[prefix + '_dt_month'] = stamps.month

# The source columns are already datetime64 (their .dt accessor is used above),
# so strftime can be applied to them directly.
for prefix in ('order', 'shipping'):
    df[prefix + '_dt_year_month'] = df[prefix + '_date'].dt.strftime('%Y-%m')

In [13]:
# Day-of-week number for the order and shipping timestamps (Monday=0 ... Sunday=6)
df['order_dt_weekday'] = df['order_date'].dt.weekday
df['shipping_dt_weekday'] = df['shipping_date'].dt.weekday

# English day names. `.dt.weekday_name` was removed in pandas 1.0;
# `.dt.day_name()` is the supported replacement and returns the same names.
df['order_dt_weekday_name'] = df['order_date'].dt.day_name()
df['shipping_dt_weekday_name'] = df['shipping_date'].dt.day_name()

# Saturday (5) and Sunday (6) count as the weekend
df['ordered_on_weekends'] = np.where(df['order_dt_weekday'].isin([5, 6]), "OrderedonWeekend", "OrderedonWeekday")

# NOTE(review): the weekday label is spelled "ShippededonWeekday". It is kept
# unchanged because the value is written to the output data set.
df['shipping_on_weekends'] = np.where(df['shipping_dt_weekday'].isin([5, 6]), "ShippedonWeekend", "ShippededonWeekday")

# A weekend is flagged when the shipping date itself is a Sat/Sun, or when
# shipping weekday + (actual shipping days - 1) reaches Saturday (5) or later.
df['weekend_duringshipping'] = np.where(
    (
        (df['shipping_on_weekends'] == 'ShippedonWeekend')
        | ((df['shipping_dt_weekday'] + df['shipping_days_actual'] - 1) >= 5)
    ),
    'Yes Weekend involved',
    'No weekend involved',
)

# Same rule, using the scheduled number of shipping days instead of the actual one.
df['weekend_duringshipping_Scheduleddays'] = np.where(
    (
        (df['shipping_on_weekends'] == 'ShippedonWeekend')
        | ((df['shipping_dt_weekday'] + df['shipping_days_scheduled'] - 1) >= 5)
    ),
    'Yes Weekend involved',
    'No weekend involved',
)

# An order weekday that is later in the week than the shipping weekday means the
# span wrapped into the following week.
# NOTE(review): an order placed on a Monday and shipped on the following
# Saturday/Sunday is labelled "No Weekend ..." by this rule — confirm that is intended.
df['weekend_between_order_ship'] = np.where(
    (df['order_dt_weekday'] > df['shipping_dt_weekday']),
    'Weekend between Order and Ship date',
    'No Weekend between Order and Ship date',
)

In [14]:
# Route label of the form "<store city>-<order city>"
df['sc_oc'] = df['store_city'] + ("-" + df['order_city'])

# Does the store's country match the order's destination country?
df['samecountry_source_dest'] = np.where(
    df['store_country'].eq(df['order_country']),
    "Same Country",
    "Different Countries",
)

In [15]:
# Elapsed time from order to shipment, expressed as a float number of days
df['gapbetween_orderandship_date'] = (
    (df['shipping_date'] - df['order_date']) / np.timedelta64(1, 'D')
)

In [16]:
# Shape after the derived feature columns have been added
df.shape

(180519, 75)

### <a id='5.2'>5.2 Creating Order Level Data Frame</a>
<a href='#0'>Go to top </a>

In [17]:
# Full list of column names currently in the item-level frame
df.columns.to_list()

['type',
 'shipping_days_actual',
 'shipping_days_scheduled',
 'benefit_per_order',
 'sales_per_customer',
 'delivery_status',
 'late_delivery_risk',
 'category_id',
 'category_name',
 'customer_city',
 'customer_country',
 'customer_email',
 'customer_firstname',
 'customer_id',
 'customer_lastname',
 'customer_password',
 'customer_segment',
 'customer_state',
 'customer_street',
 'customer_zip',
 'dept_id',
 'dept_name',
 'latitude',
 'longitude',
 'market',
 'order_city',
 'order_country',
 'order_cust_id',
 'order_date',
 'order_id',
 'order_item_cardprod_id',
 'order_item_discount',
 'order_item_discount_rate',
 'order_item_id',
 'order_item_product_price',
 'order_item_profit_ratio',
 'order_item_qty',
 'sales',
 'order_item_total',
 'order_profit_per_order',
 'order_region',
 'order_state',
 'order_status',
 'order_zip',
 'product_card_id',
 'product_category_id',
 'product_desc',
 'product_image',
 'product_name',
 'product_price',
 'product_status',
 'shipping_date',
 'shippi

In [18]:
# Spot-check one multi-item order (order_id 10) at item granularity
df.loc[df['order_id'].eq(10)].head()

Unnamed: 0,type,shipping_days_actual,shipping_days_scheduled,benefit_per_order,sales_per_customer,delivery_status,late_delivery_risk,category_id,category_name,customer_city,customer_country,customer_email,customer_firstname,customer_id,customer_lastname,customer_password,customer_segment,customer_state,customer_street,customer_zip,dept_id,dept_name,latitude,longitude,market,order_city,order_country,order_cust_id,order_date,order_id,order_item_cardprod_id,order_item_discount,order_item_discount_rate,order_item_id,order_item_product_price,order_item_profit_ratio,order_item_qty,sales,order_item_total,order_profit_per_order,order_region,order_state,order_status,order_zip,product_card_id,product_category_id,product_desc,product_image,product_name,product_price,product_status,shipping_date,shipping_mode,num_days_late,store_city,store_state,store_country,order_dt_year,order_dt_month,shipping_dt_year,shipping_dt_month,order_dt_year_month,shipping_dt_year_month,order_dt_weekday,shipping_dt_weekday,order_dt_weekday_name,shipping_dt_weekday_name,ordered_on_weekends,shipping_on_weekends,weekend_duringshipping,weekend_duringshipping_Scheduleddays,weekend_between_order_ship,sc_oc,samecountry_source_dest,gapbetween_orderandship_date
45753,PAYMENT,6,4,28.73,110.49,Late delivery,1,18,Men's Footwear,Memphis,EE. UU.,XXXXXXXXX,Joshua,5648,Smith,XXXXXXXXX,Corporate,TN,864 Iron Spring Stead,38111.0,4,Apparel,35.12,-89.95,LATAM,Sao Paulo,Brazil,5648,2015-01-01 03:09:00,10,403,19.5,0.15,26,129.99,0.26,1,129.99,110.49,28.73,South America,SÃÂÃÂ£o Paulo,PENDING_PAYMENT,,403,18,,http://images.acmesports.sports/Nike+Men%27s+C...,Nike Men's CJ Elite 2 TD Football Cleat,129.99,0,2015-01-07 03:09:00,Standard Class,2,Memphis,Tennessee,United States,2015,1,2015,1,2015-01,2015-01,3,2,Thursday,Wednesday,OrderedonWeekday,ShippededonWeekday,Yes Weekend involved,Yes Weekend involved,Weekend between Order and Ship date,Memphis-Sao Paulo,Different Countries,6.0
45857,PAYMENT,6,4,-15.64,21.33,Late delivery,1,41,Trade-In,Memphis,EE. UU.,XXXXXXXXX,Joshua,5648,Smith,XXXXXXXXX,Corporate,TN,864 Iron Spring Stead,38111.0,6,Outdoors,35.12,-89.95,LATAM,Sao Paulo,Brazil,5648,2015-01-01 03:09:00,10,917,0.66,0.03,27,21.99,-0.73,1,21.99,21.33,-15.64,South America,SÃÂÃÂ£o Paulo,PENDING_PAYMENT,,917,41,,http://images.acmesports.sports/Glove+It+Women...,Glove It Women's Mod Oval 3-Zip Carry All Gol,21.99,0,2015-01-07 03:09:00,Standard Class,2,Memphis,Tennessee,United States,2015,1,2015,1,2015-01,2015-01,3,2,Thursday,Wednesday,OrderedonWeekday,ShippededonWeekday,Yes Weekend involved,Yes Weekend involved,Weekend between Order and Ship date,Memphis-Sao Paulo,Different Countries,6.0
77006,PAYMENT,6,4,-14.08,159.99,Late delivery,1,48,Water Sports,Memphis,EE. UU.,XXXXXXXXX,Joshua,5648,Smith,XXXXXXXXX,Corporate,TN,864 Iron Spring Stead,38111.0,7,Fan Shop,35.12,-89.95,LATAM,Sao Paulo,Brazil,5648,2015-01-01 03:09:00,10,1073,40.0,0.2,28,199.99,-0.09,1,199.99,159.99,-14.08,South America,SÃÂÃÂ£o Paulo,PENDING_PAYMENT,,1073,48,,http://images.acmesports.sports/Pelican+Sunstr...,Pelican Sunstream 100 Kayak,199.99,0,2015-01-07 03:09:00,Standard Class,2,Memphis,Tennessee,United States,2015,1,2015,1,2015-01,2015-01,3,2,Thursday,Wednesday,OrderedonWeekday,ShippededonWeekday,Yes Weekend involved,Yes Weekend involved,Weekend between Order and Ship date,Memphis-Sao Paulo,Different Countries,6.0
77007,PAYMENT,6,4,72.0,149.99,Late delivery,1,48,Water Sports,Memphis,EE. UU.,XXXXXXXXX,Joshua,5648,Smith,XXXXXXXXX,Corporate,TN,864 Iron Spring Stead,38111.0,7,Fan Shop,35.12,-89.95,LATAM,Sao Paulo,Brazil,5648,2015-01-01 03:09:00,10,1073,50.0,0.25,24,199.99,0.48,1,199.99,149.99,72.0,South America,SÃÂÃÂ£o Paulo,PENDING_PAYMENT,,1073,48,,http://images.acmesports.sports/Pelican+Sunstr...,Pelican Sunstream 100 Kayak,199.99,0,2015-01-07 03:09:00,Standard Class,2,Memphis,Tennessee,United States,2015,1,2015,1,2015-01,2015-01,3,2,Thursday,Wednesday,OrderedonWeekday,ShippededonWeekday,Yes Weekend involved,Yes Weekend involved,Weekend between Order and Ship date,Memphis-Sao Paulo,Different Countries,6.0
114912,PAYMENT,6,4,22.67,83.97,Late delivery,1,46,Indoor/Outdoor Games,Memphis,EE. UU.,XXXXXXXXX,Joshua,5648,Smith,XXXXXXXXX,Corporate,TN,864 Iron Spring Stead,38111.0,7,Fan Shop,35.12,-89.95,LATAM,Sao Paulo,Brazil,5648,2015-01-01 03:09:00,10,1014,15.99,0.16,25,49.98,0.27,2,99.96,83.97,22.67,South America,SÃÂÃÂ£o Paulo,PENDING_PAYMENT,,1014,46,,http://images.acmesports.sports/O%27Brien+Men%...,O'Brien Men's Neoprene Life Vest,49.98,0,2015-01-07 03:09:00,Standard Class,2,Memphis,Tennessee,United States,2015,1,2015,1,2015-01,2015-01,3,2,Thursday,Wednesday,OrderedonWeekday,ShippededonWeekday,Yes Weekend involved,Yes Weekend involved,Weekend between Order and Ship date,Memphis-Sao Paulo,Different Countries,6.0


In [19]:
# Columns kept when collapsing the item-level frame to one row per order.
# The groups are concatenated in this exact order, which is also the column
# order of the resulting frame.
order_level_columns = (
    # Customer attributes
    [
        'customer_city', 'customer_country', 'customer_email',
        'customer_firstname', 'customer_id', 'customer_lastname',
        'customer_password', 'customer_segment', 'customer_state',
        'customer_street', 'customer_zip',
    ]
    # Order, payment and delivery attributes
    + [
        'type', 'shipping_days_actual', 'shipping_days_scheduled',
        'delivery_status', 'late_delivery_risk', 'market', 'order_city',
        'order_country', 'order_cust_id', 'order_date', 'order_id',
        'order_region', 'order_state', 'order_zip', 'shipping_date',
        'shipping_mode', 'num_days_late', 'order_status', 'store_city',
        'store_state',
    ]
    # Features derived in section 5.1
    + [
        'gapbetween_orderandship_date', 'samecountry_source_dest',
        'weekend_duringshipping', 'weekend_duringshipping_Scheduleddays',
        'shipping_on_weekends', 'ordered_on_weekends',
        'shipping_dt_weekday', 'order_dt_weekday',
        'shipping_dt_weekday_name', 'order_dt_weekday_name',
        'shipping_dt_year_month', 'order_dt_year_month',
        'shipping_dt_month', 'order_dt_month',
        'shipping_dt_year', 'order_dt_year',
        'store_country', 'sc_oc',
    ]
    # Geographic coordinates
    + ['latitude', 'longitude']
)
                


In [20]:
# Collapse the item-level frame to the distinct combinations of order-level attributes
df_orderlevel = df[order_level_columns].drop_duplicates()

# drop_duplicates() only merges rows that agree on every selected column, so an
# attribute that differs between items of the same order would leave several
# rows for that order. Fail fast here rather than let the later merge on
# order_id multiply rows.
assert df_orderlevel['order_id'].is_unique, "order-level frame has more than one row per order_id"

In [21]:
# Number of distinct order ids in the item-level frame
df['order_id'].nunique()

65752

In [22]:
# Row count here should equal the number of distinct order ids if every order collapsed to a single row
df_orderlevel.shape

(65752, 51)

In [23]:
# The same order (order_id 10) after collapsing to order granularity
df_orderlevel.loc[df_orderlevel['order_id'].eq(10)].head()

Unnamed: 0,customer_city,customer_country,customer_email,customer_firstname,customer_id,customer_lastname,customer_password,customer_segment,customer_state,customer_street,customer_zip,type,shipping_days_actual,shipping_days_scheduled,delivery_status,late_delivery_risk,market,order_city,order_country,order_cust_id,order_date,order_id,order_region,order_state,order_zip,shipping_date,shipping_mode,num_days_late,order_status,store_city,store_state,gapbetween_orderandship_date,samecountry_source_dest,weekend_duringshipping,weekend_duringshipping_Scheduleddays,shipping_on_weekends,ordered_on_weekends,shipping_dt_weekday,order_dt_weekday,shipping_dt_weekday_name,order_dt_weekday_name,shipping_dt_year_month,order_dt_year_month,shipping_dt_month,order_dt_month,shipping_dt_year,order_dt_year,store_country,sc_oc,latitude,longitude
45753,Memphis,EE. UU.,XXXXXXXXX,Joshua,5648,Smith,XXXXXXXXX,Corporate,TN,864 Iron Spring Stead,38111.0,PAYMENT,6,4,Late delivery,1,LATAM,Sao Paulo,Brazil,5648,2015-01-01 03:09:00,10,South America,SÃÂÃÂ£o Paulo,,2015-01-07 03:09:00,Standard Class,2,PENDING_PAYMENT,Memphis,Tennessee,6.0,Different Countries,Yes Weekend involved,Yes Weekend involved,ShippededonWeekday,OrderedonWeekday,2,3,Wednesday,Thursday,2015-01,2015-01,1,1,2015,2015,United States,Memphis-Sao Paulo,35.12,-89.95


### <a id='5.3'>5.3 Summarizing Numerical Values to Order Level</a>
<a href='#0'>Go to top </a>

In [24]:
# Roll the item-level numeric columns up to one row per order:
#   order_item_id            -> order_item_count   (number of item lines)
#   order_item_qty           -> item_qty_sum       (total units)
#   order_item_product_price -> order_item_productprice_sum
#   order_item_discount      -> discount_sum
#   order_item_total         -> order_item_productprice_after_discount_sum
# NOTE(review): order_item_product_price is a per-unit price, so its sum is not
# weighted by quantity and will not reconcile with the after-discount total for
# multi-unit lines — confirm this is the intended measure.
df_nums = (
    df.groupby('order_id')
      .agg({
          'order_item_id': 'count',
          'order_item_qty': 'sum',
          'order_item_product_price': 'sum',
          'order_item_discount': 'sum',
          'order_item_total': 'sum',
      })
      .rename(columns={
          'order_item_id': 'order_item_count',
          'order_item_qty': 'item_qty_sum',
          'order_item_product_price': 'order_item_productprice_sum',
          'order_item_discount': 'discount_sum',
          'order_item_total': 'order_item_productprice_after_discount_sum',
      })
      .reset_index()
)

In [25]:
# Display the per-order numeric summary
df_nums

Unnamed: 0,order_id,order_item_count,item_qty_sum,order_item_productprice_sum,discount_sum,order_item_productprice_after_discount_sum
0,1,1,1,299.98,60.00,239.98
1,2,3,7,379.98,50.60,529.38
2,4,4,14,184.96,78.98,620.87
3,5,5,10,839.92,142.79,987.07
4,7,3,7,515.96,54.40,525.52
...,...,...,...,...,...,...
65747,77200,1,1,215.82,53.96,161.87
65748,77201,1,1,215.82,43.16,172.66
65749,77202,1,1,327.75,13.11,314.64
65750,77203,1,1,11.54,0.63,10.91


In [26]:
# Attach the per-order numeric summaries to the order-level attributes.
# validate='one_to_one' makes pandas raise a MergeError if order_id is
# duplicated on either side, instead of silently multiplying rows.
# NOTE: this cell rebinds df_orderlevel; re-running it on its own merges the
# summary columns in a second time, so re-run from the cell that builds
# df_orderlevel.
df_orderlevel = pd.merge(
    df_orderlevel,
    df_nums,
    how='inner',
    on='order_id',
    validate='one_to_one',
)

In [27]:
# Shape after the merge: the row count should be unchanged, with the summary columns added
df_orderlevel.shape

(65752, 56)

In [28]:
# Final column set of the order-level frame that is written to disk below
df_orderlevel.columns

Index(['customer_city', 'customer_country', 'customer_email',
       'customer_firstname', 'customer_id', 'customer_lastname',
       'customer_password', 'customer_segment', 'customer_state',
       'customer_street', 'customer_zip', 'type', 'shipping_days_actual',
       'shipping_days_scheduled', 'delivery_status', 'late_delivery_risk',
       'market', 'order_city', 'order_country', 'order_cust_id', 'order_date',
       'order_id', 'order_region', 'order_state', 'order_zip', 'shipping_date',
       'shipping_mode', 'num_days_late', 'order_status', 'store_city',
       'store_state', 'gapbetween_orderandship_date',
       'samecountry_source_dest', 'weekend_duringshipping',
       'weekend_duringshipping_Scheduleddays', 'shipping_on_weekends',
       'ordered_on_weekends', 'shipping_dt_weekday', 'order_dt_weekday',
       'shipping_dt_weekday_name', 'order_dt_weekday_name',
       'shipping_dt_year_month', 'order_dt_year_month', 'shipping_dt_month',
       'order_dt_month', 'shippin

## <a id='6'>6.Writing the file to the disk</a> 
<a href='#0'>Go to top </a>

In [29]:
# Persist the order-level data set into the processed-data folder.
# The target is built from PROCESSED_DATA_FOLDER so it follows the root
# directory configured in section 2 instead of repeating an absolute path.
df_orderlevel.to_csv(
    PROCESSED_DATA_FOLDER + 'Clean_BaseDataset_order_Gran.csv',
    sep=',',
    index=False,
)