In [108]:
import pandas as pd
# Raise pandas' display limits to 5000 rows and 5000 columns so that
# frames shown in this notebook are not truncated.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 5000)

In [109]:
# Load the raw orders export into a DataFrame.
RAW_ORDERS_CSV = 'data/raw/orders.csv'
data = pd.read_csv(RAW_ORDERS_CSV)

In [110]:
# Lookup tables that translate the numeric codes found in the orders data
# into human-readable labels. Every key is an integer so that it matches the
# numeric code columns these tables are applied to with Series.map.

# Order type codes. Key 4 used to be the string '4', which can never match an
# integer code, so Drive Thru orders would have been mapped to NaN.
orders_types = {1: 'Dine In', 2: 'Pick Up', 3: 'Delivery', 4: 'Drive Thru'}

# Channel through which the order was placed.
orders_sources = {1: 'Cashier', 2: 'API', 3: 'Call Center'}

# Order status codes.
orders_statuses = {
    1: 'Pending',
    2: 'Active',
    3: 'Declined',
    4: 'Closed',
    5: 'Returned',
    6: 'Joined',
    7: 'Void',
}

# Delivery status codes.
orders_delivery_statuses = {
    1: 'sent to kitchen',
    2: 'ready',
    3: 'assigned',
    4: 'en route',
    5: 'delivered',
    6: 'closed',
}

# Product status codes.
products_statuses = {
    1: 'Pending',
    2: 'Active',
    3: 'Closed',
    4: 'Moved',
    5: 'Void',
    6: 'Returned',
    7: 'Declined',
}

# Discount type codes.
discounts_types = {1: 'Open', 2: 'Predefined', 3: 'Coupon', 4: 'Loyalty', 5: 'Promotion'}

In [111]:
# Inspect the dtype pandas inferred for each column of the loaded frame.
data.dtypes

id                                object
app_id                            object
promotion_id                     float64
discount_type                    float64
reference_x                      float64
number                             int64
type                               int64
source                             int64
status                             int64
delivery_status                  float64
guests                             int64
kitchen_notes                     object
customer_notes                    object
business_date                     object
subtotal_price                   float64
discount_amount                  float64
rounding_amount                    int64
total_price                      float64
tax_exclusive_discount_amount    float64
delay_in_seconds                 float64
meta                              object
opened_at                         object
accepted_at                       object
due_at                            object
driver_assigned_

In [112]:
# Translate the numeric codes in each categorical column into readable labels.
# Series.map returns NaN for any value that has no entry in its lookup table.
# products_statuses is not applied here; the product_status mapping is disabled.
code_lookups = {
    'type': orders_types,
    'source': orders_sources,
    'status': orders_statuses,
    'delivery_status': orders_delivery_statuses,
    'discount_type': discounts_types,
}
for column_name, lookup in code_lookups.items():
    data[column_name] = data[column_name].map(lookup)


In [113]:
# Columns carried forward into the working subset of the orders data.
needed_columns = [
    'id',
    'created_at',
    'type',
    'source',
    'status',
    'subtotal_price',
    'total_price',
]

In [114]:
# Work on an independent copy of the selected columns so later edits do not
# touch the full `data` frame.
sub_data = data.loc[:, needed_columns].copy()

In [115]:
# Count missing values per column of the working subset.
sub_data.isna().sum()

id                0
created_at        0
type              0
source            0
status            0
subtotal_price    0
total_price       0
dtype: int64

In [116]:
# NOTE(review): repeats the per-column null count from the preceding cell;
# one of the two cells can be removed.
sub_data.isnull().sum()

id                0
created_at        0
type              0
source            0
status            0
subtotal_price    0
total_price       0
dtype: int64

In [117]:
# Order the rows by creation time, rebinding the sorted frame rather than
# sorting in place.
# created_at is only parsed with pd.to_datetime in a later cell, so the sort
# compares the raw column values as loaded.
sub_data = sub_data.sort_values(by="created_at")

In [118]:
# Give the sorted rows a fresh 0..n-1 index; drop=True discards the old index
# labels instead of keeping them as a column.
sub_data = sub_data.reset_index(drop=True)

In [119]:
# Distribution of order statuses in the working subset.
sub_data['status'].value_counts()

Closed      14571
Void          334
Returned       65
Name: status, dtype: int64

In [120]:
# Distribution of order sources in the working subset.
sub_data['source'].value_counts()

Cashier        14959
Call Center       11
Name: source, dtype: int64

In [121]:
# Distribution of order types in the working subset.
sub_data['type'].value_counts()

Pick Up     12206
Dine In      2752
Delivery       12
Name: type, dtype: int64

In [122]:
# Parse created_at into pandas datetimes so the .dt accessors can be used on it.
sub_data = sub_data.assign(created_at=pd.to_datetime(sub_data['created_at']))

In [123]:
# Derive calendar features from the parsed `created_at` timestamp.
sub_data['create_date'] = sub_data['created_at'].dt.date
sub_data['create_hour'] = sub_data['created_at'].dt.hour
sub_data['create_day_name'] = sub_data['created_at'].dt.day_name()

# dt.dayofweek numbers Monday as 0 and Sunday as 6, so [4, 5] flags Friday and
# Saturday. NOTE(review): presumably the data comes from a Fri/Sat-weekend
# locale; confirm before reusing elsewhere.
# The Yes/No label is built in a single assignment. Calling
# Series.replace(..., inplace=True) on a column selection is chained
# assignment: it warns in pandas 2.x and leaves the frame unchanged once
# Copy-on-Write is enabled.
sub_data['is_weekend'] = (
    sub_data['created_at'].dt.dayofweek.isin([4, 5]).map({True: 'Yes', False: 'No'})
)

sub_data['month'] = sub_data['created_at'].dt.month
sub_data['year'] = sub_data['created_at'].dt.year




In [124]:
# Sum total_price per date/hour bucket; the remaining keys are calendar
# attributes derived from the same timestamp.
# NOTE(review): no status filter is applied, so orders labelled Void and
# Returned are included in `cashflow`; confirm this is intended.
group_keys = ['create_date', 'month', 'year', 'create_hour', 'create_day_name', 'is_weekend']
final_orders_data = (
    sub_data
    .groupby(group_keys)
    .agg(cashflow=('total_price', 'sum'))
    .reset_index()
)

In [125]:
# Preview the last five rows of the aggregated frame.
final_orders_data.tail(5)

Unnamed: 0,create_date,month,year,create_hour,create_day_name,is_weekend,cashflow
2728,2023-05-24,5,2023,22,Wednesday,No,19.75
2729,2023-05-25,5,2023,7,Thursday,No,0.0
2730,2023-05-25,5,2023,8,Thursday,No,14.0
2731,2023-05-25,5,2023,9,Thursday,No,15.25
2732,2023-05-25,5,2023,10,Thursday,No,12.5


In [126]:
# Write the aggregated frame to disk without the positional index column.
PROCESSED_ORDERS_CSV = 'data/processed/orders.csv'
final_orders_data.to_csv(PROCESSED_ORDERS_CSV, index=False)