In [1]:
import pandas as pd
# Raise pandas' display limits so wide/long frames are not truncated in output.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 5000)

In [2]:
# Read the raw customers export into `data`.
# NOTE(review): later cells use `data` with order columns (`type`, `source`,
# `status`, `total_price`, ...) that the customers preview does not contain, so
# `data` was presumably reassigned in a cell that is no longer in this notebook.
# Confirm before relying on Restart & Run All.
raw_customers_csv = '../../data/raw/customers.csv'
data = pd.read_csv(raw_customers_csv)

In [3]:
# Preview the first five rows of the loaded frame.
# NOTE(review): the rendered output contains customer names and phone numbers;
# clear or redact it before sharing the notebook.
data.head(n=5)

Unnamed: 0,house_account_balance,loyalty_balance,id,name,dial_code,phone,email,gender,birth_date,is_blacklisted,is_house_account_enabled,house_account_limit,is_loyalty_enabled,order_count,last_order_at,created_at,updated_at,deleted_at
0,0,0,975b3d7b-369f-41f6-b0b0-371484735d0f,براء الحزيري,962,797519969,,,,False,False,,False,1,2023-02-20 15:55:17,2022-09-26 08:34:53,2023-02-20 15:02:49,
1,0,0,975b3d7b-3817-4c93-9261-2c221a1e04a5,محمد ابو عايش,962,781989682,,,,False,False,,False,0,,2022-09-26 08:34:53,2022-09-26 08:34:53,
2,0,0,975b3d7b-3ea9-4ec5-9e69-d9a41eb4523e,محمد,962,791877931,,,,False,False,,False,0,,2022-09-26 08:34:53,2022-09-26 08:41:12,
3,0,0,975b3d7b-46f2-424f-b03e-c19c2c2e43c0,وائل القضاه,962,798083921,,,,False,False,,False,0,,2022-09-26 08:34:53,2022-09-26 08:41:12,
4,0,0,975b3d7b-4861-42e1-a812-a1db63ea8a8a,كندا حمدان,962,797150718,,,,False,False,,False,0,,2022-09-26 08:34:53,2022-09-26 08:41:12,


In [18]:
# Lookup tables translating the numeric codes stored in the data into labels.
# Keys must be integers: they are used with Series.map on numeric columns, and
# a string key such as '4' would never match the integer code 4 (yielding NaN).
orders_types = {1: 'Dine In', 2: 'Pick Up', 3: 'Delivery', 4: 'Drive Thru'}
orders_sources = {1: 'Cashier', 2: 'API', 3: 'Call Center'}
orders_statuses = {
    1: 'Pending',
    2: 'Active',
    3: 'Declined',
    4: 'Closed',
    5: 'Returned',
    6: 'Joined',
    7: 'Void',
}
orders_delivery_statuses = {
    1: 'sent to kitchen',
    2: 'ready',
    3: 'assigned',
    4: 'en route',
    5: 'delivered',
    6: 'closed',
}
products_statuses = {
    1: 'Pending',
    2: 'Active',
    3: 'Closed',
    4: 'Moved',
    5: 'Void',
    6: 'Returned',
    7: 'Declined',
}
discounts_types = {1: 'Open', 2: 'Predefined', 3: 'Coupon', 4: 'Loyalty', 5: 'Promotion'}

In [19]:
# Show the dtype of every column in `data`.
data.dtypes

id                                object
app_id                            object
promotion_id                     float64
discount_type                    float64
reference_x                      float64
number                             int64
type                               int64
source                             int64
status                             int64
delivery_status                  float64
guests                             int64
kitchen_notes                     object
customer_notes                    object
business_date                     object
subtotal_price                   float64
discount_amount                  float64
rounding_amount                    int64
total_price                      float64
tax_exclusive_discount_amount    float64
delay_in_seconds                 float64
meta                              object
opened_at                         object
accepted_at                       object
due_at                            object
driver_assigned_

In [20]:
# Replace numeric codes with their labels, one column at a time.
# Series.map turns any value without an entry in the lookup into NaN, so this
# cell is not idempotent: running it a second time on the already-mapped labels
# would blank out every column listed here.
label_lookups = {
    'type': orders_types,
    'source': orders_sources,
    'status': orders_statuses,
    'delivery_status': orders_delivery_statuses,
    # 'product_status': products_statuses,  (left unmapped, as in the original cell)
    'discount_type': discounts_types,
}
for column, lookup in label_lookups.items():
    data[column] = data[column].map(lookup)


In [21]:
# Columns carried forward into the working subset.
needed_columns = [
    'id',
    'created_at',
    'type',
    'source',
    'status',
    'subtotal_price',
    'total_price',
]

In [22]:
# Work on an independent copy restricted to the needed columns, so later
# column assignments do not touch `data`.
selected = data[needed_columns]
sub_data = selected.copy()

In [23]:
# Count missing values per column (isna is the same check as isnull).
sub_data.isna().sum()

id                0
created_at        0
type              0
source            0
status            0
subtotal_price    0
total_price       0
dtype: int64

In [24]:
# Count missing values per column.
# NOTE(review): this repeats the null-count check of the preceding cell
# (In [23]) with no change to sub_data in between; one of the two can go.
sub_data.isna().sum()

id                0
created_at        0
type              0
source            0
status            0
subtotal_price    0
total_price       0
dtype: int64

In [25]:
# Order rows by creation time. The sorted frame is bound back to the name
# rather than mutated with inplace=True, so the cell reads as "input -> output".
# NOTE(review): created_at is presumably still text here (it is only parsed
# with pd.to_datetime in a later cell), so this is a lexicographic sort. That
# matches chronological order only for ISO-style 'YYYY-MM-DD HH:MM:SS' values.
sub_data = sub_data.sort_values(by="created_at")

In [26]:
# Renumber rows 0..n-1 after sorting; drop=True discards the old index instead
# of keeping it as a column.
sub_data = sub_data.reset_index(drop=True)

In [27]:
# Frequency of each order status label.
sub_data['status'].value_counts()

Closed      14571
Void          334
Returned       65
Name: status, dtype: int64

In [28]:
# Frequency of each order source label.
sub_data['source'].value_counts()

Cashier        14959
Call Center       11
Name: source, dtype: int64

In [29]:
# Frequency of each order type label. Bracket access avoids relying on
# attribute lookup for a column whose name is as generic as `type`.
sub_data['type'].value_counts()

Pick Up     12206
Dine In      2752
Delivery       12
Name: type, dtype: int64

In [30]:
# Parse created_at into datetimes so the `.dt` accessor can be used on it.
sub_data['created_at'] = pd.to_datetime(sub_data['created_at'])

In [31]:
# Derive calendar features from the order creation timestamp.
created = sub_data['created_at']
sub_data['date'] = created.dt.date
sub_data['hour'] = created.dt.hour
sub_data['day_name'] = created.dt.day_name()
# dayofweek counts Monday as 0, so 4 and 5 are Friday and Saturday.
# NOTE(review): presumably the local weekend for this business; confirm.
# The 'Yes'/'No' labels are assigned back explicitly. Calling
# `sub_data['is_weekend'].replace(..., inplace=True)` acts on a temporary
# column selection, which warns in recent pandas and has no effect under
# Copy-on-Write, leaving raw booleans in the column.
sub_data['is_weekend'] = created.dt.dayofweek.isin([4, 5]).map({True: 'Yes', False: 'No'})
sub_data['month'] = created.dt.month
sub_data['year'] = created.dt.year




In [35]:
# Sum total_price per date/hour slot, split by order type and source; the
# calendar columns are included as keys so they survive into the result.
# NOTE(review): no status filter is visible before this aggregation, so Void
# and Returned orders appear to be included in `cashflow`. Cells In [32]-[34]
# are not in the notebook, so confirm whether filtering happened there.
group_keys = ['date', 'hour', 'month', 'year', 'day_name', 'is_weekend', 'type', 'source']
hourly_totals = sub_data.groupby(group_keys).agg(cashflow=('total_price', 'sum'))
final_orders_data = hourly_totals.reset_index()

In [36]:
# Inspect the last five aggregated rows.
final_orders_data.tail(n=5)

Unnamed: 0,date,hour,month,year,day_name,is_weekend,type,source,cashflow
4076,2023-05-25,7,5,2023,Thursday,No,Dine In,Cashier,0.0
4077,2023-05-25,8,5,2023,Thursday,No,Dine In,Cashier,0.0
4078,2023-05-25,8,5,2023,Thursday,No,Pick Up,Cashier,14.0
4079,2023-05-25,9,5,2023,Thursday,No,Pick Up,Cashier,15.25
4080,2023-05-25,10,5,2023,Thursday,No,Dine In,Cashier,12.5


In [37]:
# Write the aggregated frame to CSV without the index column.
# NOTE(review): this path is relative to the notebook's working directory,
# while the input was read from '../../data/raw/'. Confirm the output is meant
# to land in ./data/processed and that the directory exists, since to_csv does
# not create missing folders.
processed_orders_csv = 'data/processed/orders.csv'
final_orders_data.to_csv(processed_orders_csv, index=False)