### Knock51: Data 加工の下準備をしよう
- Data 加工の下準備に入る前に、どのような機械学習 Model を構築するかを考える。
- 今回は、教師あり学習（二値分類）Model
    - 前月までの Data から、来月の Order 数が増加するのか減少するのか予測する Case は、Order 数自体を予測するのではなく、増加するのか減少するのか減少かを予測する。

In [1]:
# Directory 作成
import os

data_dir = 'data'
input_dir = os.path.join(data_dir, '0_input')
output_dir = os.path.join(data_dir, '1_output')
master_dir = os.path.join(data_dir, '99_master')
os.makedirs(input_dir, exist_ok=True)
os.makedirs(output_dir, exist_ok=True)
os.makedirs(master_dir, exist_ok=True)

Data を読み込む際に File 複数が存在する場合、一つ一つ指定していたら大変なので、自動的に Directory の中身を取りに行き、配列として Path を定義する

In [2]:
# 注文 Data path の取得
import glob
tbl_order_file = os.path.join(input_dir, 'tbl_order_*.csv')
tbl_order_paths = glob.glob(tbl_order_file)
tbl_order_paths

['data\\0_input\\tbl_order_201904.csv',
 'data\\0_input\\tbl_order_201905.csv',
 'data\\0_input\\tbl_order_201906.csv',
 'data\\0_input\\tbl_order_201907.csv',
 'data\\0_input\\tbl_order_201908.csv',
 'data\\0_input\\tbl_order_201909.csv',
 'data\\0_input\\tbl_order_201910.csv',
 'data\\0_input\\tbl_order_201911.csv',
 'data\\0_input\\tbl_order_201912.csv',
 'data\\0_input\\tbl_order_202001.csv',
 'data\\0_input\\tbl_order_202002.csv',
 'data\\0_input\\tbl_order_202003.csv']

### Knock52: Data の読み込みを行ない加工の方向性を検討しよう

In [3]:
# Master data の読み込み
import pandas as pd

m_area_file = 'm_area.csv'
m_store_file = 'm_store.csv'
m_area = pd.read_csv(os.path.join(master_dir, m_area_file))
m_store = pd.read_csv(os.path.join(master_dir, m_store_file))
m_area.head(3)

Unnamed: 0,area_cd,wide_area,narrow_area
0,TK,東京,東京
1,KN,神奈川,神奈川
2,CH,千葉,千葉


Directory 構造をしっかり考え、変数名を統一することで、簡単に使い回すことができる。

In [4]:
# 注文 Data の読み込み
tbl_order_path = tbl_order_paths[0]
print(f"読み込み Data: {tbl_order_path}")
order_data = pd.read_csv(tbl_order_path)
print(f"Data 件数: {len(order_data)}")
order_data.head(3)

読み込み Data: data\0_input\tbl_order_201904.csv
Data 件数: 233393


Unnamed: 0,order_id,store_id,customer_id,coupon_cd,sales_detail_id,order_accept_date,delivered_date,takeout_flag,total_amount,status
0,22808272,39,C80973292,40,22222408,2019-04-01 11:00:00,2019-04-01 11:26:00,1,2112,1
1,10902625,63,C94948343,20,79467084,2019-04-01 11:00:00,2019-04-01 11:47:00,0,2154,2
2,5990375,63,C91814442,1,61749935,2019-04-01 11:00:00,2019-04-01 11:10:00,0,3050,2


Data 件数が非常に多い（7桁くらい）場合、Data をやり取りするには、処理に時間がかかってしまう。
よって、まずは少量の Data 量で加工の方向を考え、全期間の Data に適用する。

### Knock53: 1ヶ月分の Data の基本的な Data 加工を実施しよう

In [5]:
# 基本的な Data 加工
order_data = order_data.loc[order_data['store_id'] != 999]

order_data = pd.merge(order_data, m_store, on='store_id', how='left')
order_data = pd.merge(order_data, m_area, on='area_cd', how='left')

order_data.loc[order_data['takeout_flag'] == 0, 'takeout_name'] = "デリバリー"
order_data.loc[order_data['takeout_flag'] == 1, 'takeout_name'] = "お持ち帰り"

order_data.loc[order_data['status'] == 0, 'status_name'] = "受付"
order_data.loc[order_data['status'] == 1, 'status_name'] = "お支払済"
order_data.loc[order_data['status'] == 2, 'status_name'] = "お渡し済"
order_data.loc[order_data['status'] == 9, 'status_name'] = "キャンセル"
order_data.head(3)

Unnamed: 0,order_id,store_id,customer_id,coupon_cd,sales_detail_id,order_accept_date,delivered_date,takeout_flag,total_amount,status,store_name,area_cd,wide_area,narrow_area,takeout_name,status_name
0,22808272,39,C80973292,40,22222408,2019-04-01 11:00:00,2019-04-01 11:26:00,1,2112,1,杉並店,TK,東京,東京,お持ち帰り,お支払済
1,10902625,63,C94948343,20,79467084,2019-04-01 11:00:00,2019-04-01 11:47:00,0,2154,2,西多摩店,TK,東京,東京,デリバリー,お渡し済
2,5990375,63,C91814442,1,61749935,2019-04-01 11:00:00,2019-04-01 11:10:00,0,3050,2,西多摩店,TK,東京,東京,デリバリー,お渡し済


In [6]:
# 欠損値の確認
order_data.isna().sum()

order_id             0
store_id             0
customer_id          0
coupon_cd            0
sales_detail_id      0
order_accept_date    0
delivered_date       0
takeout_flag         0
total_amount         0
status               0
store_name           0
area_cd              0
wide_area            0
narrow_area          0
takeout_name         0
status_name          0
dtype: int64

### Knock54: 機械学習に使用する変数を作成しよう

In [7]:
# Pizza 提供までの時間の作成
def calc_delta(t):
    t1, t2 = t
    delta = t2 - t1
    return delta.total_seconds() / 60


order_data.loc[:, 'order_accept_datetime'] = pd.to_datetime(order_data['order_accept_date'])
order_data.loc[:, 'delivered_datetime'] = pd.to_datetime(order_data['delivered_date'])
order_data.loc[:, 'delta'] = order_data[['order_accept_datetime', 'delivered_datetime']].apply(calc_delta, axis=1)
order_data.head(3)

Unnamed: 0,order_id,store_id,customer_id,coupon_cd,sales_detail_id,order_accept_date,delivered_date,takeout_flag,total_amount,status,store_name,area_cd,wide_area,narrow_area,takeout_name,status_name,order_accept_datetime,delivered_datetime,delta
0,22808272,39,C80973292,40,22222408,2019-04-01 11:00:00,2019-04-01 11:26:00,1,2112,1,杉並店,TK,東京,東京,お持ち帰り,お支払済,2019-04-01 11:00:00,2019-04-01 11:26:00,26.0
1,10902625,63,C94948343,20,79467084,2019-04-01 11:00:00,2019-04-01 11:47:00,0,2154,2,西多摩店,TK,東京,東京,デリバリー,お渡し済,2019-04-01 11:00:00,2019-04-01 11:47:00,47.0
2,5990375,63,C91814442,1,61749935,2019-04-01 11:00:00,2019-04-01 11:10:00,0,3050,2,西多摩店,TK,東京,東京,デリバリー,お渡し済,2019-04-01 11:00:00,2019-04-01 11:10:00,10.0


In [8]:
# 日付関連変数の作成
order_data.loc[:, 'order_accept_hour'] = order_data['order_accept_datetime'].dt.hour
order_data.loc[:, 'order_accept_weekday'] = order_data['order_accept_datetime'].dt.weekday
order_data.loc[order_data['order_accept_weekday'] >= 5, 'weekday_info'] = "休日"
order_data.loc[order_data['order_accept_weekday'] < 5, 'weekday_info'] = "平日"
order_data.head(3)

Unnamed: 0,order_id,store_id,customer_id,coupon_cd,sales_detail_id,order_accept_date,delivered_date,takeout_flag,total_amount,status,...,wide_area,narrow_area,takeout_name,status_name,order_accept_datetime,delivered_datetime,delta,order_accept_hour,order_accept_weekday,weekday_info
0,22808272,39,C80973292,40,22222408,2019-04-01 11:00:00,2019-04-01 11:26:00,1,2112,1,...,東京,東京,お持ち帰り,お支払済,2019-04-01 11:00:00,2019-04-01 11:26:00,26.0,11,0,平日
1,10902625,63,C94948343,20,79467084,2019-04-01 11:00:00,2019-04-01 11:47:00,0,2154,2,...,東京,東京,デリバリー,お渡し済,2019-04-01 11:00:00,2019-04-01 11:47:00,47.0,11,0,平日
2,5990375,63,C91814442,1,61749935,2019-04-01 11:00:00,2019-04-01 11:10:00,0,3050,2,...,東京,東京,デリバリー,お渡し済,2019-04-01 11:00:00,2019-04-01 11:10:00,10.0,11,0,平日


- weekday は、0 の月曜日に始まり、6 が日曜日となる。
- 上記より、平日: 5 未満、休日: 5 以上で分岐できる。

### Knock55: 店舗単位に集計して変数を作成しよう

In [9]:
# Order 数の集計
store_data = order_data.groupby(['store_name']).count()[['order_id']]
store_f = order_data.loc[
    (order_data['status_name'] == "お渡し済")
    | (order_data['status_name'] == "お支払済")
    ].groupby(['store_name']).count()[['order_id']]
store_c = order_data.loc[order_data['status_name'] == "キャンセル"].groupby(['store_name']).count()[['order_id']]
store_d = order_data.loc[order_data['takeout_name'] == "デリバリー"].groupby(['store_name']).count()[['order_id']]
store_t = order_data.loc[order_data['takeout_name'] == "お持ち帰り"].groupby(['store_name']).count()[['order_id']]

store_weekday = order_data.loc[order_data['weekday_info'] == "平日"].groupby(['store_name']).count()[['order_id']]
store_weekend = order_data.loc[order_data['weekday_info'] == "休日"].groupby(['store_name']).count()[['order_id']]

In [10]:
# 時間帯別 Order 数の集計
times = order_data['order_accept_hour'].unique()
store_time = []
for time in times:
    time_tmp = order_data.loc[order_data['order_accept_hour'] == time].groupby(['store_name']).count()[['order_id']]
    time_tmp.columns = [f"order_time_{time}"]
    store_time.append(time_tmp)
store_time = pd.concat(store_time, axis=1)
store_time.head(3)

Unnamed: 0_level_0,order_time_11,order_time_12,order_time_13,order_time_14,order_time_15,order_time_16,order_time_17,order_time_18,order_time_19,order_time_20,order_time_21
store_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
あきる野店,91,122,112,101,95,107,106,100,108,109,96
さいたま南店,130,135,147,143,142,137,130,113,140,132,155
さいたま緑店,95,91,106,95,102,82,90,93,95,95,84


In [11]:
# 提供までの時間の集計と集計結果の結合
store_delta = order_data.loc[(order_data['status_name'] != "キャンセル")].groupby(['store_name']).mean()[['delta']]
store_data.columns = ['order']
store_f.columns = ['order_fin']
store_c.columns = ['order_cancel']
store_d.columns = ['order_takeout']
store_weekday.columns = ['order_weekday']
store_weekend.columns = ['order_weekend']
store_delta.columns = ['delta_avg']
store_data = pd.concat(
    [store_data, store_f, store_c, store_d, store_t, store_weekday, store_weekend, store_time, store_delta], axis=1)
store_data.head(3)

Unnamed: 0_level_0,order,order_fin,order_cancel,order_takeout,order_id,order_weekday,order_weekend,order_time_11,order_time_12,order_time_13,order_time_14,order_time_15,order_time_16,order_time_17,order_time_18,order_time_19,order_time_20,order_time_21,delta_avg
store_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
あきる野店,1147,945,202,841,306,844,303,91,122,112,101,95,107,106,100,108,109,96,34.110053
さいたま南店,1504,1217,287,1105,399,1104,400,130,135,147,143,142,137,130,113,140,132,155,35.337716
さいたま緑店,1028,847,181,756,272,756,272,95,91,106,95,102,82,90,93,95,95,84,34.291617


### Knock56: Data の加工と店舗別集計を関数で実行しよう

In [12]:
# Data 加工関数の作成
def data_processing(order_data):
    order_data = order_data.loc[order_data['store_id'] != 999]
    order_data = pd.merge(order_data, m_store, on='store_id', how='left')
    order_data = pd.merge(order_data, m_area, on='area_cd', how='left')
    order_data.loc[order_data['takeout_flag'] == 0, 'takeout_name'] = "デリバリー"
    order_data.loc[order_data['takeout_flag'] == 1, 'takeout_name'] = "お持ち帰り"
    order_data.loc[order_data['status'] == 0, 'status_name'] = "受付"
    order_data.loc[order_data['status'] == 1, 'status_name'] = "お支払済"
    order_data.loc[order_data['status'] == 2, 'status_name'] = "お渡し済"
    order_data.loc[order_data['status'] == 9, 'status_name'] = "キャンセル"

    order_data.loc[:, 'order_accept_datetime'] = pd.to_datetime(order_data['order_accept_date'])
    order_data.loc[:, 'delivered_datetime'] = pd.to_datetime(order_data['delivered_date'])
    order_data.loc[:, 'delta'] = order_data[['order_accept_datetime', 'delivered_datetime']].apply(calc_delta, axis=1)
    order_data.loc[:, 'order_accept_hour'] = order_data['order_accept_datetime'].dt.hour
    order_data.loc[:, 'order_accept_weekday'] = order_data['order_accept_datetime'].dt.weekday
    order_data.loc[order_data['order_accept_weekday'] >= 5, 'weekday_info'] = "休日"
    order_data.loc[order_data['order_accept_weekday'] < 5, 'weekday_info'] = "平日"

    store_data = order_data.groupby(['store_name']).count()[['order_id']]
    store_f = order_data.loc[
        (order_data['status_name'] == "お渡し済")
        | (order_data['status_name'] == "お支払済")
        ].groupby(['store_name']).count()[['order_id']]
    store_c = order_data.loc[order_data['status_name'] == "キャンセル"].groupby(['store_name']).count()[['order_id']]
    store_d = order_data.loc[order_data['takeout_name'] == "デリバリー"].groupby(['store_name']).count()[['order_id']]
    store_t = order_data.loc[order_data['takeout_name'] == "お持ち帰り"].groupby(['store_name']).count()[['order_id']]
    store_weekday = order_data.loc[order_data['weekday_info'] == "平日"].groupby(['store_name']).count()[['order_id']]
    store_weekend = order_data.loc[order_data['weekday_info'] == "休日"].groupby(['store_name']).count()[['order_id']]
    times = order_data['order_accept_hour'].unique()
    store_time = []
    for time in times:
        time_tmp = order_data.loc[order_data['order_accept_hour'] == time].groupby(['store_name']).count()[['order_id']]
        time_tmp.columns = [f"order_time_{time}"]
        store_time.append(time_tmp)
    store_time = pd.concat(store_time, axis=1)
    store_delta = order_data.loc[order_data['status_name'] != "キャンセル"].groupby(['store_name']).mean()[['delta']]
    store_data.columns = ['order']
    store_f.columns = ['order_fin']
    store_c.columns = ['order_cancel']
    store_d.columns = ['order_delivery']
    store_t.columns = ['order_takeout']
    store_delta.columns = ['delta_avg']
    store_weekday.columns = ['order_weekday']
    store_weekend.columns = ['order_weekend']
    store_data = pd.concat(
        [store_data, store_f, store_c, store_d, store_t, store_weekday, store_weekend, store_time, store_delta], axis=1)
    return store_data

In [13]:
# Data 加工関数の実行
tbl_order_path = tbl_order_paths[0]
print(f"読み込み Data: {tbl_order_path}")
order_data = pd.read_csv(tbl_order_path)
store_data = data_processing(order_data)
store_data.head(3)

読み込み Data: data\0_input\tbl_order_201904.csv


Unnamed: 0_level_0,order,order_fin,order_cancel,order_delivery,order_takeout,order_weekday,order_weekend,order_time_11,order_time_12,order_time_13,order_time_14,order_time_15,order_time_16,order_time_17,order_time_18,order_time_19,order_time_20,order_time_21,delta_avg
store_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
あきる野店,1147,945,202,841,306,844,303,91,122,112,101,95,107,106,100,108,109,96,34.110053
さいたま南店,1504,1217,287,1105,399,1104,400,130,135,147,143,142,137,130,113,140,132,155,35.337716
さいたま緑店,1028,847,181,756,272,756,272,95,91,106,95,102,82,90,93,95,95,84,34.291617


### Knock57: 全 Data の読み込みと Data 加工をやってみよう

In [14]:
from IPython.display import display

# 提供までの時間の集計と集計結果の結合
store_all = []
for tbl_order_path in tbl_order_paths:
    print(f"読み込み Data: {tbl_order_path}")
    tg_ym = tbl_order_path.split('_')[-1][:6]
    order_data = pd.read_csv(tbl_order_path)
    store_data = data_processing(order_data)
    store_data.loc[:, 'year_month'] = tg_ym
    store_data.reset_index(drop=False, inplace=True)
    store_all.append(store_data)
store_all = pd.concat(store_all, ignore_index=True)
display(store_all.head(3))
display(store_all.tail(3))
store_monthly_name = 'store_monthly_data.csv'
store_all.to_csv(os.path.join(output_dir, store_monthly_name), index=False)

読み込み Data: data\0_input\tbl_order_201904.csv
読み込み Data: data\0_input\tbl_order_201905.csv
読み込み Data: data\0_input\tbl_order_201906.csv
読み込み Data: data\0_input\tbl_order_201907.csv
読み込み Data: data\0_input\tbl_order_201908.csv
読み込み Data: data\0_input\tbl_order_201909.csv
読み込み Data: data\0_input\tbl_order_201910.csv
読み込み Data: data\0_input\tbl_order_201911.csv
読み込み Data: data\0_input\tbl_order_201912.csv
読み込み Data: data\0_input\tbl_order_202001.csv
読み込み Data: data\0_input\tbl_order_202002.csv
読み込み Data: data\0_input\tbl_order_202003.csv


Unnamed: 0,store_name,order,order_fin,order_cancel,order_delivery,order_takeout,order_weekday,order_weekend,order_time_11,order_time_12,...,order_time_14,order_time_15,order_time_16,order_time_17,order_time_18,order_time_19,order_time_20,order_time_21,delta_avg,year_month
0,あきる野店,1147,945,202,841,306,844,303,91,122,...,101,95,107,106,100,108,109,96,34.110053,201904
1,さいたま南店,1504,1217,287,1105,399,1104,400,130,135,...,143,142,137,130,113,140,132,155,35.337716,201904
2,さいたま緑店,1028,847,181,756,272,756,272,95,91,...,95,102,82,90,93,95,95,84,34.291617,201904


Unnamed: 0,store_name,order,order_fin,order_cancel,order_delivery,order_takeout,order_weekday,order_weekend,order_time_11,order_time_12,...,order_time_14,order_time_15,order_time_16,order_time_17,order_time_18,order_time_19,order_time_20,order_time_21,delta_avg,year_month
2337,鴻巣店,1153,951,202,863,290,821,332,105,107,...,119,89,108,99,106,107,101,101,34.634069,202003
2338,鶴見店,1514,1236,278,1093,421,1071,443,144,138,...,137,161,152,130,136,138,129,131,34.817152,202003
2339,麻生店,1397,1145,252,1050,347,994,403,121,115,...,120,113,139,129,117,144,141,136,34.383406,202003


- **目的変数**: 機械学習において予測したい変数のこと
- **説明変数**: 機械学習において予測に使用する変数のこと

### Knock58: 目的変数を作成しよう

In [15]:
# 1ヶ月前日付の作成
y = store_all[['store_name', 'year_month', 'order_weekday', 'order_weekend']].copy()
y.loc[:, 'one_month_ago'] = pd.to_datetime(y['year_month'], format='%Y%m')

from dateutil.relativedelta import relativedelta

y.loc[:, 'one_month_ago'] = y['one_month_ago'].map(lambda x: x - relativedelta(months=1))
y.loc[:, 'one_month_ago'] = y['one_month_ago'].dt.strftime('%Y%m')
y.head(3)

Unnamed: 0,store_name,year_month,order_weekday,order_weekend,one_month_ago
0,あきる野店,201904,844,303,201903
1,さいたま南店,201904,1104,400,201903
2,さいたま緑店,201904,756,272,201903


In [16]:
# 1ヶ月前の Order 数の作成
y_one_month_ago = y.copy()
y_one_month_ago.rename(columns={'order_weekday': 'order_weekday_one_month_ago',
                                'order_weekend': 'order_weekend_one_month_ago',
                                'year_month': 'year_month_for_join'}, inplace=True)
y = pd.merge(y, y_one_month_ago[
    ['store_name', 'year_month_for_join', 'order_weekday_one_month_ago', 'order_weekend_one_month_ago']],
             left_on=['store_name', 'one_month_ago'], right_on=['store_name', 'year_month_for_join'], how='left')
y.loc[y['store_name'] == 'あきる野店']

Unnamed: 0,store_name,year_month,order_weekday,order_weekend,one_month_ago,year_month_for_join,order_weekday_one_month_ago,order_weekend_one_month_ago
0,あきる野店,201904,844,303,201903,,,
195,あきる野店,201905,883,302,201904,201904.0,844.0,303.0
390,あきる野店,201906,764,384,201905,201905.0,883.0,302.0
585,あきる野店,201907,882,308,201906,201906.0,764.0,384.0
780,あきる野店,201908,835,343,201907,201907.0,882.0,308.0
975,あきる野店,201909,802,347,201908,201908.0,835.0,343.0
1170,あきる野店,201910,880,309,201909,201909.0,802.0,347.0
1365,あきる野店,201911,796,341,201910,201910.0,880.0,309.0
1560,あきる野店,201912,844,345,201911,201911.0,796.0,341.0
1755,あきる野店,202001,881,305,201912,201912.0,844.0,345.0


In [17]:
# 1ヶ月前の Order 数の作成
y.dropna(inplace=True)
y.loc[y['order_weekday'] - y['order_weekday_one_month_ago'] > 0, 'y_weekday'] = 1
y.loc[y['order_weekday'] - y['order_weekday_one_month_ago'] <= 0, 'y_weekday'] = 0
y.loc[y['order_weekend'] - y['order_weekend_one_month_ago'] > 0, 'y_weekend'] = 1
y.loc[y['order_weekend'] - y['order_weekend_one_month_ago'] <= 0, 'y_weekend'] = 0
y.head(3)

Unnamed: 0,store_name,year_month,order_weekday,order_weekend,one_month_ago,year_month_for_join,order_weekday_one_month_ago,order_weekend_one_month_ago,y_weekday,y_weekend
195,あきる野店,201905,883,302,201904,201904,844.0,303.0,1.0,0.0
196,さいたま南店,201905,1152,401,201904,201904,1104.0,400.0,1.0,1.0
197,さいたま緑店,201905,796,274,201904,201904,756.0,272.0,1.0,1.0


In [9]:
# Order 数の集計
store_data = order_data.groupby(['store_name']).count()[['order_id']]
store_f = order_data.loc[
    (order_data['status_name'] == "お渡し済")
    | (order_data['status_name'] == "お支払済")
    ].groupby(['store_name']).count()[['order_id']]
store_c = order_data.loc[order_data['status_name'] == "キャンセル"].groupby(['store_name']).count()[['order_id']]
store_d = order_data.loc[order_data['takeout_name'] == "デリバリー"].groupby(['store_name']).count()[['order_id']]
store_t = order_data.loc[order_data['takeout_name'] == "お持ち帰り"].groupby(['store_name']).count()[['order_id']]

store_weekday = order_data.loc[order_data['weekday_info'] == "平日"].groupby(['store_name']).count()[['order_id']]
store_weekend = order_data.loc[order_data['weekday_info'] == "休日"].groupby(['store_name']).count()[['order_id']]

In [12]:
# 時間帯別 Order 数の集計
times = order_data['order_accept_hour'].unique()
store_time = []
for time in times:
    time_tmp = order_data.loc[order_data['order_accept_hour'] == time].groupby(['store_name']).count()[['order_id']]
    time_tmp.columns = [f"order_time_{time}"]
    store_time.append(time_tmp)
store_time = pd.concat(store_time, axis=1)
store_time.head(3)

Unnamed: 0_level_0,order_time_11,order_time_12,order_time_13,order_time_14,order_time_15,order_time_16,order_time_17,order_time_18,order_time_19,order_time_20,order_time_21
store_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
あきる野店,91,122,112,101,95,107,106,100,108,109,96
さいたま南店,130,135,147,143,142,137,130,113,140,132,155
さいたま緑店,95,91,106,95,102,82,90,93,95,95,84


In [None]:
# 提供までの時間の集計と集計結果の結合
store_delta = order_data.loc[(order_data['status_name'] != "キャンセル")].groupby(['store_name']).mean()[['delta']]
store_data.columns = ['order']
store_f.columns = ['order_fin']
store_c.columns = ['order_cancel']
store_