### Knock31: 特定店舗の売上を Excel にして出力しよう

In [1]:
# Load every order CSV in the working directory and apply the basic data preparation.
import pandas as pd
import glob
import os

m_store = pd.read_csv('m_store.csv')
m_area = pd.read_csv('m_area.csv')

current_dir = os.getcwd()
tbl_order_file = os.path.join(current_dir, 'tbl_order_*.csv')
# glob returns paths in arbitrary order; sort them so the row order of
# order_all is the same on every run and every platform.
tbl_order_files = sorted(glob.glob(tbl_order_file))
if not tbl_order_files:
    # Fail here with a clear message instead of a KeyError further down.
    raise FileNotFoundError(f"No order files match {tbl_order_file}")

# Collect the per-file frames and concatenate once: concatenating inside the
# loop re-copies all accumulated rows on every iteration.
order_frames = []
for file in tbl_order_files:
    order_tmp = pd.read_csv(file)
    print(f"{file}: {order_tmp.shape[0]}")
    order_frames.append(order_tmp)
order_all = pd.concat(order_frames, ignore_index=True)

# Drop the rows of the maintenance store (store_id 999).
order_all = order_all.loc[order_all['store_id'] != 999]

# Attach the store master, then the area master (left joins keep every order row).
order_all = pd.merge(order_all, m_store, on='store_id', how='left')
order_all = pd.merge(order_all, m_area, on='area_cd', how='left')

# Label columns for codes that have no master table.
# Codes missing from a mapping become NaN.
order_all['takeout_name'] = order_all['takeout_flag'].map({
    0: "デリバリー",
    1: "お持ち帰り",
})
order_all['status_name'] = order_all['status'].map({
    0: "受付",
    1: "お支払済",
    2: "お渡し済",
    9: "Cancel",
})

# Calendar date (no time part) of the moment the order was accepted.
order_all['order_date'] = pd.to_datetime(order_all['order_accept_date']).dt.date

order_all.head()

C:\Users\leone\dsLab\machine_learning_system_100_knocks\section4\tbl_order_202004.csv: 233260
C:\Users\leone\dsLab\machine_learning_system_100_knocks\section4\tbl_order_202005.csv: 241139
C:\Users\leone\dsLab\machine_learning_system_100_knocks\section4\tbl_order_202006.csv: 233301


Unnamed: 0,order_id,store_id,customer_id,coupon_cd,sales_detail_id,order_accept_date,delivered_date,takeout_flag,total_amount,status,store_name,area_cd,wide_area,narrow_area,takeout_name,status_name,order_date
0,79339111,49,C26387220,50,67393872,2020-04-01 11:00:00,2020-04-01 11:18:00,1,4144,1,浅草店,TK,東京,東京,お持ち帰り,お支払済,2020-04-01
1,18941733,85,C48773811,26,91834983,2020-04-01 11:00:00,2020-04-01 11:22:00,0,2877,2,目黒店,TK,東京,東京,デリバリー,お渡し済,2020-04-01
2,56217880,76,C66287421,36,64409634,2020-04-01 11:00:00,2020-04-01 11:15:00,0,2603,2,本郷店,TK,東京,東京,デリバリー,お渡し済,2020-04-01
3,28447783,190,C41156423,19,73032165,2020-04-01 11:00:00,2020-04-01 11:16:00,0,2732,2,栃木店,TO,北関東,栃木,デリバリー,お渡し済,2020-04-01
4,32576156,191,C54568117,71,23281182,2020-04-01 11:00:00,2020-04-01 11:53:00,0,2987,2,伊勢崎店,GU,北関東,群馬,デリバリー,お渡し済,2020-04-01


In [2]:
# Create a brand-new Excel workbook and write a string into one cell.
import openpyxl

wb = openpyxl.Workbook()
# Look the worksheet up by the title 'Sheet'.
ws = wb['Sheet']
# Row 1, column 1 is cell A1.
ws.cell(row=1, column=1, value='書き込み Test です。')
wb.save('test.xlsx')
wb.close()

In [3]:
# Re-open the Excel file written above in read-only mode and print cell A1.
wb = openpyxl.load_workbook('test.xlsx', read_only=True)
ws = wb['Sheet']
cell_value = ws.cell(row=1, column=1).value
print(cell_value)
wb.close()

書き込み Test です。


In [4]:
# Export the data of one specific store to Excel.
# Prepare the test data.
store_id = 1
store_df = order_all.loc[order_all['store_id'] == store_id].copy()
store_name = store_df['store_name'].unique()[0]

# Only orders with status 1 or 2 count as sales; every other status is excluded.
sold_df = store_df.loc[store_df['status'].isin([1, 2])]
store_sales_total = sold_df['total_amount'].sum()
# Split the sales by channel using takeout_flag (1 = takeout, 0 = delivery).
# The status column records order progress, not the channel, so it must not be
# used for this split.
store_sales_takeout = sold_df.loc[sold_df['takeout_flag'] == 1, 'total_amount'].sum()
store_sales_delivery = sold_df.loc[sold_df['takeout_flag'] == 0, 'total_amount'].sum()
# Sanity check: the two channel totals should add up to the overall total.
print(f"売上額確認 {store_sales_total} = {store_sales_takeout + store_sales_delivery}")
# Columns that go into the Excel sheet.
output_df = store_df[['order_accept_date', 'customer_id', 'total_amount', 'takeout_name', 'status_name']]
output_df.head()

売上額確認 9004535 = 9004535


Unnamed: 0,order_accept_date,customer_id,total_amount,takeout_name,status_name
115,2020-04-01 11:09:09,C25851661,2471,デリバリー,お渡し済
138,2020-04-01 11:11:11,C78632079,2112,デリバリー,Cancel
332,2020-04-01 11:28:28,C44700154,2122,デリバリー,Cancel
591,2020-04-01 11:49:49,C80269937,2615,お持ち帰り,お支払済
773,2020-04-01 12:05:05,C70409495,4692,デリバリー,Cancel


In [5]:
from openpyxl.utils.dataframe import dataframe_to_rows

# One label is shared by the sheet title, the heading cell and the file name.
store_title = f"{store_id}_{store_name}"

wb = openpyxl.Workbook()
ws = wb.active
ws.title = store_title

# Heading in row 1, column 1.
ws.cell(row=1, column=1, value=f"{store_title} 売上 Data")

# Top-left position where the table is pasted.
row_start = 3
col_start = 2

# Use the openpyxl utility dataframe_to_rows to iterate over the frame row by
# row (column names included, index excluded) and write each value into the
# cell offset from the start position.
rows = dataframe_to_rows(output_df, index=False, header=True)
for row_offset, row in enumerate(rows):
    for col_offset, value in enumerate(row):
        ws.cell(row=row_start + row_offset, column=col_start + col_offset).value = value

filename = f"{store_title}.xlsx"
wb.save(filename)
wb.close()