## 日時差への変換
- ホテルの予約レコードを使って、予約日時とチェックインした日時の差分を計算しましょう

In [43]:
import numpy as np
import pandas as pd

# Load the hotel reservation records from CSV and preview the first rows
reserve_tb = pd.read_csv('data/reserve.csv', encoding='UTF-8')
reserve_tb.head()

Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600
3,r4,h_214,c_1,2017-03-08 03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400
4,r5,h_16,c_1,2017-09-05 19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100


In [44]:
# Convert reserve_datetime from text to datetime64[ns]
reserve_tb['reserve_datetime'] = pd.to_datetime(
    reserve_tb['reserve_datetime'],
    format='%Y-%m-%d %H:%M:%S',
)
reserve_tb.head()

Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600
3,r4,h_214,c_1,2017-03-08 03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400
4,r5,h_16,c_1,2017-09-05 19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100


In [45]:
# Build checkin_datetime (datetime64[ns]) from the separate date and time columns.
# The two strings are concatenated without a separator, so the format string
# has none between the date part and the time part either.
reserve_tb['checkin_datetime'] = pd.to_datetime(
    reserve_tb['checkin_date'] + reserve_tb['checkin_time'],
    format='%Y-%m-%d%H:%M:%S',
)
reserve_tb.head()

Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price,checkin_datetime
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200,2016-03-26 10:00:00
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600,2016-07-20 11:30:00
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600,2016-10-19 09:00:00
3,r4,h_214,c_1,2017-03-08 03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400,2017-03-29 11:00:00
4,r5,h_16,c_1,2017-09-05 19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100,2017-09-22 10:30:00


In [64]:
# Year difference (month and smaller components are ignored)
reserve_tb['reserve_datetime'].dt.year - reserve_tb['checkin_datetime'].dt.year

# Month difference (day and smaller components are ignored)
(
    (reserve_tb['reserve_datetime'].dt.year * 12 + reserve_tb['reserve_datetime'].dt.month)
    - (reserve_tb['checkin_datetime'].dt.year * 12 + reserve_tb['checkin_datetime'].dt.month)
)

# Elapsed time as reservation minus check-in, so the values are negative when
# the reservation was made before the check-in.
reserve_minus_checkin = reserve_tb['reserve_datetime'] - reserve_tb['checkin_datetime']

# NOTE: casting with .astype('timedelta64[D]') / '[h]' / '[m]' only yields a
# float count on older pandas; pandas 2.x does not support those units in
# astype. Dividing by a Timedelta unit and flooring works on both and keeps
# the float64 whole-unit result.

# Difference in whole days
np.floor(reserve_minus_checkin / pd.Timedelta(days=1))

# Difference in whole hours
np.floor(reserve_minus_checkin / pd.Timedelta(hours=1))

# Difference in whole minutes
np.floor(reserve_minus_checkin / pd.Timedelta(minutes=1))

# Difference in whole seconds (only this last expression is displayed by the cell)
np.floor(reserve_minus_checkin / pd.Timedelta(seconds=1)).head()

0   -1716618.0
1    -301805.0
2   -2156203.0
3   -1841990.0
4   -1435163.0
dtype: float64

## 日時の増減処理
- ホテルの予約レコードを使って1日/1時間/1分/1秒を加えましょう

In [66]:
#time delta用のライブラリを読み込み
import datetime

# Reload the hotel reservation records from CSV and preview the first rows
reserve_tb = pd.read_csv('data/reserve.csv', encoding='UTF-8')
reserve_tb.head()

Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600
3,r4,h_214,c_1,2017-03-08 03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400
4,r5,h_16,c_1,2017-09-05 19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100


In [67]:
# Convert reserve_datetime from text to datetime64[ns]
reserve_tb['reserve_datetime'] = pd.to_datetime(
    reserve_tb['reserve_datetime'],
    format='%Y-%m-%d %H:%M:%S',
)
reserve_tb.head()

Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600
3,r4,h_214,c_1,2017-03-08 03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400
4,r5,h_16,c_1,2017-09-05 19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100


In [73]:
# Extract the calendar date from reserve_datetime into its own column
reserve_tb['reserve_date'] = reserve_tb['reserve_datetime'].dt.date

# Add one day to reserve_datetime
reserve_tb['reserve_datetime'] + datetime.timedelta(days=1)

# Add one day to reserve_date
reserve_tb['reserve_date'] + datetime.timedelta(days=1)

# Add one hour to reserve_datetime
reserve_tb['reserve_datetime'] + datetime.timedelta(hours=1)

# Add one minute to reserve_datetime
reserve_tb['reserve_datetime'] + datetime.timedelta(minutes=1)

# Add one second to reserve_datetime (only this last expression is displayed by the cell)
(reserve_tb['reserve_datetime'] + datetime.timedelta(seconds=1)).head()


0   2016-03-06 13:09:43
1   2016-07-16 23:39:56
2   2016-09-24 10:03:18
3   2017-03-08 03:20:11
4   2017-09-05 19:50:38
Name: reserve_datetime, dtype: datetime64[ns]

## 季節への変換
- ホテルの予約レコードを使って予約時の季節データを生成しましょう
- 3-5月/春、6-8月/夏、9-11月/秋、12-2月/冬とします

In [74]:
#time delta用のライブラリを読み込み
import datetime

# Load the hotel reservation records from CSV
reserve_tb = pd.read_csv('data/reserve.csv', encoding='UTF-8')
reserve_tb.head()

# Convert reserve_datetime from text to datetime64[ns]
reserve_tb['reserve_datetime'] = pd.to_datetime(
    reserve_tb['reserve_datetime'],
    format='%Y-%m-%d %H:%M:%S',
)
reserve_tb.head()

Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600
3,r4,h_214,c_1,2017-03-08 03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400
4,r5,h_16,c_1,2017-09-05 19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100


In [76]:
#月の数字を季節に変換する関数

def to_season(month_num):
    """Return the season label for a month number.

    Months 3-5 map to 'spring', 6-8 to 'summer' and 9-11 to 'autumn'.
    Every other value (12, 1 and 2 included) falls through to 'winter'.
    """
    # (first month, last month, label) for each explicitly bounded season
    bounded_seasons = (
        (3, 5, 'spring'),
        (6, 8, 'summer'),
        (9, 11, 'autumn'),
    )
    for first_month, last_month, label in bounded_seasons:
        if first_month <= month_num <= last_month:
            return label
    return 'winter'

# Convert each reservation month to its season and store it as a categorical column
reserve_tb['reserve_season'] = pd.Categorical(
    reserve_tb['reserve_datetime'].dt.month.apply(to_season),
    categories=['spring', 'summer', 'autumn', 'winter'],
)

reserve_tb.head()

Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price,reserve_season
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200,spring
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600,summer
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600,autumn
3,r4,h_214,c_1,2017-03-08 03:20:10,2017-03-29,11:00:00,2017-03-30,4,194400,spring
4,r5,h_16,c_1,2017-09-05 19:50:37,2017-09-22,10:30:00,2017-09-23,3,68100,autumn


## 平日/休日への変換
- 休日マスタを作ってjoinしましょう！

In [77]:
# Join the reservations with the holiday master, matching checkin_date to target_day.
# NOTE(review): holiday_mst is not defined anywhere in the visible notebook, and the
# recorded run of this cell raised NameError. It has to be built or loaded before this
# merge; its source and columns other than target_day cannot be determined from here.
pd.merge(reserve_tb, holiday_mst, left_on = 'checkin_date', right_on ='target_day')

NameError: name 'holiday_mst' is not defined