# Preparing our Dataset to Model Demand

In [3]:
import os
import pandas as pd
import numpy as np
import datetime as dt
from sklearn.preprocessing import OneHotEncoder as ohe

from dbds import generate_hotel_dfs
from agg import prep_demand_features
from agg_utils import stly_cols_agg, ly_cols_agg, drop_cols_agg, stly_pace_cols, ty_pace_cols

pd.options.display.max_rows = 150
pd.options.display.max_columns = 250
pd.options.display.max_colwidth = None

DATE_FMT = "%Y-%m-%d"
h1_capacity = 187
h2_capacity = 226
AOD = "2017-08-01"
AOD_dt = pd.to_datetime(AOD)

h1_res = pd.read_pickle("pickle/h1_res.pick")
h2_res = pd.read_pickle("pickle/h2_res.pick")
h1_dbd = pd.read_pickle("pickle/h1_dbd.pick")
h2_dbd = pd.read_pickle("pickle/h2_dbd.pick")

In [4]:
h1_res, h1_dbd = generate_hotel_dfs("../data/H1.csv", capacity=h1_capacity)
h2_res, h2_dbd = generate_hotel_dfs("../data/H2.csv", capacity=h2_capacity)

h1_res.to_pickle("pickle/h1_res.pick")
h1_dbd.to_pickle("pickle/h1_dbd.pick")
h2_res.to_pickle("pickle/h2_res.pick")
h2_dbd.to_pickle("pickle/h2_dbd.pick")

Hotel dataframes generated successfully!
Hotel capacity: 187 rooms
Hotel data date range: 2015-07-01 to 2017-08-31
Hotel dataframes generated successfully!
Hotel capacity: 226 rooms
Hotel data date range: 2015-07-01 to 2017-08-31


## Combine Files Generated by save_sims.py

In [5]:
# generate list of relevant files
import datetime as dt
hotel_num = 1
h = 'h' + str(hotel_num)
SIM_AOD = pd.to_datetime(dt.date(2017, 8, 1), format=DATE_FMT)
sim_start = SIM_AOD - pd.DateOffset(365*2) # need > 364 days of actuals for each date, the rest future-looking

FOLDER = "./sims2/"
lam_include = lambda x: x[:2] == h and pd.to_datetime(x[7:17]) >= sim_start
h1_files = [f for f in os.listdir(FOLDER) if lam_include(f)]
print(h1_files)
h1_files.sort()
len(h1_files), h1_files[0], h1_files[-1] # note STLY date of 8/1/17 == 8/2/16 (matching weekday)

['h1_sim_2015-08-02.pick', 'h1_sim_2015-08-03.pick', 'h1_sim_2015-08-04.pick', 'h1_sim_2015-08-05.pick', 'h1_sim_2015-08-06.pick', 'h1_sim_2015-08-07.pick', 'h1_sim_2015-08-08.pick', 'h1_sim_2015-08-09.pick', 'h1_sim_2015-08-10.pick', 'h1_sim_2015-08-11.pick', 'h1_sim_2015-08-12.pick', 'h1_sim_2015-08-13.pick', 'h1_sim_2015-08-14.pick', 'h1_sim_2015-08-15.pick', 'h1_sim_2015-08-16.pick', 'h1_sim_2015-08-17.pick', 'h1_sim_2015-08-18.pick', 'h1_sim_2015-08-19.pick', 'h1_sim_2015-08-20.pick', 'h1_sim_2015-08-21.pick', 'h1_sim_2015-08-22.pick', 'h1_sim_2015-08-23.pick', 'h1_sim_2015-08-24.pick', 'h1_sim_2015-08-25.pick', 'h1_sim_2015-08-26.pick', 'h1_sim_2015-08-27.pick', 'h1_sim_2015-08-28.pick', 'h1_sim_2015-08-29.pick', 'h1_sim_2015-08-30.pick', 'h1_sim_2015-08-31.pick', 'h1_sim_2015-09-01.pick', 'h1_sim_2015-09-02.pick', 'h1_sim_2015-09-03.pick', 'h1_sim_2015-09-04.pick', 'h1_sim_2015-09-05.pick', 'h1_sim_2015-09-06.pick', 'h1_sim_2015-09-07.pick', 'h1_sim_2015-09-08.pick', 'h1_sim_201

(731, 'h1_sim_2015-08-02.pick', 'h1_sim_2017-08-01.pick')

In [6]:
%%time
df_sim = pd.DataFrame()
df_list = [pd.read_pickle(FOLDER + otb_data) for otb_data in h1_files]
df_sim = pd.concat(df_list, ignore_index=True)

df_sim.shape

CPU times: total: 500 ms
Wall time: 526 ms


(23391, 51)

## Adding calculated features

In [7]:
df_sim.shape

(23391, 51)

In [8]:
# Add AsOfDate

def apply_aod(row):
    stay_date = row["Date"]
    stly_stay_date = pd.to_datetime(row["STLY_Date"])
    n_days_b4 = int(row["DaysUntilArrival"])
    as_of_date = pd.to_datetime(
        stay_date - pd.DateOffset(n_days_b4), format=DATE_FMT
    )
    stly_as_of_date = pd.to_datetime(
        stly_stay_date - pd.DateOffset(n_days_b4), format=DATE_FMT
    )
    return as_of_date, stly_as_of_date

df_sim[["AsOfDate","STLY_AsOfDate"]] = df_sim[["Date", "STLY_Date", "DaysUntilArrival"]].apply(apply_aod, axis=1, result_type='expand')
df_sim.rename(columns={"Date": "StayDate", "STLY_Date": "STLY_StayDate"}, inplace=True)

df_sim.head()

Unnamed: 0,DOW,RoomsOTB,RevOTB,CxlForecast,TRN_RoomsOTB,TRN_RevOTB,TRN_CxlForecast,TRNP_RoomsOTB,TRNP_RevOTB,TRNP_CxlForecast,GRP_RoomsOTB,GRP_RevOTB,GRP_CxlForecast,CNT_RoomsOTB,CNT_RevOTB,CNT_CxlForecast,StayDate,STLY_StayDate,DaysUntilArrival,Realized_Cxls,SellingPrice,TM30_RoomsOTB,TM30_RevOTB,TM30_TRN_RoomsOTB,TM30_TRN_RevOTB,TM30_TRNP_RoomsOTB,TM30_TRNP_RevOTB,TM30_GRP_RoomsOTB,TM30_GRP_RevOTB,TM30_CNT_RoomsOTB,TM30_CNT_RevOTB,TM15_RoomsOTB,TM15_RevOTB,TM15_TRN_RoomsOTB,TM15_TRN_RevOTB,TM15_TRNP_RoomsOTB,TM15_TRNP_RevOTB,TM15_GRP_RoomsOTB,TM15_GRP_RevOTB,TM15_CNT_RoomsOTB,TM15_CNT_RevOTB,TM05_RoomsOTB,TM05_RevOTB,TM05_TRN_RoomsOTB,TM05_TRN_RevOTB,TM05_TRNP_RoomsOTB,TM05_TRNP_RevOTB,TM05_GRP_RoomsOTB,TM05_GRP_RevOTB,TM05_CNT_RoomsOTB,TM05_CNT_RevOTB,AsOfDate,STLY_AsOfDate
0,Sun,168.0,24346.11,21.0,129.0,19860.51,20.0,14.0,1710.3,1.0,0.0,0.0,0.0,25.0,2775.3,0.0,2015-08-02,2014-08-03,0,68,153.37,161.0,22662.86,119.0,17749.28,15.0,1909.3,0.0,0.0,27.0,3004.28,161.0,22923.55,120.0,18116.97,15.0,1909.3,0.0,0.0,26.0,2897.28,162.0,23255.24,123.0,18769.64,14.0,1710.3,0.0,0.0,25.0,2775.3,2015-08-02,2014-08-03
1,Mon,175.0,26061.4,25.0,130.0,20926.29,24.0,15.0,1814.28,1.0,0.0,0.0,0.0,30.0,3320.83,0.0,2015-08-03,2014-08-04,1,54,160.36,172.0,24998.88,124.0,19419.79,16.0,2013.28,0.0,0.0,32.0,3565.81,168.0,24572.57,121.0,19116.48,16.0,2013.28,0.0,0.0,31.0,3442.81,173.0,25712.31,128.0,20577.2,15.0,1814.28,0.0,0.0,30.0,3320.83,2015-08-02,2014-08-03
2,Tue,178.0,26730.73,26.0,128.0,20870.68,25.0,18.0,2360.88,1.0,0.0,0.0,0.0,32.0,3499.17,0.0,2015-08-04,2014-08-05,2,57,163.14,177.0,25901.61,126.0,20058.58,17.0,2098.88,0.0,0.0,34.0,3744.15,172.0,25296.7,122.0,19576.67,17.0,2098.88,0.0,0.0,33.0,3621.15,176.0,26399.13,126.0,20539.08,18.0,2360.88,0.0,0.0,32.0,3499.17,2015-08-02,2014-08-03
3,Wed,175.0,26469.93,28.0,130.0,21123.5,27.0,16.0,2136.16,1.0,0.0,0.0,0.0,29.0,3210.27,0.0,2015-08-05,2014-08-06,3,57,163.93,174.0,25803.81,128.0,20474.4,15.0,1874.16,0.0,0.0,31.0,3455.25,174.0,26247.68,128.0,20820.72,15.0,1874.16,0.0,0.0,31.0,3552.8,174.0,26349.33,129.0,21002.9,16.0,2136.16,0.0,0.0,29.0,3210.27,2015-08-02,2014-08-03
4,Thu,176.0,27065.32,31.0,133.0,21878.04,28.0,19.0,2506.66,3.0,0.0,0.0,0.0,24.0,2680.62,0.0,2015-08-06,2014-08-07,4,56,164.97,169.0,25175.89,125.0,20005.63,18.0,2244.66,0.0,0.0,26.0,2925.6,174.0,26695.28,129.0,21088.45,20.0,2705.66,0.0,0.0,25.0,2901.17,176.0,27065.32,133.0,21878.04,19.0,2506.66,0.0,0.0,24.0,2680.62,2015-08-02,2014-08-03


In [9]:
df_sim.shape
df_sim["AsOfDate"]

0       2015-08-02
1       2015-08-02
2       2015-08-02
3       2015-08-02
4       2015-08-02
           ...    
23386   2017-08-01
23387   2017-08-01
23388   2017-08-01
23389   2017-08-01
23390   2017-08-01
Name: AsOfDate, Length: 23391, dtype: datetime64[ns]

In [10]:
# add remaining supply ('RemSupply')
capacity = 187
df_sim["RemSupply"] = (
    capacity - df_sim.RoomsOTB.astype(int) + df_sim.CxlForecast.astype(int)
)

In [11]:
df_sim.shape

(23391, 54)

In [12]:
# add one-hot-encoded DOW ('Day of Week') columns

ohe_dow = pd.get_dummies(df_sim.DOW, drop_first=True)
dow_ohe_cols = list(ohe_dow.columns)
df_sim[dow_ohe_cols] = ohe_dow

In [13]:
df_sim.shape

(23391, 60)

In [14]:
# add NONTRN cols

df_sim["NONTRN_RoomsOTB"] = (
    df_sim.RoomsOTB - df_sim.TRN_RoomsOTB
)
df_sim["NONTRN_RevOTB"] = df_sim.RevOTB - df_sim.TRN_RevOTB
df_sim["NONTRN_ADR_OTB"] = round(df_sim["NONTRN_RevOTB"] / df_sim["NONTRN_RoomsOTB"], 2)
df_sim["NONTRN_CxlForecast"] = df_sim.CxlForecast - df_sim.TRN_CxlForecast

# df_sim["LYA_NONTRN_RoomsOTB"] = (
#     df_sim.LYA_TRNP_RoomsOTB + df_sim.LYA_GRP_RoomsOTB + df_sim.LYA_CNT_RoomsOTB
# )
# df_sim["LYA_NONTRN_RevOTB"] = df_sim.LYA_TRNP_RevOTB + df_sim.LYA_GRP_RevOTB + df_sim.LYA_CNT_RevOTB



In [15]:
df_sim.shape

(23391, 64)

In [16]:
len(ly_cols_agg)

10

In [17]:
tuple(np.zeros(7))

(np.float64(0.0),
 np.float64(0.0),
 np.float64(0.0),
 np.float64(0.0),
 np.float64(0.0),
 np.float64(0.0),
 np.float64(0.0))

In [18]:
# Add last-year actual columns ("LYA_")

def apply_ly_cols(row):
    try:
        stly_date = pd.to_datetime(row["STLY_StayDate"])
        cutoff_date = pd.to_datetime('2015-08-01')
        if stly_date < cutoff_date:
            return tuple(np.zeros(len(ly_cols_agg)))
        stly_date_str = stly_date.strftime(DATE_FMT)
        df_lya = list(h1_dbd.loc[stly_date_str, ly_cols_agg])
        return tuple(df_lya)
    except:
        return tuple(np.zeros(len(ly_cols_agg)))

ly_new_cols = ["LYA_" + col for col in ly_cols_agg]
df_sim[ly_new_cols] = df_sim[["STLY_StayDate"]].apply(apply_ly_cols, axis=1, result_type="expand")

df_sim.fillna(0, inplace=True)

df_sim.tail()

Unnamed: 0,DOW,RoomsOTB,RevOTB,CxlForecast,TRN_RoomsOTB,TRN_RevOTB,TRN_CxlForecast,TRNP_RoomsOTB,TRNP_RevOTB,TRNP_CxlForecast,GRP_RoomsOTB,GRP_RevOTB,GRP_CxlForecast,CNT_RoomsOTB,CNT_RevOTB,CNT_CxlForecast,StayDate,STLY_StayDate,DaysUntilArrival,Realized_Cxls,SellingPrice,TM30_RoomsOTB,TM30_RevOTB,TM30_TRN_RoomsOTB,TM30_TRN_RevOTB,TM30_TRNP_RoomsOTB,TM30_TRNP_RevOTB,TM30_GRP_RoomsOTB,TM30_GRP_RevOTB,TM30_CNT_RoomsOTB,TM30_CNT_RevOTB,TM15_RoomsOTB,TM15_RevOTB,TM15_TRN_RoomsOTB,TM15_TRN_RevOTB,TM15_TRNP_RoomsOTB,TM15_TRNP_RevOTB,TM15_GRP_RoomsOTB,TM15_GRP_RevOTB,TM15_CNT_RoomsOTB,TM15_CNT_RevOTB,TM05_RoomsOTB,TM05_RevOTB,TM05_TRN_RoomsOTB,TM05_TRN_RevOTB,TM05_TRNP_RoomsOTB,TM05_TRNP_RevOTB,TM05_GRP_RoomsOTB,TM05_GRP_RevOTB,TM05_CNT_RoomsOTB,TM05_CNT_RevOTB,AsOfDate,STLY_AsOfDate,RemSupply,Mon,Sat,Sun,Thu,Tue,Wed,NONTRN_RoomsOTB,NONTRN_RevOTB,NONTRN_ADR_OTB,NONTRN_CxlForecast,LYA_RoomsSold,LYA_ADR,LYA_RoomRev,LYA_NumCancels,LYA_TRN_RoomsSold,LYA_TRN_ADR,LYA_TRN_RoomRev,LYA_TRNP_RoomsSold,LYA_TRNP_ADR,LYA_TRNP_RoomRev
23386,Sun,165.0,31468.98,41.0,119.0,24989.24,38.0,15.0,2597.24,3.0,4.0,628.03,0.0,27.0,3254.47,0.0,2017-08-27,2016-08-28,26,83,213.83,160.0,30428.85,115.0,24086.11,15.0,2597.24,4.0,628.03,26.0,3117.47,166.0,31804.99,117.0,24566.61,16.0,3085.88,5.0,763.03,28.0,3389.47,171.0,33073.09,122.0,25834.71,16.0,3085.88,5.0,763.03,28.0,3389.47,2017-08-01,2016-08-02,63,False,False,True,False,False,False,46.0,6479.74,140.86,3.0,178.0,168.09,29919.61,93.0,131.0,183.24,24004.17,24.0,171.53,4116.77
23387,Mon,169.0,32690.81,38.0,129.0,26766.44,38.0,12.0,2529.64,0.0,3.0,461.03,0.0,25.0,2933.7,0.0,2017-08-28,2016-08-29,27,93,212.04,159.0,30341.14,121.0,24954.77,10.0,1991.64,3.0,461.03,25.0,2933.7,171.0,32549.64,126.0,25943.27,13.0,2666.64,6.0,871.03,26.0,3068.7,174.0,33401.62,130.0,26935.25,13.0,2666.64,5.0,731.03,26.0,3068.7,2017-08-01,2016-08-02,56,True,False,False,False,False,False,40.0,5924.37,148.11,0.0,179.0,151.53,27123.13,100.0,131.0,164.99,21613.33,20.0,167.97,3359.43
23388,Tue,172.0,32283.29,42.0,132.0,26310.28,41.0,11.0,2359.04,0.0,3.0,461.03,0.0,26.0,3152.94,1.0,2017-08-29,2016-08-30,28,85,206.33,164.0,30462.79,126.0,25027.78,9.0,1821.04,3.0,461.03,26.0,3152.94,171.0,31589.6,126.0,24934.59,12.0,2496.04,6.0,871.03,27.0,3287.94,173.0,32158.98,129.0,25643.97,12.0,2496.04,5.0,731.03,27.0,3287.94,2017-08-01,2016-08-02,57,False,False,False,False,True,False,40.0,5973.01,149.33,1.0,178.0,145.09,25826.07,90.0,128.0,160.37,20526.94,20.0,156.71,3134.13
23389,Wed,163.0,29308.46,41.0,122.0,23261.08,39.0,11.0,2394.66,0.0,5.0,724.03,1.0,25.0,2928.69,1.0,2017-08-30,2016-08-31,29,68,201.06,159.0,28294.17,118.0,22246.79,11.0,2394.66,5.0,724.03,25.0,2928.69,164.0,28930.08,119.0,22389.7,12.0,2531.66,7.0,945.03,26.0,3063.69,173.0,30708.25,128.0,24167.87,12.0,2531.66,7.0,945.03,26.0,3063.69,2017-08-01,2016-08-02,65,False,False,False,False,False,True,41.0,6047.38,147.5,2.0,171.0,137.88,23577.22,76.0,126.0,151.21,19052.76,12.0,177.46,2129.53
23390,Thu,148.0,25598.71,41.0,116.0,21552.53,39.0,5.0,908.66,0.0,4.0,598.73,1.0,23.0,2538.79,1.0,2017-08-31,2016-09-01,30,57,197.0,148.0,25598.71,116.0,21552.53,5.0,908.66,4.0,598.73,23.0,2538.79,158.0,26730.7,117.0,21246.62,10.0,1853.16,6.0,888.73,25.0,2742.19,164.0,27966.82,123.0,22482.74,10.0,1853.16,6.0,888.73,25.0,2742.19,2017-08-01,2016-08-02,80,False,False,False,True,False,False,32.0,4046.18,126.44,2.0,183.0,133.07,24351.32,92.0,132.0,145.19,19164.66,15.0,168.48,2527.23


In [19]:
actual_cols = ['RoomsSold', "ADR", "RoomRev", "NumCancels"]
def apply_ty_actuals(row):
    date = row["StayDate"]
    date_str = dt.datetime.strftime(date, format=DATE_FMT)
    results = list(h1_dbd.loc[date_str, actual_cols])
    return tuple(results)

new_actual_cols = ["ACTUAL_" + col for col in actual_cols]
df_sim[new_actual_cols] = df_sim[["StayDate"]].apply(apply_ty_actuals, axis=1, result_type="expand")

df_sim.fillna(0, inplace=True)

df_sim.tail()

Unnamed: 0,DOW,RoomsOTB,RevOTB,CxlForecast,TRN_RoomsOTB,TRN_RevOTB,TRN_CxlForecast,TRNP_RoomsOTB,TRNP_RevOTB,TRNP_CxlForecast,GRP_RoomsOTB,GRP_RevOTB,GRP_CxlForecast,CNT_RoomsOTB,CNT_RevOTB,CNT_CxlForecast,StayDate,STLY_StayDate,DaysUntilArrival,Realized_Cxls,SellingPrice,TM30_RoomsOTB,TM30_RevOTB,TM30_TRN_RoomsOTB,TM30_TRN_RevOTB,TM30_TRNP_RoomsOTB,TM30_TRNP_RevOTB,TM30_GRP_RoomsOTB,TM30_GRP_RevOTB,TM30_CNT_RoomsOTB,TM30_CNT_RevOTB,TM15_RoomsOTB,TM15_RevOTB,TM15_TRN_RoomsOTB,TM15_TRN_RevOTB,TM15_TRNP_RoomsOTB,TM15_TRNP_RevOTB,TM15_GRP_RoomsOTB,TM15_GRP_RevOTB,TM15_CNT_RoomsOTB,TM15_CNT_RevOTB,TM05_RoomsOTB,TM05_RevOTB,TM05_TRN_RoomsOTB,TM05_TRN_RevOTB,TM05_TRNP_RoomsOTB,TM05_TRNP_RevOTB,TM05_GRP_RoomsOTB,TM05_GRP_RevOTB,TM05_CNT_RoomsOTB,TM05_CNT_RevOTB,AsOfDate,STLY_AsOfDate,RemSupply,Mon,Sat,Sun,Thu,Tue,Wed,NONTRN_RoomsOTB,NONTRN_RevOTB,NONTRN_ADR_OTB,NONTRN_CxlForecast,LYA_RoomsSold,LYA_ADR,LYA_RoomRev,LYA_NumCancels,LYA_TRN_RoomsSold,LYA_TRN_ADR,LYA_TRN_RoomRev,LYA_TRNP_RoomsSold,LYA_TRNP_ADR,LYA_TRNP_RoomRev,ACTUAL_RoomsSold,ACTUAL_ADR,ACTUAL_RoomRev,ACTUAL_NumCancels
23386,Sun,165.0,31468.98,41.0,119.0,24989.24,38.0,15.0,2597.24,3.0,4.0,628.03,0.0,27.0,3254.47,0.0,2017-08-27,2016-08-28,26,83,213.83,160.0,30428.85,115.0,24086.11,15.0,2597.24,4.0,628.03,26.0,3117.47,166.0,31804.99,117.0,24566.61,16.0,3085.88,5.0,763.03,28.0,3389.47,171.0,33073.09,122.0,25834.71,16.0,3085.88,5.0,763.03,28.0,3389.47,2017-08-01,2016-08-02,63,False,False,True,False,False,False,46.0,6479.74,140.86,3.0,178.0,168.09,29919.61,93.0,131.0,183.24,24004.17,24.0,171.53,4116.77,179.0,192.15,34394.38,108.0
23387,Mon,169.0,32690.81,38.0,129.0,26766.44,38.0,12.0,2529.64,0.0,3.0,461.03,0.0,25.0,2933.7,0.0,2017-08-28,2016-08-29,27,93,212.04,159.0,30341.14,121.0,24954.77,10.0,1991.64,3.0,461.03,25.0,2933.7,171.0,32549.64,126.0,25943.27,13.0,2666.64,6.0,871.03,26.0,3068.7,174.0,33401.62,130.0,26935.25,13.0,2666.64,5.0,731.03,26.0,3068.7,2017-08-01,2016-08-02,56,True,False,False,False,False,False,40.0,5924.37,148.11,0.0,179.0,151.53,27123.13,100.0,131.0,164.99,21613.33,20.0,167.97,3359.43,174.0,190.89,33215.19,125.0
23388,Tue,172.0,32283.29,42.0,132.0,26310.28,41.0,11.0,2359.04,0.0,3.0,461.03,0.0,26.0,3152.94,1.0,2017-08-29,2016-08-30,28,85,206.33,164.0,30462.79,126.0,25027.78,9.0,1821.04,3.0,461.03,26.0,3152.94,171.0,31589.6,126.0,24934.59,12.0,2496.04,6.0,871.03,27.0,3287.94,173.0,32158.98,129.0,25643.97,12.0,2496.04,5.0,731.03,27.0,3287.94,2017-08-01,2016-08-02,57,False,False,False,False,True,False,40.0,5973.01,149.33,1.0,178.0,145.09,25826.07,90.0,128.0,160.37,20526.94,20.0,156.71,3134.13,173.0,185.95,32169.28,122.0
23389,Wed,163.0,29308.46,41.0,122.0,23261.08,39.0,11.0,2394.66,0.0,5.0,724.03,1.0,25.0,2928.69,1.0,2017-08-30,2016-08-31,29,68,201.06,159.0,28294.17,118.0,22246.79,11.0,2394.66,5.0,724.03,25.0,2928.69,164.0,28930.08,119.0,22389.7,12.0,2531.66,7.0,945.03,26.0,3063.69,173.0,30708.25,128.0,24167.87,12.0,2531.66,7.0,945.03,26.0,3063.69,2017-08-01,2016-08-02,65,False,False,False,False,False,True,41.0,6047.38,147.5,2.0,171.0,137.88,23577.22,76.0,126.0,151.21,19052.76,12.0,177.46,2129.53,174.0,176.95,30788.8,109.0
23390,Thu,148.0,25598.71,41.0,116.0,21552.53,39.0,5.0,908.66,0.0,4.0,598.73,1.0,23.0,2538.79,1.0,2017-08-31,2016-09-01,30,57,197.0,148.0,25598.71,116.0,21552.53,5.0,908.66,4.0,598.73,23.0,2538.79,158.0,26730.7,117.0,21246.62,10.0,1853.16,6.0,888.73,25.0,2742.19,164.0,27966.82,123.0,22482.74,10.0,1853.16,6.0,888.73,25.0,2742.19,2017-08-01,2016-08-02,80,False,False,False,True,False,False,32.0,4046.18,126.44,2.0,183.0,133.07,24351.32,92.0,132.0,145.19,19164.66,15.0,168.48,2527.23,170.0,171.07,29082.2,101.0


In [20]:
df_sim["AsOfDate"]

0       2015-08-02
1       2015-08-02
2       2015-08-02
3       2015-08-02
4       2015-08-02
           ...    
23386   2017-08-01
23387   2017-08-01
23388   2017-08-01
23389   2017-08-01
23390   2017-08-01
Name: AsOfDate, Length: 23391, dtype: datetime64[ns]

In [21]:
mask = df_sim.StayDate == '2017-08-09'
df_sim[mask][["ACTUAL_RoomsSold"]]

Unnamed: 0,ACTUAL_RoomsSold
22655,183.0
22686,183.0
22717,183.0
22748,183.0
22779,183.0
22810,183.0
22841,183.0
22872,183.0
22903,183.0
22934,183.0


In [22]:
h1_dbd.loc["2017-08-09"]

DOW                                 Wed
Occ                                0.98
RoomsSold                         183.0
ADR                              201.27
RoomRev                        36832.94
RevPAR                           196.97
NumCancels                        132.0
TRN_RoomsSold                     125.0
TRN_ADR                          218.29
TRN_RoomRev                    27285.73
GRP_RoomsSold                       7.0
GRP_ADR                           127.0
GRP_RoomRev                      889.02
TRNP_RoomsSold                     26.0
TRNP_ADR                         194.72
TRNP_RoomRev                    5062.63
CNT_RoomsSold                      25.0
CNT_ADR                          143.82
CNT_RoomRev                     3595.56
WE                                False
WD                                 True
STLY_Date           2016-08-10 00:00:00
NONTRN_RoomsSold                   58.0
NONTRN_RoomRev                  9547.21
NONTRN_ADR                       164.61


In [23]:
h1_dbd.columns

Index(['DOW', 'Occ', 'RoomsSold', 'ADR', 'RoomRev', 'RevPAR', 'NumCancels',
       'TRN_RoomsSold', 'TRN_ADR', 'TRN_RoomRev', 'GRP_RoomsSold', 'GRP_ADR',
       'GRP_RoomRev', 'TRNP_RoomsSold', 'TRNP_ADR', 'TRNP_RoomRev',
       'CNT_RoomsSold', 'CNT_ADR', 'CNT_RoomRev', 'WE', 'WD', 'STLY_Date',
       'NONTRN_RoomsSold', 'NONTRN_RoomRev', 'NONTRN_ADR'],
      dtype='object')

In [24]:
df_sim.columns
# df_sim["AsOfDate"]

Index(['DOW', 'RoomsOTB', 'RevOTB', 'CxlForecast', 'TRN_RoomsOTB',
       'TRN_RevOTB', 'TRN_CxlForecast', 'TRNP_RoomsOTB', 'TRNP_RevOTB',
       'TRNP_CxlForecast', 'GRP_RoomsOTB', 'GRP_RevOTB', 'GRP_CxlForecast',
       'CNT_RoomsOTB', 'CNT_RevOTB', 'CNT_CxlForecast', 'StayDate',
       'STLY_StayDate', 'DaysUntilArrival', 'Realized_Cxls', 'SellingPrice',
       'TM30_RoomsOTB', 'TM30_RevOTB', 'TM30_TRN_RoomsOTB', 'TM30_TRN_RevOTB',
       'TM30_TRNP_RoomsOTB', 'TM30_TRNP_RevOTB', 'TM30_GRP_RoomsOTB',
       'TM30_GRP_RevOTB', 'TM30_CNT_RoomsOTB', 'TM30_CNT_RevOTB',
       'TM15_RoomsOTB', 'TM15_RevOTB', 'TM15_TRN_RoomsOTB', 'TM15_TRN_RevOTB',
       'TM15_TRNP_RoomsOTB', 'TM15_TRNP_RevOTB', 'TM15_GRP_RoomsOTB',
       'TM15_GRP_RevOTB', 'TM15_CNT_RoomsOTB', 'TM15_CNT_RevOTB',
       'TM05_RoomsOTB', 'TM05_RevOTB', 'TM05_TRN_RoomsOTB', 'TM05_TRN_RevOTB',
       'TM05_TRNP_RoomsOTB', 'TM05_TRNP_RevOTB', 'TM05_GRP_RoomsOTB',
       'TM05_GRP_RevOTB', 'TM05_CNT_RoomsOTB', 'TM05_CNT_RevO

In [25]:
df_sim["NONTRN_ADR_OTB"] = round(df_sim["NONTRN_RevOTB"] / df_sim["NONTRN_RoomsOTB"], 2)
# df_sim["TM30_NONTRN_RevOTB"]

In [26]:
df_sim["AsOfDate"]

0       2015-08-02
1       2015-08-02
2       2015-08-02
3       2015-08-02
4       2015-08-02
           ...    
23386   2017-08-01
23387   2017-08-01
23388   2017-08-01
23389   2017-08-01
23390   2017-08-01
Name: AsOfDate, Length: 23391, dtype: datetime64[ns]

In [27]:
# Calculate ADR for all segments first

df_sim["ADR_OTB"] = round(df_sim["RevOTB"] / df_sim["RoomsOTB"], 2)
df_sim["TRN_ADR_OTB"] = round(df_sim["TRN_RevOTB"] / df_sim["TRN_RoomsOTB"], 2)
df_sim["NONTRN_ADR_OTB"] = round(df_sim["NONTRN_RevOTB"] / df_sim["NONTRN_RoomsOTB"], 2)

# get recent pickup (tminus) columns
tms = ["TM30_", "TM15_", "TM05_"]
segs = ["", "TRN_"] # "" for total hotel

for tm in tms:
    # Calculate ADR for tminus windows first
    df_sim[tm + "ADR_OTB"] = round(df_sim[tm + "RevOTB"] / df_sim[tm + "RoomsOTB"], 2)
    df_sim[tm + "TRN_ADR_OTB"] = round(df_sim[tm + "TRN_RevOTB"] / df_sim[tm + "TRN_RoomsOTB"], 2)

    # Calculate NONTRN ADR for tminus windows
    # df_sim[tm + "NONTRN_ADR_OTB"] = round(df_sim[tm + "NONTRN_RevOTB"] / df_sim[tm + "NONTRN_RoomsOTB"], 2)
    
    for seg in segs:
        # Calculate pickup stats
        df_sim[tm + seg + "RoomsPickup"] = round(
            df_sim[seg + "RoomsOTB"] - df_sim[tm + seg + "RoomsOTB"], 2
        )
        df_sim[tm + seg + "RevPickup"] = round(
            df_sim[seg + "RevOTB"] - df_sim[tm + seg + "RevOTB"], 2
        )
        df_sim[tm + seg + "ADR_Pickup"] = round(
            df_sim[seg + "ADR_OTB"] - df_sim[tm + seg + "ADR_OTB"], 2
        )
    
    # Calculate NONTRN pickup stats
    tm_nontrn_rooms_otb = tm + "NONTRN_RoomsOTB"
    nontrn_rooms_otb = "NONTRN_RoomsOTB"
    tm_nontrn_rev_otb = tm + "NONTRN_RevOTB"
    nontrn_rev_otb = "NONTRN_RevOTB"
    tm_nontrn_adr_otb = tm + "NONTRN_ADR_OTB"
    nontrn_adr_otb = "NONTRN_ADR_OTB"

    if nontrn_rooms_otb in df_sim.columns:
        if tm_nontrn_rooms_otb not in df_sim.columns:
            df_sim[tm_nontrn_rooms_otb] = 0  # Fill missing column with 0
        df_sim[tm + "NONTRN_RoomsPickup"] = (
            df_sim[nontrn_rooms_otb] - df_sim[tm_nontrn_rooms_otb]
        )
    if nontrn_rev_otb in df_sim.columns:
        if tm_nontrn_rev_otb not in df_sim.columns:
            df_sim[tm_nontrn_rev_otb] = 0  # Fill missing column with 0
        df_sim[tm + "NONTRN_RevPickup"] = (
            df_sim[nontrn_rev_otb] - df_sim[tm_nontrn_rev_otb]
        )
    if nontrn_adr_otb in df_sim.columns:
        if tm_nontrn_adr_otb not in df_sim.columns:
            df_sim[tm_nontrn_adr_otb] = 0  # Fill missing column with 0
        df_sim[tm + "NONTRN_ADR_Pickup"] = (
            df_sim[nontrn_adr_otb] - df_sim[tm_nontrn_adr_otb]
        )

df_sim.head()

Unnamed: 0,DOW,RoomsOTB,RevOTB,CxlForecast,TRN_RoomsOTB,TRN_RevOTB,TRN_CxlForecast,TRNP_RoomsOTB,TRNP_RevOTB,TRNP_CxlForecast,GRP_RoomsOTB,GRP_RevOTB,GRP_CxlForecast,CNT_RoomsOTB,CNT_RevOTB,CNT_CxlForecast,StayDate,STLY_StayDate,DaysUntilArrival,Realized_Cxls,SellingPrice,TM30_RoomsOTB,TM30_RevOTB,TM30_TRN_RoomsOTB,TM30_TRN_RevOTB,TM30_TRNP_RoomsOTB,TM30_TRNP_RevOTB,TM30_GRP_RoomsOTB,TM30_GRP_RevOTB,TM30_CNT_RoomsOTB,TM30_CNT_RevOTB,TM15_RoomsOTB,TM15_RevOTB,TM15_TRN_RoomsOTB,TM15_TRN_RevOTB,TM15_TRNP_RoomsOTB,TM15_TRNP_RevOTB,TM15_GRP_RoomsOTB,TM15_GRP_RevOTB,TM15_CNT_RoomsOTB,TM15_CNT_RevOTB,TM05_RoomsOTB,TM05_RevOTB,TM05_TRN_RoomsOTB,TM05_TRN_RevOTB,TM05_TRNP_RoomsOTB,TM05_TRNP_RevOTB,TM05_GRP_RoomsOTB,TM05_GRP_RevOTB,TM05_CNT_RoomsOTB,TM05_CNT_RevOTB,AsOfDate,STLY_AsOfDate,RemSupply,Mon,Sat,Sun,Thu,Tue,Wed,NONTRN_RoomsOTB,NONTRN_RevOTB,NONTRN_ADR_OTB,NONTRN_CxlForecast,LYA_RoomsSold,LYA_ADR,LYA_RoomRev,LYA_NumCancels,LYA_TRN_RoomsSold,LYA_TRN_ADR,LYA_TRN_RoomRev,LYA_TRNP_RoomsSold,LYA_TRNP_ADR,LYA_TRNP_RoomRev,ACTUAL_RoomsSold,ACTUAL_ADR,ACTUAL_RoomRev,ACTUAL_NumCancels,ADR_OTB,TRN_ADR_OTB,TM30_ADR_OTB,TM30_TRN_ADR_OTB,TM30_RoomsPickup,TM30_RevPickup,TM30_ADR_Pickup,TM30_TRN_RoomsPickup,TM30_TRN_RevPickup,TM30_TRN_ADR_Pickup,TM30_NONTRN_RoomsOTB,TM30_NONTRN_RoomsPickup,TM30_NONTRN_RevOTB,TM30_NONTRN_RevPickup,TM30_NONTRN_ADR_OTB,TM30_NONTRN_ADR_Pickup,TM15_ADR_OTB,TM15_TRN_ADR_OTB,TM15_RoomsPickup,TM15_RevPickup,TM15_ADR_Pickup,TM15_TRN_RoomsPickup,TM15_TRN_RevPickup,TM15_TRN_ADR_Pickup,TM15_NONTRN_RoomsOTB,TM15_NONTRN_RoomsPickup,TM15_NONTRN_RevOTB,TM15_NONTRN_RevPickup,TM15_NONTRN_ADR_OTB,TM15_NONTRN_ADR_Pickup,TM05_ADR_OTB,TM05_TRN_ADR_OTB,TM05_RoomsPickup,TM05_RevPickup,TM05_ADR_Pickup,TM05_TRN_RoomsPickup,TM05_TRN_RevPickup,TM05_TRN_ADR_Pickup,TM05_NONTRN_RoomsOTB,TM05_NONTRN_RoomsPickup,TM05_NONTRN_RevOTB,TM05_NONTRN_RevPickup,TM05_NONTRN_ADR_OTB,TM05_NONTRN_ADR_Pickup
0,Sun,168.0,24346.11,21.0,129.0,19860.51,20.0,14.0,1710.3,1.0,0.0,0.0,0.0,25.0,2775.3,0.0,2015-08-02,2014-08-03,0,68,153.37,161.0,22662.86,119.0,17749.28,15.0,1909.3,0.0,0.0,27.0,3004.28,161.0,22923.55,120.0,18116.97,15.0,1909.3,0.0,0.0,26.0,2897.28,162.0,23255.24,123.0,18769.64,14.0,1710.3,0.0,0.0,25.0,2775.3,2015-08-02,2014-08-03,40,False,False,True,False,False,False,39.0,4485.6,115.02,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,168.0,144.92,24346.11,68.0,144.92,153.96,140.76,149.15,7.0,1683.25,4.16,10.0,2111.23,4.81,0,39.0,0,4485.6,0,115.02,142.38,150.97,7.0,1422.56,2.54,9.0,1743.54,2.99,0,39.0,0,4485.6,0,115.02,143.55,152.6,6.0,1090.87,1.37,6.0,1090.87,1.36,0,39.0,0,4485.6,0,115.02
1,Mon,175.0,26061.4,25.0,130.0,20926.29,24.0,15.0,1814.28,1.0,0.0,0.0,0.0,30.0,3320.83,0.0,2015-08-03,2014-08-04,1,54,160.36,172.0,24998.88,124.0,19419.79,16.0,2013.28,0.0,0.0,32.0,3565.81,168.0,24572.57,121.0,19116.48,16.0,2013.28,0.0,0.0,31.0,3442.81,173.0,25712.31,128.0,20577.2,15.0,1814.28,0.0,0.0,30.0,3320.83,2015-08-02,2014-08-03,37,True,False,False,False,False,False,45.0,5135.11,114.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,178.0,149.32,26579.6,54.0,148.92,160.97,145.34,156.61,3.0,1062.52,3.58,6.0,1506.5,4.36,0,45.0,0,5135.11,0,114.11,146.27,157.99,7.0,1488.83,2.65,9.0,1809.81,2.98,0,45.0,0,5135.11,0,114.11,148.63,160.76,2.0,349.09,0.29,2.0,349.09,0.21,0,45.0,0,5135.11,0,114.11
2,Tue,178.0,26730.73,26.0,128.0,20870.68,25.0,18.0,2360.88,1.0,0.0,0.0,0.0,32.0,3499.17,0.0,2015-08-04,2014-08-05,2,57,163.14,177.0,25901.61,126.0,20058.58,17.0,2098.88,0.0,0.0,34.0,3744.15,172.0,25296.7,122.0,19576.67,17.0,2098.88,0.0,0.0,33.0,3621.15,176.0,26399.13,126.0,20539.08,18.0,2360.88,0.0,0.0,32.0,3499.17,2015-08-02,2014-08-03,35,False,False,False,False,True,False,50.0,5860.05,117.2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,182.0,151.57,27585.83,58.0,150.17,163.05,146.34,159.2,1.0,829.12,3.83,2.0,812.1,3.85,0,50.0,0,5860.05,0,117.2,147.07,160.46,6.0,1434.03,3.1,6.0,1294.01,2.59,0,50.0,0,5860.05,0,117.2,150.0,163.01,2.0,331.6,0.17,2.0,331.6,0.04,0,50.0,0,5860.05,0,117.2
3,Wed,175.0,26469.93,28.0,130.0,21123.5,27.0,16.0,2136.16,1.0,0.0,0.0,0.0,29.0,3210.27,0.0,2015-08-05,2014-08-06,3,57,163.93,174.0,25803.81,128.0,20474.4,15.0,1874.16,0.0,0.0,31.0,3455.25,174.0,26247.68,128.0,20820.72,15.0,1874.16,0.0,0.0,31.0,3552.8,174.0,26349.33,129.0,21002.9,16.0,2136.16,0.0,0.0,29.0,3210.27,2015-08-02,2014-08-03,40,False,False,False,False,False,True,45.0,5346.43,118.81,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,182.0,152.92,27831.73,59.0,151.26,162.49,148.3,159.96,1.0,666.12,2.96,2.0,649.1,2.53,0,45.0,0,5346.43,0,118.81,150.85,162.66,1.0,222.25,0.41,2.0,302.78,-0.17,0,45.0,0,5346.43,0,118.81,151.43,162.81,1.0,120.6,-0.17,1.0,120.6,-0.32,0,45.0,0,5346.43,0,118.81
4,Thu,176.0,27065.32,31.0,133.0,21878.04,28.0,19.0,2506.66,3.0,0.0,0.0,0.0,24.0,2680.62,0.0,2015-08-06,2014-08-07,4,56,164.97,169.0,25175.89,125.0,20005.63,18.0,2244.66,0.0,0.0,26.0,2925.6,174.0,26695.28,129.0,21088.45,20.0,2705.66,0.0,0.0,25.0,2901.17,176.0,27065.32,133.0,21878.04,19.0,2506.66,0.0,0.0,24.0,2680.62,2015-08-02,2014-08-03,42,False,False,False,True,False,False,43.0,5187.28,120.63,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,180.0,155.27,27947.92,57.0,153.78,164.5,148.97,160.05,7.0,1889.43,4.81,8.0,1872.41,4.45,0,43.0,0,5187.28,0,120.63,153.42,163.48,2.0,370.04,0.36,4.0,789.59,1.02,0,43.0,0,5187.28,0,120.63,153.78,164.5,0.0,0.0,0.0,0.0,0.0,0.0,0,43.0,0,5187.28,0,120.63


In [28]:
df_sim.shape
df_sim["AsOfDate"]

0       2015-08-02
1       2015-08-02
2       2015-08-02
3       2015-08-02
4       2015-08-02
           ...    
23386   2017-08-01
23387   2017-08-01
23388   2017-08-01
23389   2017-08-01
23390   2017-08-01
Name: AsOfDate, Length: 23391, dtype: datetime64[ns]

In [29]:
# add gap to LYA columns (by segment)
# must be done AFTER NONTRN cols added
df_sim["RoomsGapToLYA"] = df_sim.LYA_RoomsSold - df_sim.RoomsOTB
df_sim["RevGapToLYA"] = df_sim.LYA_RoomRev - df_sim.RevOTB
df_sim["ADR_GapToLYA"] = df_sim.LYA_ADR - df_sim.ADR_OTB

df_sim["TRN_RoomsGapToLYA"] = df_sim.LYA_TRN_RoomsSold - df_sim.TRN_RoomsOTB
df_sim["TRN_RevGapToLYA"] = df_sim.LYA_TRN_RoomRev - df_sim.TRN_RevOTB
df_sim["TRN_ADR_GapToLYA"] = df_sim.LYA_TRN_ADR - df_sim.TRN_ADR_OTB

df_sim["NONTRN_RoomsGapToLYA"] = df_sim["RoomsGapToLYA"] - df_sim["TRN_RoomsGapToLYA"]
df_sim["NONTRN_RevGapToLYA"] = df_sim["RevGapToLYA"] - df_sim["TRN_RevGapToLYA"]
df_sim["NONTRN_ADR_GapToLYA"] = df_sim["ADR_GapToLYA"] - df_sim["TRN_ADR_GapToLYA"]

In [30]:
df_sim.shape

(23391, 131)

In [31]:
df_sim.columns

Index(['DOW', 'RoomsOTB', 'RevOTB', 'CxlForecast', 'TRN_RoomsOTB',
       'TRN_RevOTB', 'TRN_CxlForecast', 'TRNP_RoomsOTB', 'TRNP_RevOTB',
       'TRNP_CxlForecast',
       ...
       'TM05_NONTRN_ADR_Pickup', 'RoomsGapToLYA', 'RevGapToLYA',
       'ADR_GapToLYA', 'TRN_RoomsGapToLYA', 'TRN_RevGapToLYA',
       'TRN_ADR_GapToLYA', 'NONTRN_RoomsGapToLYA', 'NONTRN_RevGapToLYA',
       'NONTRN_ADR_GapToLYA'],
      dtype='object', length=131)

In [32]:
# remove all non-gap, non-pickup actual/tminus columns
# I will want to move this down in our script to combine with removing stly cols (we only want pace)
# removing them here just to make it cleaner

df_sim.drop(columns=drop_cols_agg, inplace=True, errors='ignore')
df_sim.columns

Index(['DOW', 'GRP_RoomsOTB', 'GRP_RevOTB', 'GRP_CxlForecast', 'CNT_RoomsOTB',
       'CNT_RevOTB', 'CNT_CxlForecast', 'STLY_StayDate', 'DaysUntilArrival',
       'TM30_RoomsOTB', 'TM30_RevOTB', 'TM30_TRN_RoomsOTB', 'TM30_TRN_RevOTB',
       'TM30_TRNP_RoomsOTB', 'TM30_TRNP_RevOTB', 'TM30_GRP_RoomsOTB',
       'TM30_GRP_RevOTB', 'TM30_CNT_RoomsOTB', 'TM30_CNT_RevOTB',
       'TM15_RoomsOTB', 'TM15_RevOTB', 'TM15_TRN_RoomsOTB', 'TM15_TRN_RevOTB',
       'TM15_TRNP_RoomsOTB', 'TM15_TRNP_RevOTB', 'TM15_GRP_RoomsOTB',
       'TM15_GRP_RevOTB', 'TM15_CNT_RoomsOTB', 'TM15_CNT_RevOTB',
       'TM05_RoomsOTB', 'TM05_RevOTB', 'TM05_TRN_RoomsOTB', 'TM05_TRN_RevOTB',
       'TM05_TRNP_RoomsOTB', 'TM05_TRNP_RevOTB', 'TM05_GRP_RoomsOTB',
       'TM05_GRP_RevOTB', 'TM05_CNT_RoomsOTB', 'TM05_CNT_RevOTB',
       'STLY_AsOfDate', 'Mon', 'Sat', 'Sun', 'Thu', 'Tue', 'Wed',
       'NONTRN_RoomsOTB', 'NONTRN_RevOTB', 'NONTRN_ADR_OTB',
       'NONTRN_CxlForecast', 'LYA_RoomsSold', 'LYA_ADR', 'LYA_RoomRev',


In [33]:
df_sim.sample(random_state=0)

Unnamed: 0,DOW,GRP_RoomsOTB,GRP_RevOTB,GRP_CxlForecast,CNT_RoomsOTB,CNT_RevOTB,CNT_CxlForecast,STLY_StayDate,DaysUntilArrival,TM30_RoomsOTB,TM30_RevOTB,TM30_TRN_RoomsOTB,TM30_TRN_RevOTB,TM30_TRNP_RoomsOTB,TM30_TRNP_RevOTB,TM30_GRP_RoomsOTB,TM30_GRP_RevOTB,TM30_CNT_RoomsOTB,TM30_CNT_RevOTB,TM15_RoomsOTB,TM15_RevOTB,TM15_TRN_RoomsOTB,TM15_TRN_RevOTB,TM15_TRNP_RoomsOTB,TM15_TRNP_RevOTB,TM15_GRP_RoomsOTB,TM15_GRP_RevOTB,TM15_CNT_RoomsOTB,TM15_CNT_RevOTB,TM05_RoomsOTB,TM05_RevOTB,TM05_TRN_RoomsOTB,TM05_TRN_RevOTB,TM05_TRNP_RoomsOTB,TM05_TRNP_RevOTB,TM05_GRP_RoomsOTB,TM05_GRP_RevOTB,TM05_CNT_RoomsOTB,TM05_CNT_RevOTB,STLY_AsOfDate,Mon,Sat,Sun,Thu,Tue,Wed,NONTRN_RoomsOTB,NONTRN_RevOTB,NONTRN_ADR_OTB,NONTRN_CxlForecast,LYA_RoomsSold,LYA_ADR,LYA_RoomRev,LYA_NumCancels,LYA_TRN_RoomsSold,LYA_TRN_ADR,LYA_TRN_RoomRev,LYA_TRNP_RoomsSold,LYA_TRNP_ADR,LYA_TRNP_RoomRev,ACTUAL_RoomsSold,ACTUAL_ADR,ACTUAL_RoomRev,ACTUAL_NumCancels,TM30_ADR_OTB,TM30_TRN_ADR_OTB,TM30_NONTRN_RoomsOTB,TM30_NONTRN_RoomsPickup,TM30_NONTRN_RevOTB,TM30_NONTRN_RevPickup,TM30_NONTRN_ADR_OTB,TM30_NONTRN_ADR_Pickup,TM15_ADR_OTB,TM15_TRN_ADR_OTB,TM15_NONTRN_RoomsOTB,TM15_NONTRN_RoomsPickup,TM15_NONTRN_RevOTB,TM15_NONTRN_RevPickup,TM15_NONTRN_ADR_OTB,TM15_NONTRN_ADR_Pickup,TM05_ADR_OTB,TM05_TRN_ADR_OTB,TM05_NONTRN_RoomsOTB,TM05_NONTRN_RoomsPickup,TM05_NONTRN_RevOTB,TM05_NONTRN_RevPickup,TM05_NONTRN_ADR_OTB,TM05_NONTRN_ADR_Pickup,RoomsGapToLYA,RevGapToLYA,ADR_GapToLYA,TRN_RoomsGapToLYA,TRN_RevGapToLYA,TRN_ADR_GapToLYA,NONTRN_RoomsGapToLYA,NONTRN_RevGapToLYA,NONTRN_ADR_GapToLYA
3633,Thu,0.0,0.0,0.0,1.0,35.7,0.0,2014-12-11,17,7.0,289.63,6.0,253.93,0.0,0.0,0.0,0.0,1.0,35.7,18.0,659.75,12.0,493.55,5.0,130.5,0.0,0.0,1.0,35.7,30.0,1125.9,24.0,959.7,5.0,130.5,0.0,0.0,1.0,35.7,2014-11-24,False,False,False,True,False,False,6.0,166.2,27.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,46.0,38.65,1778.1,4.0,41.38,42.32,0,6.0,0,166.2,0,27.7,36.65,41.13,0,6.0,0,166.2,0,27.7,37.53,39.99,0,6.0,0,166.2,0,27.7,-18.0,-659.75,-36.65,-12.0,-493.55,-41.13,-6.0,-166.2,4.48


In [34]:
# df_sim.loc["2016-04-24"]

# EW- NEXT STEPS (THU 5PM)

1. drop unneeded, post-processed TM_nn columns in blank cell above (create list in agg_utils.py)
2. pull stly cols via merge below
3. calculate pace
4. drop unneeded, post-processed stly cols
5. add all of the steps in this notebook to agg.py
6. pull features from list at top of this NB
7. train/test split
8. linear regression (predict RoomsSold)
9. randomForest (predict RoomsSold)

**Time to pull STLY columns. I will accomplish this by merging df_sim on top of itself and pulling the below columns into the next year's row with the `'STLY_'` prefix.***

But before we do that, let's make sure we add in the ADR columns.

NEVERMIND - THIS STEP NEEDS TO COME LAST ONCE WE HAVE ALL OF THE OTHER COLUMNS

In [35]:
df_sim.head(2)

Unnamed: 0,DOW,GRP_RoomsOTB,GRP_RevOTB,GRP_CxlForecast,CNT_RoomsOTB,CNT_RevOTB,CNT_CxlForecast,STLY_StayDate,DaysUntilArrival,TM30_RoomsOTB,TM30_RevOTB,TM30_TRN_RoomsOTB,TM30_TRN_RevOTB,TM30_TRNP_RoomsOTB,TM30_TRNP_RevOTB,TM30_GRP_RoomsOTB,TM30_GRP_RevOTB,TM30_CNT_RoomsOTB,TM30_CNT_RevOTB,TM15_RoomsOTB,TM15_RevOTB,TM15_TRN_RoomsOTB,TM15_TRN_RevOTB,TM15_TRNP_RoomsOTB,TM15_TRNP_RevOTB,TM15_GRP_RoomsOTB,TM15_GRP_RevOTB,TM15_CNT_RoomsOTB,TM15_CNT_RevOTB,TM05_RoomsOTB,TM05_RevOTB,TM05_TRN_RoomsOTB,TM05_TRN_RevOTB,TM05_TRNP_RoomsOTB,TM05_TRNP_RevOTB,TM05_GRP_RoomsOTB,TM05_GRP_RevOTB,TM05_CNT_RoomsOTB,TM05_CNT_RevOTB,STLY_AsOfDate,Mon,Sat,Sun,Thu,Tue,Wed,NONTRN_RoomsOTB,NONTRN_RevOTB,NONTRN_ADR_OTB,NONTRN_CxlForecast,LYA_RoomsSold,LYA_ADR,LYA_RoomRev,LYA_NumCancels,LYA_TRN_RoomsSold,LYA_TRN_ADR,LYA_TRN_RoomRev,LYA_TRNP_RoomsSold,LYA_TRNP_ADR,LYA_TRNP_RoomRev,ACTUAL_RoomsSold,ACTUAL_ADR,ACTUAL_RoomRev,ACTUAL_NumCancels,TM30_ADR_OTB,TM30_TRN_ADR_OTB,TM30_NONTRN_RoomsOTB,TM30_NONTRN_RoomsPickup,TM30_NONTRN_RevOTB,TM30_NONTRN_RevPickup,TM30_NONTRN_ADR_OTB,TM30_NONTRN_ADR_Pickup,TM15_ADR_OTB,TM15_TRN_ADR_OTB,TM15_NONTRN_RoomsOTB,TM15_NONTRN_RoomsPickup,TM15_NONTRN_RevOTB,TM15_NONTRN_RevPickup,TM15_NONTRN_ADR_OTB,TM15_NONTRN_ADR_Pickup,TM05_ADR_OTB,TM05_TRN_ADR_OTB,TM05_NONTRN_RoomsOTB,TM05_NONTRN_RoomsPickup,TM05_NONTRN_RevOTB,TM05_NONTRN_RevPickup,TM05_NONTRN_ADR_OTB,TM05_NONTRN_ADR_Pickup,RoomsGapToLYA,RevGapToLYA,ADR_GapToLYA,TRN_RoomsGapToLYA,TRN_RevGapToLYA,TRN_ADR_GapToLYA,NONTRN_RoomsGapToLYA,NONTRN_RevGapToLYA,NONTRN_ADR_GapToLYA
0,Sun,0.0,0.0,0.0,25.0,2775.3,0.0,2014-08-03,0,161.0,22662.86,119.0,17749.28,15.0,1909.3,0.0,0.0,27.0,3004.28,161.0,22923.55,120.0,18116.97,15.0,1909.3,0.0,0.0,26.0,2897.28,162.0,23255.24,123.0,18769.64,14.0,1710.3,0.0,0.0,25.0,2775.3,2014-08-03,False,False,True,False,False,False,39.0,4485.6,115.02,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,168.0,144.92,24346.11,68.0,140.76,149.15,0,39.0,0,4485.6,0,115.02,142.38,150.97,0,39.0,0,4485.6,0,115.02,143.55,152.6,0,39.0,0,4485.6,0,115.02,-168.0,-24346.11,-144.92,-129.0,-19860.51,-153.96,-39.0,-4485.6,9.04
1,Mon,0.0,0.0,0.0,30.0,3320.83,0.0,2014-08-04,1,172.0,24998.88,124.0,19419.79,16.0,2013.28,0.0,0.0,32.0,3565.81,168.0,24572.57,121.0,19116.48,16.0,2013.28,0.0,0.0,31.0,3442.81,173.0,25712.31,128.0,20577.2,15.0,1814.28,0.0,0.0,30.0,3320.83,2014-08-03,True,False,False,False,False,False,45.0,5135.11,114.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,178.0,149.32,26579.6,54.0,145.34,156.61,0,45.0,0,5135.11,0,114.11,146.27,157.99,0,45.0,0,5135.11,0,114.11,148.63,160.76,0,45.0,0,5135.11,0,114.11,-175.0,-26061.4,-148.92,-130.0,-20926.29,-160.97,-45.0,-5135.11,12.05


In [36]:
df_sim.shape

(23391, 97)

In [37]:
# pull STLY columns with self-merge to STLY date

# first, we need to create unique ID col (id) for each as-of-date/stay-date combo
# then, we manipulate strings to add a stly_id column that we can use as right key for our merge

df_sim_ids = df_sim.AsOfDate.astype(str) + ' - ' + df_sim.StayDate.astype(str)
df_sim.insert(0, "id", df_sim_ids)

df_sim_stly_ids = df_sim.STLY_AsOfDate.astype(str) + ' - ' + df_sim.STLY_StayDate.astype(str)
df_sim.insert(1, "stly_id", df_sim_stly_ids)
df_sim.head()

AttributeError: 'DataFrame' object has no attribute 'AsOfDate'

In [None]:
df_sim["DayOfWeek"] df_sim.StayDate.map(lambda x: dt.datetime.strftime(x, format="%a"))

In [None]:
df_sim.shape

In [None]:
# self-join df_sim to pull stly stats using the above keys

df_sim = df_sim.merge(df_sim[stly_cols_agg], left_on='stly_id', right_on='id', suffixes=(None, "_STLY"))
df_sim.head(2)

In [None]:
df_sim.shape

In [None]:
df_sim[['id', 'stly_id', 'AsOfDate', 'StayDate', 'AsOfDate_STLY', 'StayDate_STLY', 'RoomsOTB_STLY', 'RevOTB_STLY']]




In [None]:
len(df_sim[df_sim.AsOfDate_STLY.isna()])

In [None]:
len(df_sim.dropna())

In [None]:
[c for c in df_sim.columns if c[-5:] == '_STLY']

In [None]:
df_sim.shape

In [None]:
df_test_stly = pd.read_pickle("./sims/pickle/h1_sim_2016-08-02.pick")
df_test_stly.loc["2016-08-06"]