In [1]:
import os
import pandas as pd
import numpy as np
import datetime
from sklearn.preprocessing import OneHotEncoder as ohe

from dbds import generate_hotel_dfs
from sim import generate_simulation
from model_cancellations import get_otb_res, predict_cancellations
from agg_utils import stly_cols, ly_cols

pd.options.display.max_rows = 150
pd.options.display.max_columns = 250

DATE_FMT = "%Y-%m-%d"
h1_capacity = 187
h2_capacity = 226
AOD = "2017-08-01"
AOD_dt = pd.to_datetime(AOD)

h1_res = pd.read_pickle("pickle/h1_res.pick")
h2_res = pd.read_pickle("pickle/h2_res.pick")
h1_dbd = pd.read_pickle("pickle/h1_dbd.pick")
h2_dbd = pd.read_pickle("pickle/h2_dbd.pick")

In [2]:
# h1_res, h1_dbd = generate_hotel_dfs("../data/H1.csv", capacity=h1_capacity)
# h2_res, h2_dbd = generate_hotel_dfs("../data/H2.csv", capacity=h2_capacity)

# h1_res.to_pickle("pickle/h1_res.pick")
# h1_dbd.to_pickle("pickle/h1_dbd.pick")
# h2_res.to_pickle("pickle/h2_res.pick")
# h2_dbd.to_pickle("pickle/h2_dbd.pick")

In [3]:
# features needed:
features = ["DaysUntilArrival",
"RoomsOTB",
"RevOTB",
"CxlForecast",
"RemSupply",
"SellingPrice",
"STLY_SellingPrice",
"TRN_RoomsOTB",
"TRN_RevOTB",
"TRN_CxlForecast",
"LYA_RoomsSold",
"LYA_ADR",
"LYA_RoomRev",
"WE",
"WD",
"Mon",
"Sat",
"Sun",
"Thu",
"Tue",
"Wed",
"RoomsGapToLYA",
"ADR_GapToLYA",
"TM30_TRN_RoomsPickup",
"TM30_TRN_ADR_Pickup",
"TM30_TRN_RevPickup",
"TM15_RoomsPickup",
"TM15_ADR_Pickup",
"TM15_RevPickup",
"TM15_TRN_RoomsPickup",
"TM15_TRN_ADR_Pickup",
"TM15_TRN_RevPickup",
"TM05_RoomsPickup",
"TM05_ADR_Pickup",
"TM05_RevPickup",
"TM05_TRN_RoomsPickup",
"TM05_TRN_ADR_Pickup",
"TM05_TRN_RevPickup",
"RoomsOTB_Pace",
"ADR_OTB_Pace",
"RevOTB_Pace",
"TRN_RoomsOTB_Pace",
"TRN_ADR_OTB_Pace",
"TRN_RevOTB_Pace",
"TM30_RoomsPickup_Pace",
"TM30_ADR_Pickup_Pace",
"TM30_RevPickup_Pace",
"TM15_RoomsPickup_Pace",
"TM15_ADR_Pickup_Pace",
"TM15_RevPickup_Pace",
"TM05_RoomsPickup_Pace",
"TM05_ADR_Pickup_Pace",
"TM05_RevPickup_Pace",
"TM30_TRN_RoomsPickup_Pace",
"TM30_TRN_ADR_Pickup_Pace",
"TM30_TRN_RevPickup_Pace",
"TM15_TRN_RoomsPickup_Pace",
"TM15_TRN_ADR_Pickup_Pace",
"TM15_TRN_RevPickup_Pace",
"TM05_TRN_RoomsPickup_Pace",
"TM05_TRN_ADR_Pickup_Pace",
"TM05_TRN_RevPickup_Pace",
]

y_col = 'Actual_RoomsSold'

## Combine Files Generated by save_sims.py

In [4]:
# generate list of relevant files
import datetime as dt
hotel_num = 1
h = 'h' + str(hotel_num)
SIM_AOD = pd.to_datetime(dt.date(2017, 8, 1), format=DATE_FMT)
SIM_START = SIM_AOD - pd.DateOffset(364) # need exactly 364 days of actuals, the rest future-looking

FOLDER = "./sims/pickle/"
first_date = dt.date(2016, 8, 1)
lam_include = lambda x: x[:2] == h and pd.to_datetime(x[7:17]) >= SIM_START
h1_files = [f for f in os.listdir(FOLDER) if lam_include(f)]
h1_files.sort()
len(h1_files), h1_files[0], h1_files[-1] # note STLY date of 8/1/17 == 8/2/16 (matching weekday)

(395, 'h1_sim_2016-08-02.pick', 'h1_sim_2017-08-31.pick')

In [5]:
%%time
df_sim = pd.DataFrame()
for otb_data in h1_files:
    df_sim = df_sim.append(pd.read_pickle(FOLDER + otb_data))

df_sim.shape

CPU times: user 2.3 s, sys: 6.02 ms, total: 2.3 s
Wall time: 2.3 s


(12144, 53)

In [6]:
drop_cols = [
'TM05_TRNP_RoomsOTB',
'TM05_TRNP_RevOTB',
'TM05_GRP_RoomsOTB',
'TM05_GRP_RevOTB',
'TM05_CNT_RoomsOTB',
'TM05_CNT_RevOTB',
'TM15_TRNP_RoomsOTB',
'TM15_TRNP_RevOTB',
'TM15_GRP_RoomsOTB',
'TM15_GRP_RevOTB',
'TM15_CNT_RoomsOTB',
'TM15_CNT_RevOTB',
'TM30_TRNP_RoomsOTB',
'TM30_TRNP_RevOTB',
'TM30_GRP_RoomsOTB',
'TM30_GRP_RevOTB',
'TM30_CNT_RoomsOTB',
'TM30_CNT_RevOTB',
'TRNP_RoomsOTB',
'TRNP_RevOTB',
'TRNP_CxlForecast',
'GRP_RoomsOTB',
'GRP_RevOTB',
'GRP_CxlForecast',
'CNT_RoomsOTB',
'CNT_RevOTB',
'CNT_CxlForecast'
]

df_sim.drop(columns=drop_cols, inplace=True, errors='ignore')
df_sim.columns

Index(['DOW', 'RoomsOTB', 'RevOTB', 'CxlForecast', 'TRN_RoomsOTB',
       'TRN_RevOTB', 'TRN_CxlForecast', 'Date', 'WE', 'WD', 'STLY_Date',
       'DaysUntilArrival', 'ADR_OTB', 'SellingPrice', 'TM30_RoomsOTB',
       'TM30_RevOTB', 'TM30_TRN_RoomsOTB', 'TM30_TRN_RevOTB', 'TM15_RoomsOTB',
       'TM15_RevOTB', 'TM15_TRN_RoomsOTB', 'TM15_TRN_RevOTB', 'TM05_RoomsOTB',
       'TM05_RevOTB', 'TM05_TRN_RoomsOTB', 'TM05_TRN_RevOTB'],
      dtype='object')

## Adding calculated features

In [7]:
# Add AsOfDate

def apply_aod(row):
    stay_date = pd.to_datetime(row["Date"])
    n_days_b4 = int(row["DaysUntilArrival"])
    return pd.to_datetime(stay_date - pd.DateOffset(n_days_b4), format=DATE_FMT)

aods = df_sim[["Date", "DaysUntilArrival"]].apply(apply_aod, axis=1)

# using try/except so that we can re-run this cell (remove in script)
try: 
    df_sim.insert(0, column='AsOfDate', value=aods, allow_duplicates=False)
except:
    pass

df_sim.head()

Unnamed: 0,AsOfDate,DOW,RoomsOTB,RevOTB,CxlForecast,TRN_RoomsOTB,TRN_RevOTB,TRN_CxlForecast,Date,WE,WD,STLY_Date,DaysUntilArrival,ADR_OTB,SellingPrice,TM30_RoomsOTB,TM30_RevOTB,TM30_TRN_RoomsOTB,TM30_TRN_RevOTB,TM15_RoomsOTB,TM15_RevOTB,TM15_TRN_RoomsOTB,TM15_TRN_RevOTB,TM05_RoomsOTB,TM05_RevOTB,TM05_TRN_RoomsOTB,TM05_TRN_RevOTB
2016-08-02,2016-08-02,Tue,183.0,31110.89,30.0,159.0,27829.05,28.0,2016-08-02,False,False,2015-08-04,0,170.0,178.09,180.0,29880.26,156.0,26598.42,180.0,30125.1,156.0,26843.26,182.0,30598.64,158.0,27316.8
2016-08-03,2016-08-02,Wed,182.0,31660.62,35.0,159.0,28337.88,31.0,2016-08-03,False,False,2015-08-05,1,173.96,181.69,179.0,30517.59,157.0,27435.85,176.0,30361.96,154.0,27280.22,173.0,29906.87,151.0,26825.13
2016-08-04,2016-08-02,Thu,181.0,32922.17,35.0,151.0,28013.71,32.0,2016-08-04,False,False,2015-08-06,2,181.89,187.83,173.0,30621.96,145.0,26214.16,177.0,31801.65,149.0,27393.85,177.0,31930.67,149.0,27522.87
2016-08-05,2016-08-02,Fri,177.0,33224.35,30.0,143.0,27148.36,27.0,2016-08-05,True,False,2015-08-07,3,187.71,193.42,173.0,31314.92,143.0,26342.59,176.0,32622.33,144.0,27047.0,176.0,32956.31,142.0,26880.32
2016-08-06,2016-08-02,Sat,175.0,33130.01,28.0,143.0,27475.32,25.0,2016-08-06,True,False,2015-08-08,4,189.31,195.12,171.0,31330.05,143.0,26779.02,173.0,32274.77,143.0,27120.74,173.0,32632.9,141.0,26978.21


In [8]:
# add remaining supply ('RemSupply')
capacity = 187
df_sim["RemSupply"] = (
    capacity - df_sim.RoomsOTB.astype(int) + df_sim.CxlForecast.astype(int)
)

In [9]:
# add one-hot-encoded DOW ('Day of Week') columns

ohe_dow = pd.get_dummies(df_sim.DOW, drop_first=True)
dow_ohe_cols = list(ohe_dow.columns)
df_sim[dow_ohe_cols] = ohe_dow

In [10]:
# add NONTRN cols

df_sim["NONTRN_RoomsOTB"] = (
    df_sim.RoomsOTB - df_sim.TRN_RoomsOTB
)
df_sim["NONTRN_RevOTB"] = df_sim.RevOTB - df_sim.TRN_RevOTB
df_sim["NONTRN_ADR_OTB"] = round(df_sim["NONTRN_RevOTB"] / df_sim["NONTRN_RoomsOTB"], 2)
df_sim["NONTRN_CxlForecast"] = df_sim.CxlForecast - df_sim.TRN_CxlForecast

# df_sim["LYA_NONTRN_RoomsOTB"] = (
#     df_sim.LYA_TRNP_RoomsOTB + df_sim.LYA_GRP_RoomsOTB + df_sim.LYA_CNT_RoomsOTB
# )
# df_sim["LYA_NONTRN_RevOTB"] = df_sim.LYA_TRNP_RevOTB + df_sim.LYA_GRP_RevOTB + df_sim.LYA_CNT_RevOTB



In [11]:
# Add ADR by segment for OTB, LYA, STLY cols (required for pace comparisons)


# df_sim["NONTRN_ADR_OTB"] = round(df_sim["NONTRN_RevOTB"] / df_sim["NONTRN_RoomsOTB"], 2)
# df_sim["LYA_NONTRN_ADR_OTB"] = round(df_sim["LYA_NONTRN_RevOTB"] / df_sim["LYA_NONTRN_RoomsOTB"], 2)

In [12]:
# Add last-year actual columns ("LYA_")

def apply_ly_cols(row):
    stly_date = row["STLY_Date"]
    stly_date_str = datetime.datetime.strftime(stly_date, format=DATE_FMT)

    df_lya = list(h1_dbd.loc[stly_date_str, ly_cols])
    return tuple(df_lya)

ly_new_cols = ["LYA_" + col for col in ly_cols]
df_sim[ly_new_cols] = df_sim[["STLY_Date"]].apply(apply_ly_cols, axis=1, result_type="expand")

df_sim.fillna(0, inplace=True)

# add gap to LYA column 
df_sim["RoomsGapToLYA"] = df_sim.LYA_RoomsSold - df_sim.RoomsOTB
df_sim["ADR_GapToLYA"] = df_sim.LYA_ADR - df_sim.ADR_OTB

df_sim.head()

# df_sim["TRN_ADR_OTB"] = round(df_sim["TRN_RevOTB"] / df_sim["TRN_RoomsOTB"], 2)
# df_sim["LYA_ADR"] = round(df_sim.RevOTB / df_sim.RoomsOTB, 2)
# df_sim["LYA_TRN_ADR"] = round(df_sim["LYA_TRN_RoomRev"] / df_sim["LYA_TRN_RoomsSold"], 2)


Unnamed: 0,AsOfDate,DOW,RoomsOTB,RevOTB,CxlForecast,TRN_RoomsOTB,TRN_RevOTB,TRN_CxlForecast,Date,WE,WD,STLY_Date,DaysUntilArrival,ADR_OTB,SellingPrice,TM30_RoomsOTB,TM30_RevOTB,TM30_TRN_RoomsOTB,TM30_TRN_RevOTB,TM15_RoomsOTB,TM15_RevOTB,TM15_TRN_RoomsOTB,TM15_TRN_RevOTB,TM05_RoomsOTB,TM05_RevOTB,TM05_TRN_RoomsOTB,TM05_TRN_RevOTB,RemSupply,Mon,Sat,Sun,Thu,Tue,Wed,NONTRN_RoomsOTB,NONTRN_RevOTB,NONTRN_ADR_OTB,NONTRN_CxlForecast,LYA_RoomsSold,LYA_ADR,LYA_RoomRev,LYA_NumCancels,LYA_TRN_RoomsSold,LYA_TRN_ADR,LYA_TRN_RoomRev,RoomsGapToLYA,ADR_GapToLYA
2016-08-02,2016-08-02,Tue,183.0,31110.89,30.0,159.0,27829.05,28.0,2016-08-02,False,False,2015-08-04,0,170.0,178.09,180.0,29880.26,156.0,26598.42,180.0,30125.1,156.0,26843.26,182.0,30598.64,158.0,27316.8,34,0,0,0,0,1,0,24.0,3281.84,136.74,2.0,182.0,151.57,27585.83,58.0,132.0,164.59,21725.78,-1.0,-18.43
2016-08-03,2016-08-02,Wed,182.0,31660.62,35.0,159.0,28337.88,31.0,2016-08-03,False,False,2015-08-05,1,173.96,181.69,179.0,30517.59,157.0,27435.85,176.0,30361.96,154.0,27280.22,173.0,29906.87,151.0,26825.13,40,0,0,0,0,0,1,23.0,3322.74,144.47,4.0,182.0,152.92,27831.73,59.0,137.0,164.13,22485.3,0.0,-21.04
2016-08-04,2016-08-02,Thu,181.0,32922.17,35.0,151.0,28013.71,32.0,2016-08-04,False,False,2015-08-06,2,181.89,187.83,173.0,30621.96,145.0,26214.16,177.0,31801.65,149.0,27393.85,177.0,31930.67,149.0,27522.87,41,0,0,0,1,0,0,30.0,4908.46,163.62,3.0,180.0,155.27,27947.92,57.0,137.0,166.14,22760.64,-1.0,-26.62
2016-08-05,2016-08-02,Fri,177.0,33224.35,30.0,143.0,27148.36,27.0,2016-08-05,True,False,2015-08-07,3,187.71,193.42,173.0,31314.92,143.0,26342.59,176.0,32622.33,144.0,27047.0,176.0,32956.31,142.0,26880.32,40,0,0,0,0,0,0,34.0,6075.99,178.71,3.0,182.0,153.31,27901.89,59.0,139.0,163.06,22665.76,5.0,-34.4
2016-08-06,2016-08-02,Sat,175.0,33130.01,28.0,143.0,27475.32,25.0,2016-08-06,True,False,2015-08-08,4,189.31,195.12,171.0,31330.05,143.0,26779.02,173.0,32274.77,143.0,27120.74,173.0,32632.9,141.0,26978.21,40,0,1,0,0,0,0,32.0,5654.69,176.71,3.0,181.0,156.18,28269.01,63.0,140.0,167.9,23505.41,6.0,-33.13


In [13]:
%%time
# get recent pickup (tminus) columns


# first need TRN_ADR

df_sim["TRN_ADR_OTB"] = round(df_sim["TRN_RevOTB"] / df_sim["TRN_RoomsOTB"])

# loop thru tminus windows (for total hotel & trn) & count bookings
tms = ["TM30_", "TM15_", "TM05_"]
segs = ["", "TRN_"] # "" for total hotel
# df_sim.drop(
#     columns=["TM30_Date", "TM15_Date", "TM05_Date"], inplace=True, errors="ignore"
# )

for tm in tms:
    for seg in segs:
        
        # add tm_seg_adr
        df_sim[tm + seg + "ADR_OTB"] = round(df_sim[tm + seg + "RevOTB"] / df_sim[tm + seg + "RoomsOTB"], 2)
        # and now segmented stats
        df_sim[tm + seg + "RoomsPickup"] = round(
            df_sim[seg + "RoomsOTB"] - df_sim[tm + seg + "RoomsOTB"], 2
        )
        df_sim[tm + seg + "RevPickup"] = round(
            df_sim[seg + "RevOTB"] - df_sim[tm + seg + "RevOTB"], 2
        )
        df_sim[tm + seg + "ADR_Pickup"] = round(
            df_sim[seg + "ADR_OTB"] - df_sim[tm + seg + "ADR_OTB"], 2
        )


    # back to outside loop (iterating thru tms)
    # add TM_NONTRN_OTB
    df_sim[tm +  "NONTRN_RoomsOTB"] = (
        df_sim[tm + "RoomsOTB"]
        - df_sim[tm + "TRN_RoomsOTB"]
    )
    df_sim[tm + "NONTRN_RevOTB"] = (
        df_sim[tm + "RevOTB"]
        - df_sim[tm + "TRN_RevOTB"]
    )
    df_sim[tm + "NONTRN_ADR_OTB"] = round(
        df_sim[tm + "NONTRN_RevOTB"] / df_sim[tm + "NONTRN_RoomsOTB"], 2
    )
    # add TM_NONTRN_OTB Pickup
    df_sim[tm +  "NONTRN_RoomsPickup"] = (
        df_sim["NONTRN_RoomsOTB"]
        - df_sim[tm + "NONTRN_RoomsOTB"]
    )
    df_sim[tm + "NONTRN_RevPickup"] = (
        df_sim["NONTRN_RevOTB"]
        - df_sim[tm + "NONTRN_RevOTB"]
    )
    df_sim[tm + "NONTRN_ADR_Pickup"] = (
        df_sim["NONTRN_ADR_OTB"]
        - df_sim[tm + "NONTRN_ADR_OTB"]
    )

df_sim.head()

CPU times: user 113 ms, sys: 391 µs, total: 114 ms
Wall time: 110 ms


Unnamed: 0,AsOfDate,DOW,RoomsOTB,RevOTB,CxlForecast,TRN_RoomsOTB,TRN_RevOTB,TRN_CxlForecast,Date,WE,WD,STLY_Date,DaysUntilArrival,ADR_OTB,SellingPrice,TM30_RoomsOTB,TM30_RevOTB,TM30_TRN_RoomsOTB,TM30_TRN_RevOTB,TM15_RoomsOTB,TM15_RevOTB,TM15_TRN_RoomsOTB,TM15_TRN_RevOTB,TM05_RoomsOTB,TM05_RevOTB,TM05_TRN_RoomsOTB,TM05_TRN_RevOTB,RemSupply,Mon,Sat,Sun,Thu,Tue,Wed,NONTRN_RoomsOTB,NONTRN_RevOTB,NONTRN_ADR_OTB,NONTRN_CxlForecast,LYA_RoomsSold,LYA_ADR,LYA_RoomRev,LYA_NumCancels,LYA_TRN_RoomsSold,LYA_TRN_ADR,LYA_TRN_RoomRev,RoomsGapToLYA,ADR_GapToLYA,TRN_ADR_OTB,TM30_ADR_OTB,TM30_RoomsPickup,TM30_RevPickup,TM30_ADR_Pickup,TM30_TRN_ADR_OTB,TM30_TRN_RoomsPickup,TM30_TRN_RevPickup,TM30_TRN_ADR_Pickup,TM30_NONTRN_RoomsOTB,TM30_NONTRN_RevOTB,TM30_NONTRN_ADR_OTB,TM30_NONTRN_RoomsPickup,TM30_NONTRN_RevPickup,TM30_NONTRN_ADR_Pickup,TM15_ADR_OTB,TM15_RoomsPickup,TM15_RevPickup,TM15_ADR_Pickup,TM15_TRN_ADR_OTB,TM15_TRN_RoomsPickup,TM15_TRN_RevPickup,TM15_TRN_ADR_Pickup,TM15_NONTRN_RoomsOTB,TM15_NONTRN_RevOTB,TM15_NONTRN_ADR_OTB,TM15_NONTRN_RoomsPickup,TM15_NONTRN_RevPickup,TM15_NONTRN_ADR_Pickup,TM05_ADR_OTB,TM05_RoomsPickup,TM05_RevPickup,TM05_ADR_Pickup,TM05_TRN_ADR_OTB,TM05_TRN_RoomsPickup,TM05_TRN_RevPickup,TM05_TRN_ADR_Pickup,TM05_NONTRN_RoomsOTB,TM05_NONTRN_RevOTB,TM05_NONTRN_ADR_OTB,TM05_NONTRN_RoomsPickup,TM05_NONTRN_RevPickup,TM05_NONTRN_ADR_Pickup
2016-08-02,2016-08-02,Tue,183.0,31110.89,30.0,159.0,27829.05,28.0,2016-08-02,False,False,2015-08-04,0,170.0,178.09,180.0,29880.26,156.0,26598.42,180.0,30125.1,156.0,26843.26,182.0,30598.64,158.0,27316.8,34,0,0,0,0,1,0,24.0,3281.84,136.74,2.0,182.0,151.57,27585.83,58.0,132.0,164.59,21725.78,-1.0,-18.43,175.0,166.0,3.0,1230.63,4.0,170.5,3.0,1230.63,4.5,24.0,3281.84,136.74,0.0,1.818989e-11,0.0,167.36,3.0,985.79,2.64,172.07,3.0,985.79,2.93,24.0,3281.84,136.74,0.0,1.818989e-11,0.0,168.12,1.0,512.25,1.88,172.89,1.0,512.25,2.11,24.0,3281.84,136.74,0.0,1.818989e-11,0.0
2016-08-03,2016-08-02,Wed,182.0,31660.62,35.0,159.0,28337.88,31.0,2016-08-03,False,False,2015-08-05,1,173.96,181.69,179.0,30517.59,157.0,27435.85,176.0,30361.96,154.0,27280.22,173.0,29906.87,151.0,26825.13,40,0,0,0,0,0,1,23.0,3322.74,144.47,4.0,182.0,152.92,27831.73,59.0,137.0,164.13,22485.3,0.0,-21.04,178.0,170.49,3.0,1143.03,3.47,174.75,2.0,902.03,3.25,22.0,3081.74,140.08,1.0,241.0,4.39,172.51,6.0,1298.66,1.45,177.14,5.0,1057.66,0.86,22.0,3081.74,140.08,1.0,241.0,4.39,172.87,9.0,1753.75,1.09,177.65,8.0,1512.75,0.35,22.0,3081.74,140.08,1.0,241.0,4.39
2016-08-04,2016-08-02,Thu,181.0,32922.17,35.0,151.0,28013.71,32.0,2016-08-04,False,False,2015-08-06,2,181.89,187.83,173.0,30621.96,145.0,26214.16,177.0,31801.65,149.0,27393.85,177.0,31930.67,149.0,27522.87,41,0,0,0,1,0,0,30.0,4908.46,163.62,3.0,180.0,155.27,27947.92,57.0,137.0,166.14,22760.64,-1.0,-26.62,186.0,177.01,8.0,2300.21,4.88,180.79,6.0,1799.55,5.21,28.0,4407.8,157.42,2.0,500.66,6.2,179.67,4.0,1120.52,2.22,183.85,2.0,619.86,2.15,28.0,4407.8,157.42,2.0,500.66,6.2,180.4,4.0,991.5,1.49,184.72,2.0,490.84,1.28,28.0,4407.8,157.42,2.0,500.66,6.2
2016-08-05,2016-08-02,Fri,177.0,33224.35,30.0,143.0,27148.36,27.0,2016-08-05,True,False,2015-08-07,3,187.71,193.42,173.0,31314.92,143.0,26342.59,176.0,32622.33,144.0,27047.0,176.0,32956.31,142.0,26880.32,40,0,0,0,0,0,0,34.0,6075.99,178.71,3.0,182.0,153.31,27901.89,59.0,139.0,163.06,22665.76,5.0,-34.4,190.0,181.01,4.0,1909.43,6.7,184.21,0.0,805.77,5.79,30.0,4972.33,165.74,4.0,1103.66,12.97,185.35,1.0,602.02,2.36,187.83,-1.0,101.36,2.17,32.0,5575.33,174.23,2.0,500.66,4.48,187.25,1.0,268.04,0.46,189.3,1.0,268.04,0.7,34.0,6075.99,178.71,0.0,2.182787e-11,0.0
2016-08-06,2016-08-02,Sat,175.0,33130.01,28.0,143.0,27475.32,25.0,2016-08-06,True,False,2015-08-08,4,189.31,195.12,171.0,31330.05,143.0,26779.02,173.0,32274.77,143.0,27120.74,173.0,32632.9,141.0,26978.21,40,0,1,0,0,0,0,32.0,5654.69,176.71,3.0,181.0,156.18,28269.01,63.0,140.0,167.9,23505.41,6.0,-33.13,192.0,183.22,4.0,1799.96,6.09,187.27,0.0,696.3,4.73,28.0,4551.03,162.54,4.0,1103.66,14.17,186.56,2.0,855.24,2.75,189.66,0.0,354.58,2.34,30.0,5154.03,171.8,2.0,500.66,4.91,188.63,2.0,497.11,0.68,191.33,2.0,497.11,0.67,32.0,5654.69,176.71,0.0,-3.637979e-12,0.0


In [14]:
df_sim.columns

Index(['AsOfDate', 'DOW', 'RoomsOTB', 'RevOTB', 'CxlForecast', 'TRN_RoomsOTB',
       'TRN_RevOTB', 'TRN_CxlForecast', 'Date', 'WE', 'WD', 'STLY_Date',
       'DaysUntilArrival', 'ADR_OTB', 'SellingPrice', 'TM30_RoomsOTB',
       'TM30_RevOTB', 'TM30_TRN_RoomsOTB', 'TM30_TRN_RevOTB', 'TM15_RoomsOTB',
       'TM15_RevOTB', 'TM15_TRN_RoomsOTB', 'TM15_TRN_RevOTB', 'TM05_RoomsOTB',
       'TM05_RevOTB', 'TM05_TRN_RoomsOTB', 'TM05_TRN_RevOTB', 'RemSupply',
       'Mon', 'Sat', 'Sun', 'Thu', 'Tue', 'Wed', 'NONTRN_RoomsOTB',
       'NONTRN_RevOTB', 'NONTRN_ADR_OTB', 'NONTRN_CxlForecast',
       'LYA_RoomsSold', 'LYA_ADR', 'LYA_RoomRev', 'LYA_NumCancels',
       'LYA_TRN_RoomsSold', 'LYA_TRN_ADR', 'LYA_TRN_RoomRev', 'RoomsGapToLYA',
       'ADR_GapToLYA', 'TRN_ADR_OTB', 'TM30_ADR_OTB', 'TM30_RoomsPickup',
       'TM30_RevPickup', 'TM30_ADR_Pickup', 'TM30_TRN_ADR_OTB',
       'TM30_TRN_RoomsPickup', 'TM30_TRN_RevPickup', 'TM30_TRN_ADR_Pickup',
       'TM30_NONTRN_RoomsOTB', 'TM30_NONTRN_RevOTB'

# EW- NEXT STEPS (THU 5PM)

1. drop unneeded, post-processed TM_nn columns in blank cell above (create list in agg_utils.py)
2. pull stly cols via merge below
3. calculate pace
4. drop unneeded, post-processed stly cols
5. add all of the steps in this notebook to agg.py
6. pull features from list at top of this NB
7. train/test split
8. linear regression (predict RoomsSold)
9. randomForest (predict RoomsSold)

**Time to pull STLY columns. I will accomplish this by merging df_sim on top of itself and pulling the below columns into the next year's row with the `'STLY_'` prefix.***

But before we do that, let's make sure we add in the ADR columns.

NEVERMIND - THIS STEP NEEDS TO COME LAST ONCE WE HAVE ALL OF THE OTHER COLUMNS

In [15]:
# pull STLY columns with self-merge to STLY date

stly_cols = [
    'RoomsOTB', 'ADR_OTB', 'RevOTB', 'CxlForecast', 'TRN_RoomsOTB',
    'TRN_RevOTB', 'TRN_CxlForecast', 'TRNP_RoomsOTB', 'TRNP_RevOTB',
    'TRNP_CxlForecast', 'GRP_RoomsOTB', 'GRP_RevOTB', 'GRP_CxlForecast',
    'CNT_RoomsOTB', 'CNT_RevOTB', 'CNT_CxlForecast',
    'TM30_RoomsOTB', 'TM30_RevOTB', 'TM30_TRN_RoomsOTB', 'TM30_TRN_RevOTB',
    'TM30_TRNP_RoomsOTB', 'TM30_TRNP_RevOTB', 'TM30_GRP_RoomsOTB',
    'TM30_GRP_RevOTB', 'TM30_CNT_RoomsOTB', 'TM30_CNT_RevOTB',
    'TM15_RoomsOTB', 'TM15_RevOTB', 'TM15_TRN_RoomsOTB', 'TM15_TRN_RevOTB',
    'TM15_TRNP_RoomsOTB', 'TM15_TRNP_RevOTB', 'TM15_GRP_RoomsOTB',
    'TM15_GRP_RevOTB', 'TM15_CNT_RoomsOTB', 'TM15_CNT_RevOTB',
    'TM05_RoomsOTB', 'TM05_RevOTB', 'TM05_TRN_RoomsOTB', 'TM05_TRN_RevOTB',
    'TM05_TRNP_RoomsOTB', 'TM05_TRNP_RevOTB', 'TM05_GRP_RoomsOTB',
    'TM05_GRP_RevOTB', 'TM05_CNT_RoomsOTB', 'TM05_CNT_RevOTB',
]

df_sim["STLY_NONTRN_RoomsOTB"] = (
    df_sim.STLY_TRNP_RoomsOTB + df_sim.STLY_GRP_RoomsOTB + df_sim.STLY_CNT_RoomsOTB
)
df_sim["STLY_NONTRN_RevOTB"] = df_sim.STLY_TRNP_RevOTB + df_sim.STLY_GRP_RevOTB + df_sim.STLY_CNT_RevOTB

AttributeError: 'DataFrame' object has no attribute 'STLY_TRNP_RoomsOTB'

In [None]:
stly_cols = df_sim.merge(df_sim[stly_cols], left_on='STLY_Date', right_on='AsOfDate')

In [None]:
h2_res[h2_res['CustomerType'] == 'Contract'].shape