### Prepping Data Challenge: Housing Happy Hotel Guests (Week 25)
 
### Requirements
- Input the data
- Before we bring the 2 datasets together, we want to know how many Additional Requests each guest has made
  - Update N/A values to null and make sure this counts as 0 Additional Requests
- Match the guests to the rooms which have capacity for their entire party
- Filter so that double/twin bed preferences are adhered to
- Ensure guests who have accessibility requirements are only matched with accessible rooms
- Calculate the Request Satisfaction % for each room
- Filter so that guests are only left with rooms with the highest Request Satisfaction %
- Finally, for the rooms with the largest capacity, we want to ensure guests with larger parties are prioritised. Filter the data to remove parties that could fit into smaller rooms
- Output the data

In [1]:
import pandas as pd
import numpy as np

In [2]:
#input the data
with pd.ExcelFile('wk25-Input.xlsx') as xl:
    hotel = pd.read_excel(xl, 'Hotel Rooms')
    guest = pd.read_excel(xl, 'Guests')

In [3]:
guest = guest.rename(columns = {'Adults':'Adults in Party','Children':'Children in Party'})

In [4]:
hotel.tail()

Unnamed: 0,Room,Adults,Children,Features
35,508,2,,"High Floor, Double"
36,601,4,,"High Floor, Bath, Near to lift, Double"
37,602,4,,"High Floor, Bath, Double"
38,603,4,,"High Floor, Bath, Double"
39,604,4,,"High Floor, Bath, Double"


In [5]:
guest.head()

Unnamed: 0,Party,Adults in Party,Children in Party,Double/Twin,Requires Accessible Room?,Additional Requests
0,Corain,4,0,Double,N,"Bath, High Floor, NOT Near to lift"
1,Aarons,2,0,Twin,N,Bath
2,Saph,2,1,Double,N,"Bath, NOT Near to lift"
3,Baxstare,1,1,Double,N,"Bath, High Floor"
4,Kelle,1,1,Twin,N,Bath


In [6]:
#hotel.isna().values.any()
hotel.isnull().sum()

Room         0
Adults       0
Children    25
Features     0
dtype: int64

In [7]:
#guest.isna().values.any()
guest.isnull().sum()

Party                        0
Adults in Party              0
Children in Party            0
Double/Twin                  0
Requires Accessible Room?    0
Additional Requests          6
dtype: int64

In [8]:
#Update N/A values to null and make sure this counts as 0 Additional Requests
hotel['Children'] = hotel['Children'].fillna(0)
guest['Additional Requests'] = guest['Additional Requests'].fillna(0)

In [9]:
guest['Re_Adults'] = guest.apply(lambda x : 2 if x['Adults in Party'] == 1 else x['Adults in Party'], axis=1)

In [10]:
#Match the guests to the rooms which have capacity for their entire party
df = guest.merge(hotel, how='left', left_on=['Re_Adults','Children in Party'], right_on = ['Adults','Children'])

In [11]:
#Filter so that double/twin bed preferences are adhered to
df['match'] = df.apply(lambda x: str(x['Double/Twin']) in str(x['Features']), axis=1)
df = df[df['match'] == True]

In [12]:
#Ensure guests who have accessibility requirements are only matched with accessible rooms
df['match_2'] = df.apply(lambda x: (str(x["Requires Accessible Room?"]) == 'Y' and 'Accessible' in str(x['Features']))|
                          (str(x["Requires Accessible Room?"]) == 'N' and 'Accessible' not in str(x['Features'])), axis=1)
df = df[df['match_2'] == True]

In [13]:
df['total_req'] = df.apply(lambda x: (x['Additional Requests'] + ', ' + x['Double/Twin']) if x['Additional Requests'] != 0 
                               else x['Double/Twin'], axis = 1)

In [14]:
df['no_request'] = df.apply(lambda x: len(str(x['total_req']).split(', ')) if x['total_req'] != 0 else 0, axis = 1)
df['req_in_feat'] = df.apply(lambda x: len((set(x['total_req'].split(', ')).intersection(set(x['Features'].split(', ')))))
                                        if x['total_req'] != 0 else "", axis = 1)

In [15]:
#Calculate the Request Satisfaction % for each room
df['Request Satisfaction %'] = round(((df['req_in_feat'])/(df['no_request']))*100, 0)

In [16]:
#Filter so that guests are only left with rooms with the highest Request Satisfaction %
df = df[df['Request Satisfaction %'] >= 100]

In [22]:
df.head()

Unnamed: 0,Party,Adults in Party,Children in Party,Double/Twin,Requires Accessible Room?,Additional Requests,Re_Adults,Room,Adults,Children,Features,match,match_2,total_req,no_request,req_in_feat,Request Satisfaction %
14,Aarons,2,0,Twin,N,Bath,2,303.0,2.0,0.0,"Bath, Twin",True,True,"Bath, Twin",2,2,100.0
17,Aarons,2,0,Twin,N,Bath,2,401.0,2.0,0.0,"High Floor, Bath, Near to lift, Twin",True,True,"Bath, Twin",2,2,100.0
18,Aarons,2,0,Twin,N,Bath,2,405.0,2.0,0.0,"High Floor, Bath, Twin",True,True,"Bath, Twin",2,2,100.0
20,Aarons,2,0,Twin,N,Bath,2,407.0,2.0,0.0,"High Floor, Bath, Twin",True,True,"Bath, Twin",2,2,100.0
50,Baxstare,1,1,Double,N,"Bath, High Floor",2,501.0,2.0,1.0,"High Floor, Bath, Near to lift, Double",True,True,"Bath, High Floor, Double",3,3,100.0


In [18]:
#Finally, for the rooms with the largest capacity, we want to ensure guests with larger parties are prioritised. 
#Filter the data to remove parties that could fit into smaller rooms


In [19]:
output = df[['Party','Adults in Party','Children in Party','Double/Twin','Requires Accessible Room?',
                 'Additional Requests','Request Satisfaction %','Room','Adults','Children','Features']]

In [20]:
output.head(10)

Unnamed: 0,Party,Adults in Party,Children in Party,Double/Twin,Requires Accessible Room?,Additional Requests,Request Satisfaction %,Room,Adults,Children,Features
14,Aarons,2,0,Twin,N,Bath,100.0,303.0,2.0,0.0,"Bath, Twin"
17,Aarons,2,0,Twin,N,Bath,100.0,401.0,2.0,0.0,"High Floor, Bath, Near to lift, Twin"
18,Aarons,2,0,Twin,N,Bath,100.0,405.0,2.0,0.0,"High Floor, Bath, Twin"
20,Aarons,2,0,Twin,N,Bath,100.0,407.0,2.0,0.0,"High Floor, Bath, Twin"
50,Baxstare,1,1,Double,N,"Bath, High Floor",100.0,501.0,2.0,1.0,"High Floor, Bath, Near to lift, Double"
51,Baxstare,1,1,Double,N,"Bath, High Floor",100.0,503.0,2.0,1.0,"High Floor, Bath, Double"
52,Baxstare,1,1,Double,N,"Bath, High Floor",100.0,504.0,2.0,1.0,"High Floor, Bath, Double"
53,Baxstare,1,1,Double,N,"Bath, High Floor",100.0,506.0,2.0,1.0,"High Floor, Bath, Double"
54,Baxstare,1,1,Double,N,"Bath, High Floor",100.0,507.0,2.0,1.0,"High Floor, Bath, Double"
58,Kelle,1,1,Twin,N,Bath,100.0,302.0,2.0,1.0,"Bath, Twin"


In [21]:
#output the data 
output.to_excel('wk25-output.xlsx', index=False)