# Adding UUIDs to Reservations

In [2]:
## Import pandas to handle data and `uuid` to create unique reservation IDs
import pandas as pd
import uuid

# Load and Concatenate Data

In [3]:
## Read each hotel's source CSV and tag every row with a categorical hotel code

df_h1 = (
    pd.read_csv('../../data/source/H1.csv')
    .assign(HotelNumber='H1')
    .astype({'HotelNumber': 'category'})
)

df_h2 = (
    pd.read_csv('../../data/source/H2.csv')
    .assign(HotelNumber='H2')
    .astype({'HotelNumber': 'category'})
)

In [4]:
# Stack the two hotel frames into a single frame with a fresh 0..n-1 index.
data = pd.concat([df_h1, df_h2], axis=0, ignore_index=True)

# The per-hotel 'HotelNumber' categoricals have different category sets
# (['H1'] vs ['H2']), so concat falls back to object dtype for that column.
# Re-cast here so the combined frame really carries a categorical.
data['HotelNumber'] = data['HotelNumber'].astype('category')
data

Unnamed: 0,IsCanceled,LeadTime,ArrivalDateYear,ArrivalDateMonth,ArrivalDateWeekNumber,ArrivalDateDayOfMonth,StaysInWeekendNights,StaysInWeekNights,Adults,Children,...,Agent,Company,DaysInWaitingList,CustomerType,ADR,RequiredCarParkingSpaces,TotalOfSpecialRequests,ReservationStatus,ReservationStatusDate,HotelNumber
0,0,342,2015,July,27,1,0,0,2,0.0,...,,,0,Transient,0.00,0,0,Check-Out,2015-07-01,H1
1,0,737,2015,July,27,1,0,0,2,0.0,...,,,0,Transient,0.00,0,0,Check-Out,2015-07-01,H1
2,0,7,2015,July,27,1,0,1,1,0.0,...,,,0,Transient,75.00,0,0,Check-Out,2015-07-02,H1
3,0,13,2015,July,27,1,0,1,1,0.0,...,304,,0,Transient,75.00,0,0,Check-Out,2015-07-02,H1
4,0,14,2015,July,27,1,0,2,2,0.0,...,240,,0,Transient,98.00,0,1,Check-Out,2015-07-03,H1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119385,0,23,2017,August,35,30,2,5,2,0.0,...,394,,0,Transient,96.14,0,0,Check-Out,2017-09-06,H2
119386,0,102,2017,August,35,31,2,5,3,0.0,...,9,,0,Transient,225.43,0,2,Check-Out,2017-09-07,H2
119387,0,34,2017,August,35,31,2,5,2,0.0,...,9,,0,Transient,157.71,0,4,Check-Out,2017-09-07,H2
119388,0,109,2017,August,35,31,2,5,2,0.0,...,89,,0,Transient,104.40,0,0,Check-Out,2017-09-07,H2


# Generate UUIDs and Append to DataFrame

In [5]:
# Attach one random (version 4) UUID object to every row of the dataframe.
# uuid4() is random, so re-running this cell produces a different set of IDs.
data['UUID'] = [uuid.uuid4() for _ in data.index]
data

Unnamed: 0,IsCanceled,LeadTime,ArrivalDateYear,ArrivalDateMonth,ArrivalDateWeekNumber,ArrivalDateDayOfMonth,StaysInWeekendNights,StaysInWeekNights,Adults,Children,...,Company,DaysInWaitingList,CustomerType,ADR,RequiredCarParkingSpaces,TotalOfSpecialRequests,ReservationStatus,ReservationStatusDate,HotelNumber,UUID
0,0,342,2015,July,27,1,0,0,2,0.0,...,,0,Transient,0.00,0,0,Check-Out,2015-07-01,H1,873e7749-dcb5-4c01-b54f-46082557421a
1,0,737,2015,July,27,1,0,0,2,0.0,...,,0,Transient,0.00,0,0,Check-Out,2015-07-01,H1,17c448f2-8715-43d3-a34d-06e0c6ccb502
2,0,7,2015,July,27,1,0,1,1,0.0,...,,0,Transient,75.00,0,0,Check-Out,2015-07-02,H1,ccfd11ba-608d-46bb-a97a-7af0dc59fc7d
3,0,13,2015,July,27,1,0,1,1,0.0,...,,0,Transient,75.00,0,0,Check-Out,2015-07-02,H1,0b80d489-ff99-4533-b2e2-c07747cb9681
4,0,14,2015,July,27,1,0,2,2,0.0,...,,0,Transient,98.00,0,1,Check-Out,2015-07-03,H1,9b03c838-e88c-4682-b491-1f929402c92d
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119385,0,23,2017,August,35,30,2,5,2,0.0,...,,0,Transient,96.14,0,0,Check-Out,2017-09-06,H2,6ba8edf2-3269-47dd-a643-1717a82977db
119386,0,102,2017,August,35,31,2,5,3,0.0,...,,0,Transient,225.43,0,2,Check-Out,2017-09-07,H2,1772f97b-c98e-483c-b3ed-4e19741a0c0b
119387,0,34,2017,August,35,31,2,5,2,0.0,...,,0,Transient,157.71,0,4,Check-Out,2017-09-07,H2,1da248f6-1191-4391-9490-066d3bc5d9a8
119388,0,109,2017,August,35,31,2,5,2,0.0,...,,0,Transient,104.40,0,0,Check-Out,2017-09-07,H2,2665afe4-8c01-43b7-a2db-80122963f613


## Convert UUIDs to String Datatype

In [6]:
# Cast the UUID objects to their canonical string form; other columns are untouched
data = data.astype({'UUID': str})
data['UUID']

0         873e7749-dcb5-4c01-b54f-46082557421a
1         17c448f2-8715-43d3-a34d-06e0c6ccb502
2         ccfd11ba-608d-46bb-a97a-7af0dc59fc7d
3         0b80d489-ff99-4533-b2e2-c07747cb9681
4         9b03c838-e88c-4682-b491-1f929402c92d
                          ...                 
119385    6ba8edf2-3269-47dd-a643-1717a82977db
119386    1772f97b-c98e-483c-b3ed-4e19741a0c0b
119387    1da248f6-1191-4391-9490-066d3bc5d9a8
119388    2665afe4-8c01-43b7-a2db-80122963f613
119389    55267b08-7625-4461-a922-14093836ba2b
Name: UUID, Length: 119390, dtype: object

## Set UUIDs as Index

---

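Setting the UUIDs as the dataframe's index would keep the unique identifier attached to each reservation and make it easy to join with other datasets (such as engineered datasets later in the workflow). The cell below is currently commented out, so the UUIDs are saved as a regular `UUID` column instead; call `set_index('UUID')` after loading the file if an indexed frame is needed.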

---

In [7]:
# NOTE(review): this step is disabled, so 'UUID' stays a regular column and the
# frame keeps its default integer index when it is saved below.
# Presumably disabled because `DataFrame.to_feather` historically required a
# default index (older pandas versions raise on a custom index) — confirm
# before re-enabling.
# data = data.set_index('UUID')
# data

# Save Results

In [8]:
# Persist the combined, UUID-tagged reservations as a zstd-compressed feather file
output_path = '../../data/source/full_data.feather'
data.to_feather(output_path, compression='zstd')