# Adding UUIDs to Reservations

In [2]:
## Import pandas to handle data and `uuid` to create unique reservation IDs
import pandas as pd
import uuid

# Load and Concatenate Data

In [3]:
## Load datasets and add column to indicate hotel type/location

df_h1 = pd.read_csv('../../data/source/H1.csv')
df_h1['HotelNumber'] = 'H1'
df_h1['HotelNumber'] = df_h1['HotelNumber'].astype('category')

df_h2 = pd.read_csv('../../data/source/H2.csv')
df_h2['HotelNumber'] = 'H2'
df_h2['HotelNumber'] = df_h2['HotelNumber'].astype('category')

In [4]:
data = pd.concat([df_h1,df_h2],axis = 0)
data

Unnamed: 0,IsCanceled,LeadTime,ArrivalDateYear,ArrivalDateMonth,ArrivalDateWeekNumber,ArrivalDateDayOfMonth,StaysInWeekendNights,StaysInWeekNights,Adults,Children,...,Agent,Company,DaysInWaitingList,CustomerType,ADR,RequiredCarParkingSpaces,TotalOfSpecialRequests,ReservationStatus,ReservationStatusDate,HotelNumber
0,0,342,2015,July,27,1,0,0,2,0.0,...,,,0,Transient,0.00,0,0,Check-Out,2015-07-01,H1
1,0,737,2015,July,27,1,0,0,2,0.0,...,,,0,Transient,0.00,0,0,Check-Out,2015-07-01,H1
2,0,7,2015,July,27,1,0,1,1,0.0,...,,,0,Transient,75.00,0,0,Check-Out,2015-07-02,H1
3,0,13,2015,July,27,1,0,1,1,0.0,...,304,,0,Transient,75.00,0,0,Check-Out,2015-07-02,H1
4,0,14,2015,July,27,1,0,2,2,0.0,...,240,,0,Transient,98.00,0,1,Check-Out,2015-07-03,H1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79325,0,23,2017,August,35,30,2,5,2,0.0,...,394,,0,Transient,96.14,0,0,Check-Out,2017-09-06,H2
79326,0,102,2017,August,35,31,2,5,3,0.0,...,9,,0,Transient,225.43,0,2,Check-Out,2017-09-07,H2
79327,0,34,2017,August,35,31,2,5,2,0.0,...,9,,0,Transient,157.71,0,4,Check-Out,2017-09-07,H2
79328,0,109,2017,August,35,31,2,5,2,0.0,...,89,,0,Transient,104.40,0,0,Check-Out,2017-09-07,H2


# Generate UUIDs and Append to DataFrame

In [5]:
# Generate UUIDs for each row in the dataframe
data['UUID'] = [uuid.uuid4() for _ in range(len(data))]
data

Unnamed: 0,IsCanceled,LeadTime,ArrivalDateYear,ArrivalDateMonth,ArrivalDateWeekNumber,ArrivalDateDayOfMonth,StaysInWeekendNights,StaysInWeekNights,Adults,Children,...,Company,DaysInWaitingList,CustomerType,ADR,RequiredCarParkingSpaces,TotalOfSpecialRequests,ReservationStatus,ReservationStatusDate,HotelNumber,UUID
0,0,342,2015,July,27,1,0,0,2,0.0,...,,0,Transient,0.00,0,0,Check-Out,2015-07-01,H1,b7b7bff5-5e99-48fa-a614-796b706b5534
1,0,737,2015,July,27,1,0,0,2,0.0,...,,0,Transient,0.00,0,0,Check-Out,2015-07-01,H1,9f00202b-a417-4274-9c54-caf6164e907d
2,0,7,2015,July,27,1,0,1,1,0.0,...,,0,Transient,75.00,0,0,Check-Out,2015-07-02,H1,27845539-b5e8-44cc-98c6-d136e89e6187
3,0,13,2015,July,27,1,0,1,1,0.0,...,,0,Transient,75.00,0,0,Check-Out,2015-07-02,H1,7e2c0b9c-cf72-48c6-8682-589c98817905
4,0,14,2015,July,27,1,0,2,2,0.0,...,,0,Transient,98.00,0,1,Check-Out,2015-07-03,H1,0f7c583d-eacd-4a13-a4ed-24d949796a74
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79325,0,23,2017,August,35,30,2,5,2,0.0,...,,0,Transient,96.14,0,0,Check-Out,2017-09-06,H2,cd3ea09d-7eb5-42e7-9746-9b6c16a5b945
79326,0,102,2017,August,35,31,2,5,3,0.0,...,,0,Transient,225.43,0,2,Check-Out,2017-09-07,H2,51ec8da2-b0d7-4b52-9b1d-82ab91646169
79327,0,34,2017,August,35,31,2,5,2,0.0,...,,0,Transient,157.71,0,4,Check-Out,2017-09-07,H2,9e9806b5-4d81-43cc-ba61-33f30c49ac9f
79328,0,109,2017,August,35,31,2,5,2,0.0,...,,0,Transient,104.40,0,0,Check-Out,2017-09-07,H2,dfb7312b-63d1-4bab-9144-5a5a88482b4a


## Convert UUIDs to String Datatype

In [6]:
data['UUID'] = data['UUID'].astype(str)
data['UUID']

0        b7b7bff5-5e99-48fa-a614-796b706b5534
1        9f00202b-a417-4274-9c54-caf6164e907d
2        27845539-b5e8-44cc-98c6-d136e89e6187
3        7e2c0b9c-cf72-48c6-8682-589c98817905
4        0f7c583d-eacd-4a13-a4ed-24d949796a74
                         ...                 
79325    cd3ea09d-7eb5-42e7-9746-9b6c16a5b945
79326    51ec8da2-b0d7-4b52-9b1d-82ab91646169
79327    9e9806b5-4d81-43cc-ba61-33f30c49ac9f
79328    dfb7312b-63d1-4bab-9144-5a5a88482b4a
79329    9cb4fc72-5438-4765-99ec-bcca6bc997e7
Name: UUID, Length: 119390, dtype: object

## Set UUIDs as Index

---

By setting the UUIDs as the dataframe's index, I maintain the unique identifier for each reservation and enable myself to perform joins with other datasets (such as engineered datasets later in the workflow0).

---

In [7]:
# data = data.set_index('UUID')
# data

# Save Results

In [8]:
data.to_feather('../../data/source/full_data.feather', compression = 'zstd')