# Adding UUIDs to Reservations

In [1]:
## Import pandas to handle data and `uuid` to create unique reservation IDs
import pandas as pd
import uuid

# Load and Concatenate Data

In [2]:
## Load datasets and add column to indicate hotel type/location

df_h1 = pd.read_csv('../../data/source/H1.csv')
df_h1['HotelNumber'] = 'H1'
df_h1['HotelNumber'] = df_h1['HotelNumber'].astype('category')

df_h2 = pd.read_csv('../../data/source/H2.csv')
df_h2['HotelNumber'] = 'H2'
df_h2['HotelNumber'] = df_h2['HotelNumber'].astype('category')

In [3]:
data = pd.concat([df_h1,df_h2],axis = 0)
data

Unnamed: 0,IsCanceled,LeadTime,ArrivalDateYear,ArrivalDateMonth,ArrivalDateWeekNumber,ArrivalDateDayOfMonth,StaysInWeekendNights,StaysInWeekNights,Adults,Children,...,Agent,Company,DaysInWaitingList,CustomerType,ADR,RequiredCarParkingSpaces,TotalOfSpecialRequests,ReservationStatus,ReservationStatusDate,HotelNumber
0,0,342,2015,July,27,1,0,0,2,0.0,...,,,0,Transient,0.00,0,0,Check-Out,2015-07-01,H1
1,0,737,2015,July,27,1,0,0,2,0.0,...,,,0,Transient,0.00,0,0,Check-Out,2015-07-01,H1
2,0,7,2015,July,27,1,0,1,1,0.0,...,,,0,Transient,75.00,0,0,Check-Out,2015-07-02,H1
3,0,13,2015,July,27,1,0,1,1,0.0,...,304,,0,Transient,75.00,0,0,Check-Out,2015-07-02,H1
4,0,14,2015,July,27,1,0,2,2,0.0,...,240,,0,Transient,98.00,0,1,Check-Out,2015-07-03,H1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79325,0,23,2017,August,35,30,2,5,2,0.0,...,394,,0,Transient,96.14,0,0,Check-Out,2017-09-06,H2
79326,0,102,2017,August,35,31,2,5,3,0.0,...,9,,0,Transient,225.43,0,2,Check-Out,2017-09-07,H2
79327,0,34,2017,August,35,31,2,5,2,0.0,...,9,,0,Transient,157.71,0,4,Check-Out,2017-09-07,H2
79328,0,109,2017,August,35,31,2,5,2,0.0,...,89,,0,Transient,104.40,0,0,Check-Out,2017-09-07,H2


# Generate UUIDs and Append to DataFrame

In [4]:
# Generate UUIDs for each row in the dataframe
data['UUID'] = [uuid.uuid4() for _ in range(len(data))]
data

Unnamed: 0,IsCanceled,LeadTime,ArrivalDateYear,ArrivalDateMonth,ArrivalDateWeekNumber,ArrivalDateDayOfMonth,StaysInWeekendNights,StaysInWeekNights,Adults,Children,...,Company,DaysInWaitingList,CustomerType,ADR,RequiredCarParkingSpaces,TotalOfSpecialRequests,ReservationStatus,ReservationStatusDate,HotelNumber,UUID
0,0,342,2015,July,27,1,0,0,2,0.0,...,,0,Transient,0.00,0,0,Check-Out,2015-07-01,H1,b6bcab0c-31d0-44e4-b75f-8829827e31e9
1,0,737,2015,July,27,1,0,0,2,0.0,...,,0,Transient,0.00,0,0,Check-Out,2015-07-01,H1,41a9180d-282d-47c2-88df-64ccdc6918d9
2,0,7,2015,July,27,1,0,1,1,0.0,...,,0,Transient,75.00,0,0,Check-Out,2015-07-02,H1,e76a8f0c-8357-49b8-9011-8f564b253cd0
3,0,13,2015,July,27,1,0,1,1,0.0,...,,0,Transient,75.00,0,0,Check-Out,2015-07-02,H1,d8068aa5-b29f-4ec8-84f5-a022ee3bad4e
4,0,14,2015,July,27,1,0,2,2,0.0,...,,0,Transient,98.00,0,1,Check-Out,2015-07-03,H1,8e44cadc-ca9e-4042-a473-835e4f32ae6b
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79325,0,23,2017,August,35,30,2,5,2,0.0,...,,0,Transient,96.14,0,0,Check-Out,2017-09-06,H2,ac33bdd7-a82f-4a63-91fc-c46344ea4a7d
79326,0,102,2017,August,35,31,2,5,3,0.0,...,,0,Transient,225.43,0,2,Check-Out,2017-09-07,H2,fc5e765d-8d5e-4dc7-9215-b057a1b42a00
79327,0,34,2017,August,35,31,2,5,2,0.0,...,,0,Transient,157.71,0,4,Check-Out,2017-09-07,H2,303279b2-56b3-4290-bc71-3e9274f8af03
79328,0,109,2017,August,35,31,2,5,2,0.0,...,,0,Transient,104.40,0,0,Check-Out,2017-09-07,H2,744f8f46-0a68-4549-9ae6-a0479d09819e


## Convert UUIDs to String Datatype

In [5]:
data['UUID'] = data['UUID'].astype(str)
data['UUID']

0        b6bcab0c-31d0-44e4-b75f-8829827e31e9
1        41a9180d-282d-47c2-88df-64ccdc6918d9
2        e76a8f0c-8357-49b8-9011-8f564b253cd0
3        d8068aa5-b29f-4ec8-84f5-a022ee3bad4e
4        8e44cadc-ca9e-4042-a473-835e4f32ae6b
                         ...                 
79325    ac33bdd7-a82f-4a63-91fc-c46344ea4a7d
79326    fc5e765d-8d5e-4dc7-9215-b057a1b42a00
79327    303279b2-56b3-4290-bc71-3e9274f8af03
79328    744f8f46-0a68-4549-9ae6-a0479d09819e
79329    c7288033-e49e-4b39-af04-4ec20b96bc45
Name: UUID, Length: 119390, dtype: object

## Set UUIDs as Index

---

By setting the UUIDs as the dataframe's index, I maintain the unique identifier for each reservation and enable myself to perform joins with other datasets (such as engineered datasets later in the workflow0).

---

In [6]:
data = data.set_index('UUID')
data

Unnamed: 0_level_0,IsCanceled,LeadTime,ArrivalDateYear,ArrivalDateMonth,ArrivalDateWeekNumber,ArrivalDateDayOfMonth,StaysInWeekendNights,StaysInWeekNights,Adults,Children,...,Agent,Company,DaysInWaitingList,CustomerType,ADR,RequiredCarParkingSpaces,TotalOfSpecialRequests,ReservationStatus,ReservationStatusDate,HotelNumber
UUID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
b6bcab0c-31d0-44e4-b75f-8829827e31e9,0,342,2015,July,27,1,0,0,2,0.0,...,,,0,Transient,0.00,0,0,Check-Out,2015-07-01,H1
41a9180d-282d-47c2-88df-64ccdc6918d9,0,737,2015,July,27,1,0,0,2,0.0,...,,,0,Transient,0.00,0,0,Check-Out,2015-07-01,H1
e76a8f0c-8357-49b8-9011-8f564b253cd0,0,7,2015,July,27,1,0,1,1,0.0,...,,,0,Transient,75.00,0,0,Check-Out,2015-07-02,H1
d8068aa5-b29f-4ec8-84f5-a022ee3bad4e,0,13,2015,July,27,1,0,1,1,0.0,...,304,,0,Transient,75.00,0,0,Check-Out,2015-07-02,H1
8e44cadc-ca9e-4042-a473-835e4f32ae6b,0,14,2015,July,27,1,0,2,2,0.0,...,240,,0,Transient,98.00,0,1,Check-Out,2015-07-03,H1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ac33bdd7-a82f-4a63-91fc-c46344ea4a7d,0,23,2017,August,35,30,2,5,2,0.0,...,394,,0,Transient,96.14,0,0,Check-Out,2017-09-06,H2
fc5e765d-8d5e-4dc7-9215-b057a1b42a00,0,102,2017,August,35,31,2,5,3,0.0,...,9,,0,Transient,225.43,0,2,Check-Out,2017-09-07,H2
303279b2-56b3-4290-bc71-3e9274f8af03,0,34,2017,August,35,31,2,5,2,0.0,...,9,,0,Transient,157.71,0,4,Check-Out,2017-09-07,H2
744f8f46-0a68-4549-9ae6-a0479d09819e,0,109,2017,August,35,31,2,5,2,0.0,...,89,,0,Transient,104.40,0,0,Check-Out,2017-09-07,H2


# Save Results

In [7]:
data.to_feather('../../data/source/full_data.feather', compression = 'zstd')