# Adding UUIDs to Reservations

In [1]:
## Import pandas to handle data and `uuid` to create unique reservation IDs

import pandas as pd
import uuid

# Load and Concatenate Data

In [2]:
data = pd.concat([pd.read_csv('../../data/source/H1.csv'),
                  pd.read_csv('../../data/source/H2.csv')],
                 axis = 0)
data

Unnamed: 0,IsCanceled,LeadTime,ArrivalDateYear,ArrivalDateMonth,ArrivalDateWeekNumber,ArrivalDateDayOfMonth,StaysInWeekendNights,StaysInWeekNights,Adults,Children,...,DepositType,Agent,Company,DaysInWaitingList,CustomerType,ADR,RequiredCarParkingSpaces,TotalOfSpecialRequests,ReservationStatus,ReservationStatusDate
0,0,342,2015,July,27,1,0,0,2,0.0,...,No Deposit,,,0,Transient,0.00,0,0,Check-Out,2015-07-01
1,0,737,2015,July,27,1,0,0,2,0.0,...,No Deposit,,,0,Transient,0.00,0,0,Check-Out,2015-07-01
2,0,7,2015,July,27,1,0,1,1,0.0,...,No Deposit,,,0,Transient,75.00,0,0,Check-Out,2015-07-02
3,0,13,2015,July,27,1,0,1,1,0.0,...,No Deposit,304,,0,Transient,75.00,0,0,Check-Out,2015-07-02
4,0,14,2015,July,27,1,0,2,2,0.0,...,No Deposit,240,,0,Transient,98.00,0,1,Check-Out,2015-07-03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79325,0,23,2017,August,35,30,2,5,2,0.0,...,No Deposit,394,,0,Transient,96.14,0,0,Check-Out,2017-09-06
79326,0,102,2017,August,35,31,2,5,3,0.0,...,No Deposit,9,,0,Transient,225.43,0,2,Check-Out,2017-09-07
79327,0,34,2017,August,35,31,2,5,2,0.0,...,No Deposit,9,,0,Transient,157.71,0,4,Check-Out,2017-09-07
79328,0,109,2017,August,35,31,2,5,2,0.0,...,No Deposit,89,,0,Transient,104.40,0,0,Check-Out,2017-09-07


# Generate UUIDs and Append to DataFrame

In [3]:
# Generate UUIDs for each row in the dataframe
data['UUID'] = [uuid.uuid4() for _ in range(len(data))]
data

Unnamed: 0,IsCanceled,LeadTime,ArrivalDateYear,ArrivalDateMonth,ArrivalDateWeekNumber,ArrivalDateDayOfMonth,StaysInWeekendNights,StaysInWeekNights,Adults,Children,...,Agent,Company,DaysInWaitingList,CustomerType,ADR,RequiredCarParkingSpaces,TotalOfSpecialRequests,ReservationStatus,ReservationStatusDate,UUID
0,0,342,2015,July,27,1,0,0,2,0.0,...,,,0,Transient,0.00,0,0,Check-Out,2015-07-01,9a3402b4-a328-43b7-8a85-18e45379f0e7
1,0,737,2015,July,27,1,0,0,2,0.0,...,,,0,Transient,0.00,0,0,Check-Out,2015-07-01,e5744d91-faaa-4558-a4b3-886c0dfc91ab
2,0,7,2015,July,27,1,0,1,1,0.0,...,,,0,Transient,75.00,0,0,Check-Out,2015-07-02,ee3b1c49-95cd-4511-b8dc-aea390710b59
3,0,13,2015,July,27,1,0,1,1,0.0,...,304,,0,Transient,75.00,0,0,Check-Out,2015-07-02,42e92ea1-37fe-4a75-ace7-db5999b95d16
4,0,14,2015,July,27,1,0,2,2,0.0,...,240,,0,Transient,98.00,0,1,Check-Out,2015-07-03,2d029adb-c7cf-4e50-b8d7-93f20ed9cde3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79325,0,23,2017,August,35,30,2,5,2,0.0,...,394,,0,Transient,96.14,0,0,Check-Out,2017-09-06,f31c226a-8ac9-46e1-b2ba-61baec8c74cd
79326,0,102,2017,August,35,31,2,5,3,0.0,...,9,,0,Transient,225.43,0,2,Check-Out,2017-09-07,2cba2d80-f130-4b17-916c-cf9c59291a61
79327,0,34,2017,August,35,31,2,5,2,0.0,...,9,,0,Transient,157.71,0,4,Check-Out,2017-09-07,751c3f3b-e761-44ab-b5be-83861dbd8b74
79328,0,109,2017,August,35,31,2,5,2,0.0,...,89,,0,Transient,104.40,0,0,Check-Out,2017-09-07,adb503da-26a2-46d3-998e-dbacfacf86a8


## Convert UUIDs to String Datatype

In [4]:
data['UUID'] = data['UUID'].astype(str)
data['UUID']

0        9a3402b4-a328-43b7-8a85-18e45379f0e7
1        e5744d91-faaa-4558-a4b3-886c0dfc91ab
2        ee3b1c49-95cd-4511-b8dc-aea390710b59
3        42e92ea1-37fe-4a75-ace7-db5999b95d16
4        2d029adb-c7cf-4e50-b8d7-93f20ed9cde3
                         ...                 
79325    f31c226a-8ac9-46e1-b2ba-61baec8c74cd
79326    2cba2d80-f130-4b17-916c-cf9c59291a61
79327    751c3f3b-e761-44ab-b5be-83861dbd8b74
79328    adb503da-26a2-46d3-998e-dbacfacf86a8
79329    37f3c70a-d193-412d-bee2-8cd170dad034
Name: UUID, Length: 119390, dtype: object

## Set UUIDs as Index

---

By setting the UUIDs as the dataframe's index, I maintain the unique identifier for each reservation and enable myself to perform joins with other datasets (such as engineered datasets later in the workflow0).

---

In [5]:
data = data.set_index('UUID')
data

Unnamed: 0_level_0,IsCanceled,LeadTime,ArrivalDateYear,ArrivalDateMonth,ArrivalDateWeekNumber,ArrivalDateDayOfMonth,StaysInWeekendNights,StaysInWeekNights,Adults,Children,...,DepositType,Agent,Company,DaysInWaitingList,CustomerType,ADR,RequiredCarParkingSpaces,TotalOfSpecialRequests,ReservationStatus,ReservationStatusDate
UUID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
9a3402b4-a328-43b7-8a85-18e45379f0e7,0,342,2015,July,27,1,0,0,2,0.0,...,No Deposit,,,0,Transient,0.00,0,0,Check-Out,2015-07-01
e5744d91-faaa-4558-a4b3-886c0dfc91ab,0,737,2015,July,27,1,0,0,2,0.0,...,No Deposit,,,0,Transient,0.00,0,0,Check-Out,2015-07-01
ee3b1c49-95cd-4511-b8dc-aea390710b59,0,7,2015,July,27,1,0,1,1,0.0,...,No Deposit,,,0,Transient,75.00,0,0,Check-Out,2015-07-02
42e92ea1-37fe-4a75-ace7-db5999b95d16,0,13,2015,July,27,1,0,1,1,0.0,...,No Deposit,304,,0,Transient,75.00,0,0,Check-Out,2015-07-02
2d029adb-c7cf-4e50-b8d7-93f20ed9cde3,0,14,2015,July,27,1,0,2,2,0.0,...,No Deposit,240,,0,Transient,98.00,0,1,Check-Out,2015-07-03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f31c226a-8ac9-46e1-b2ba-61baec8c74cd,0,23,2017,August,35,30,2,5,2,0.0,...,No Deposit,394,,0,Transient,96.14,0,0,Check-Out,2017-09-06
2cba2d80-f130-4b17-916c-cf9c59291a61,0,102,2017,August,35,31,2,5,3,0.0,...,No Deposit,9,,0,Transient,225.43,0,2,Check-Out,2017-09-07
751c3f3b-e761-44ab-b5be-83861dbd8b74,0,34,2017,August,35,31,2,5,2,0.0,...,No Deposit,9,,0,Transient,157.71,0,4,Check-Out,2017-09-07
adb503da-26a2-46d3-998e-dbacfacf86a8,0,109,2017,August,35,31,2,5,2,0.0,...,No Deposit,89,,0,Transient,104.40,0,0,Check-Out,2017-09-07


# Save Results

In [6]:
data.to_feather('../../data/source/full_data.feather', compression = 'zstd')