In [1]:
# Dependencies
import requests
import pandas as pd
import json
import datetime
import requests
from datetime import datetime, timedelta

from config import grant_type, client_id, client_secret, redirect_url,refresh_token

In [2]:
# Step 1: Create the OAuth link and authorize the access
oauth_link = f"https://hotels.cloudbeds.com/api/v1.1/oauth?client_id={client_id}&redirect_uri={redirect_url}&response_type=code"
print(f"Please go to this URL to authorize access: {oauth_link}")

# Step 2: Retrieve the authorization code from the redirect URL
auth_code = input("Enter the authorization code from the redirect URL: ")

url = 'https://hotels.cloudbeds.com/api/v1.1/access_token'
headers = {'Content-type': 'application/json'}

# Specify the grant type and other parameters based on your authentication flow
if grant_type == 'authorization_code':
    data = {
        'grant_type': 'authorization_code',
        'client_id': client_id,
        'client_secret': client_secret,
        'redirect_uri': redirect_url,
        'code': auth_code
    }
elif grant_type == 'refresh_token':
    data = {
        'grant_type': 'refresh_token',
        'client_id': client_id,
        'client_secret': client_secret,
        'refresh_token': refresh_token
    }

response = requests.post(url, data=data)

if response.ok:
    access_token_data = response.json()
    access_token = access_token_data["access_token"]
    token_type = access_token_data["token_type"]
    expires_in = access_token_data["expires_in"]
    refresh_token = access_token_data.get("refresh_token")
    print(f"Access token: {access_token}")
    print(f"Token type: {token_type}")
    print(f"Expires in: {expires_in} seconds")
    if refresh_token:
        print(f"Refresh token: {refresh_token}")
else:
    print("Error retrieving access token.")
    print(response.json())

Please go to this URL to authorize access: https://hotels.cloudbeds.com/api/v1.1/oauth?client_id=live1_172779_NOXF9fcUKEH6zMahDiSuv4T0&redirect_uri=https://webhook.site/2004f68e-9121-4331-a917-8bb8f74fbbb2&response_type=code
Access token: eyJraWQiOiI5eFUtV09RSWFtWXdLM3JneHF0NG9wbWNEVXdXWlQ3QXRUQm9TTzVuQ3hnIiwiYWxnIjoiUlMyNTYifQ.eyJ2ZXIiOjEsImp0aSI6IkFULl9JOFo0SHRUaldqMHRjT3ExVHp0QUhpU285UHBTWjhjS3RiZ1hLdU9ELWcub2FyMTJlZ3d0d09aOHhJZmI1ZDciLCJpc3MiOiJodHRwczovL2lkcC5jbG91ZGJlZHMuY29tL29hdXRoMi9hdXNkNWcydTY5QmxKNFdBYzVkNiIsImF1ZCI6Imh0dHBzOi8vaG90ZWxzLmNsb3VkYmVkcy5jb20vYXBpIiwiaWF0IjoxNjg1MDY1NDgzLCJleHAiOjE2ODUwNjkwODMsImNpZCI6ImxpdmUxXzE3Mjc3OV9OT1hGOWZjVUtFSDZ6TWFoRGlTdXY0VDAiLCJ1aWQiOiIwMHUxbmthcHRlUjYwNmdmWDVkNyIsInNjcCI6WyJvZmZsaW5lX2FjY2VzcyJdLCJhdXRoX3RpbWUiOjE2ODQxODU5NzgsInN1YiI6ImtlbnRyb3RoKzFAZ21haWwuY29tIiwiYXNzb2NpYXRpb25JZHMiOltdLCJpc2xhbmQiOjEsInByb3BlcnR5SWRzIjpbMTcyNzc5XSwibWZkVXNlcklkIjoyMzMyOTIsInR5cGUiOiJwcm9wZXJ0eSJ9.jUSs96gaBxa-070mN_VqWGnPujl-CshgS3jRcEDV6z8WWSOsP

In [3]:
#Pull 1 year past data

url = "https://hotels.cloudbeds.com/api/v1.1/getReservationsWithRateDetails"

# Set up pagination parameters
page_number = 1
page_size = 100

results_past=[]

# Set the date range for the previous year
today = datetime.now()
one_year_ago = today - timedelta(days=365)

while True:
    # Parameters for the API request
    headers = {
        'Authorization': f'Bearer {access_token}'
    }
    params = {
        "reservationCheckOutFrom": one_year_ago.strftime("%Y-%m-%d"),
        "reservationCheckOutTo": today.strftime("%Y-%m-%d"),
        "sortByRecent": True,
        "pageNumber": page_number,
        "pageSize": page_size
    }

    # Make the API request
    response = requests.get(url, params=params, headers=headers)

    # Extract the 'data' field from the API response
    api_data = response.json().get('data', [])

    # Create a DataFrame from the extracted data
    df = pd.DataFrame(api_data)

    # Append the DataFrame to the overall results list
    results_past.append(df)

    # Check if there are more results
    if len(df) < page_size:
        break

    # Increment the page number for the next request
    page_number += 1

# Concatenate all the DataFrames into one
df_all_past = pd.concat(results_past, ignore_index=True)

# Print the final DataFrame
print(df_all_past)

      reservationID  isDeleted          dateCreated       dateCreatedUTC  \
0     0283876693673      False  2023-04-23 19:36:39  2023-04-24 00:36:39   
1     0639615281850      False  2023-05-24 19:53:34  2023-05-25 00:53:34   
2     8216207647932      False  2023-02-25 09:27:37  2023-02-25 15:27:37   
3     4800649080969      False  2023-05-22 23:51:04  2023-05-23 04:51:04   
4     8769704772509      False  2023-03-08 18:28:02  2023-03-09 00:28:02   
...             ...        ...                  ...                  ...   
7500  2981326909508      False  2022-01-27 08:46:12  2022-01-27 14:46:12   
7501  1723741176202      False  2022-01-21 07:48:29  2022-01-21 13:48:29   
7502  2625454162721      False  2022-01-16 09:11:45  2022-01-16 15:11:45   
7503  3118431497232      False  2021-12-05 16:11:26  2021-12-05 22:11:26   
7504  6597804802670      False  2022-01-07 01:46:26  2022-01-07 07:46:26   

             dateModified      dateModifiedUTC       status  \
0     2023-05-25 12:25:0

In [4]:
#Pull 1 year future data

url = "https://hotels.cloudbeds.com/api/v1.1/getReservationsWithRateDetails"

# Set up pagination parameters
page_number = 1
page_size = 100

results_future=[]

# Set the date range for the previous year
today = datetime.now()
one_year_ahead = today + timedelta(days=365)

while True:
    # Parameters for the API request
    headers = {
        'Authorization': f'Bearer {access_token}'
    }
    params = {
        "reservationCheckOutFrom": today.strftime("%Y-%m-%d"),
        "reservationCheckOutTo": one_year_ahead.strftime("%Y-%m-%d"),
        "sortByRecent": True,
        "pageNumber": page_number,
        "pageSize": page_size
    }


    # Make the API request
    response = requests.get(url, params=params, headers=headers)

    # Extract the 'data' field from the API response
    api_data = response.json().get('data', [])

    # Create a DataFrame from the extracted data
    df = pd.DataFrame(api_data)

    # Append the DataFrame to the overall results list
    results_future.append(df)

    # Check if there are more results
    if len(df) < page_size:
        break

    # Increment the page number for the next request
    page_number += 1

# Concatenate all the DataFrames into one
df_all_future = pd.concat(results_future, ignore_index=True)

# Print the final DataFrame
print(df_all_future)

     reservationID  isDeleted          dateCreated       dateCreatedUTC  \
0    9595850071467      False  2023-04-09 21:03:27  2023-04-10 02:03:27   
1    0278823263007      False  2023-05-25 18:40:24  2023-05-25 23:40:24   
2    1753623055853      False  2023-04-18 05:32:20  2023-04-18 10:32:20   
3    9471939176154      False  2023-01-09 02:57:55  2023-01-09 08:57:55   
4    0486584492642      False  2023-05-25 19:04:25  2023-05-26 00:04:25   
..             ...        ...                  ...                  ...   
661  8668732599792      False  2022-10-02 01:01:18  2022-10-02 06:01:18   
662  8901269886916      False  2022-09-21 12:43:29  2022-09-21 17:43:29   
663  7442886603568      False  2022-09-14 13:03:45  2022-09-14 18:03:45   
664  9768135512101      False  2022-09-14 09:29:29  2022-09-14 14:29:29   
665  8438510931843      False  2022-08-20 22:55:12  2022-08-21 03:55:12   

            dateModified      dateModifiedUTC      status reservationCheckIn  \
0    2023-05-25 19:

In [5]:
print(f"We received {len(df_all_past)} past reservations.")
print(f"We received {len(df_all_future)} future resservations.")

We received 7505 past reservations.
We received 666 future resservations.


In [21]:
#create cleaning data function
def expand_dataframe(df):
    expanded_rows = []
    
    for index, row in df.iterrows():
        detailed_rates = row['detailedRates']

        if detailed_rates:
            for date, price in detailed_rates.items():
                new_row = row.copy()
                new_row['date'] = date
                new_row['price'] = price
                expanded_rows.append(new_row)

    expanded_df = pd.DataFrame(expanded_rows)
    expanded_df.drop('detailedRates', axis=1, inplace=True)
    
    expanded_df['source'] = expanded_df['source'].apply(lambda x: x['name'])
    expanded_df.rename(columns={'source': 'source_name'}, inplace=True)
    
    expanded_df['adults'] = expanded_df['rooms'].apply(lambda x: x[0]['adults'])
    expanded_df.drop('rooms', axis=1, inplace=True)
    
    expanded_df['date'] = pd.to_datetime(expanded_df['date'])
    expanded_df['day'] = expanded_df['date'].dt.day
    expanded_df['month'] = expanded_df['date'].dt.month
    expanded_df['year'] = expanded_df['date'].dt.year
    
    return expanded_df

#create filter data for 'canceled' function
def filter_canceled_reservations(df):
    return df[df['status'] != 'canceled']

# Filter 'canceled' reservations
df_all_past = filter_canceled_reservations(df_all_past)
df_all_future = filter_canceled_reservations(df_all_future)

# Clean and expand dataframes
expanded_df_past = expand_dataframe(df_all_past[['reservationID', 'source', 'detailedRates', 'rooms']])
expanded_df_future = expand_dataframe(df_all_future[['reservationID', 'source', 'detailedRates', 'rooms']])


In [22]:
expanded_df_future.head()

Unnamed: 0,reservationID,source_name,date,price,adults,day,month,year
0,9595850071467,Hostelworld,2023-05-25,159.8,1,25,5,2023
0,9595850071467,Hostelworld,2023-05-26,190.4,1,26,5,2023
0,9595850071467,Hostelworld,2023-05-27,190.4,1,27,5,2023
0,9595850071467,Hostelworld,2023-05-28,159.8,1,28,5,2023
0,9595850071467,Hostelworld,2023-05-29,146.2,1,29,5,2023


In [23]:
expanded_df_past.head()

Unnamed: 0,reservationID,source_name,date,price,adults,day,month,year
0,283876693673,Booking.com,2023-05-23,149.0,2,23,5,2023
0,283876693673,Booking.com,2023-05-24,149.0,2,24,5,2023
1,639615281850,Website/Booking Engine,2023-05-24,125.0,1,24,5,2023
2,8216207647932,Expedia,2023-05-24,119.0,2,24,5,2023
3,4800649080969,Hostelworld,2023-05-24,32.3,1,24,5,2023


In [34]:


# Create a pivot table with sum of prices
pivot_table_past_rev = pd.pivot_table(expanded_df_past, values='price', index='month', columns='day', aggfunc='sum')
pivot_table_future_rev = pd.pivot_table(expanded_df_future, values='price', index='month', columns='day', aggfunc='sum')

# Reindex the columns to have labels 1-31
pivot_table_past_rev = pivot_table_past_rev.reindex(columns=range(1, 32))
pivot_table_future_rev = pivot_table_future_rev.reindex(columns=range(1, 32))

# Reindex the rows to have labels 1-12 and sort by year
sorted_months_past_rev = sorted(pivot_table_past_rev.index.unique(), key=lambda x: (expanded_df_past[expanded_df_past['month'] == x]['year'].min(), x))
pivot_table_past_rev = pivot_table_past_rev.reindex(index=sorted_months_past_rev)

sorted_months_future_rev = sorted(pivot_table_future_rev.index.unique(), key=lambda x: (expanded_df_future[expanded_df_future['month'] == x]['year'].min(), x))
pivot_table_future_rev = pivot_table_future_rev.reindex(index=sorted_months_future_rev)



In [35]:
# Convert 'adults' column to numeric values
expanded_df_past['adults'] = pd.to_numeric(expanded_df_past['adults'])
expanded_df_future['adults'] = pd.to_numeric(expanded_df_future['adults'])

# Create a pivot table with sum of adults per day divided by 68
pivot_table_past_occ = pd.pivot_table(expanded_df_past, values='adults', index='month', columns='day', aggfunc='sum') / 68
pivot_table_future_occ = pd.pivot_table(expanded_df_future, values='adults', index='month', columns='day', aggfunc='sum') / 68

# Reindex the columns to have labels 1-31
pivot_table_past_occ = pivot_table_past_occ.reindex(columns=range(1, 32))
pivot_table_future_occ = pivot_table_future_occ.reindex(columns=range(1, 32))

# Reindex the rows to have labels 1-12 and sort by year
sorted_months_past_occ = sorted(pivot_table_past_occ.index.unique(), key=lambda x: (expanded_df_past[expanded_df_past['month'] == x]['year'].min(), x))
pivot_table_past_occ = pivot_table_past_occ.reindex(index=sorted_months_past_occ)

sorted_months_future_occ = sorted(pivot_table_future_occ.index.unique(), key=lambda x: (expanded_df_future[expanded_df_future['month'] == x]['year'].min(), x))
pivot_table_future_occ = pivot_table_future_occ.reindex(index=sorted_months_future_occ)



In [36]:
pivot_table_past_rev.to_csv('past_rev.csv',index=False)
pivot_table_past_occ.to_csv('past_occ.csv',index=False)
pivot_table_future_rev.to_csv('future_rev.csv',index=False)
pivot_table_future_occ.to_csv('future_occ.csv',index=False)

In [37]:
# Print the pivot table
print(pivot_table_past_rev)

day         1        2        3        4        5        6        7        8   \
month                                                                           
5      1873.48  1722.61  2096.95  2445.82  2908.14  3068.36  2222.98  1860.31   
6      1944.82  1947.70  2803.71  2916.05  1713.21  1612.88  1790.20  2116.54   
7      2646.65  2699.50  2286.66  1726.92  1391.08  1276.18  1917.72  2688.52   
8      1852.92  1759.69  1903.54  1923.05  2491.50  2522.73  1472.18  1323.90   
9      1964.37  2346.49  2490.84  1795.50  1346.66  1342.32  1383.99  1891.87   
10     2728.29  1527.88  1244.03  1702.41  1799.99  3338.04  4299.60  4299.60   
11     1081.00  1060.00  1265.00  2496.12  2359.67  1332.86  1092.77  1540.06   
12     2013.82  2167.28  2413.05  1245.10  1267.94  1708.60  1816.30  1957.34   
1      1599.57  1549.11  1217.86  1523.08  2132.95  2255.47  2296.76  1332.12   
2      1165.21  1514.11  2100.82  2641.99  1485.32  1646.96  1450.48  1594.93   
3      1544.81  1743.87  238

In [38]:
print(pivot_table_future_rev)

day         1        2        3        4        5        6       7        8   \
month                                                                          
5          NaN      NaN      NaN      NaN      NaN      NaN     NaN      NaN   
6      2104.90  2756.74  2734.38  1569.01  1080.50   672.29   966.2  1178.61   
7       483.49   387.76   460.66   587.55   791.77  1487.86  2054.5  2208.12   
8        39.00    82.10   121.50   141.84   141.84    41.00    78.0   265.40   
9       527.15   782.38   716.68   394.18   343.70   308.00   348.0   540.00   
10      244.00   208.00   208.00      NaN   319.00  1911.00  1992.0  1188.00   
11       41.00    62.00    81.00    81.00    52.00    41.00    41.0    41.00   
12       47.00    42.50      NaN      NaN      NaN      NaN     NaN      NaN   
1        37.00    37.00    37.00    39.00    47.00    47.00     NaN      NaN   

day         9        10  ...       22      23       24       25       26  \
month                    ...               

In [39]:
print(pivot_table_future_occ)

day          1         2         3         4         5         6         7   \
month                                                                         
5           NaN       NaN       NaN       NaN       NaN       NaN       NaN   
6      0.661765  0.691176  0.661765  0.514706  0.367647  0.220588  0.382353   
7      0.132353  0.132353  0.176471  0.205882  0.294118  0.514706  0.588235   
8      0.014706  0.029412  0.044118  0.044118  0.044118  0.014706  0.029412   
9      0.102941  0.147059  0.161765  0.102941  0.073529  0.058824  0.058824   
10     0.014706  0.014706  0.014706       NaN  0.058824  0.264706  0.279412   
11     0.014706  0.014706  0.014706  0.014706  0.014706  0.014706  0.014706   
12     0.014706  0.014706       NaN       NaN       NaN       NaN       NaN   
1      0.014706  0.014706  0.014706  0.014706  0.014706  0.014706       NaN   

day          8         9         10  ...        22        23        24  \
month                                ...                

In [40]:
print(pivot_table_past_occ)

day          1         2         3         4         5         6         7   \
month                                                                         
5      0.647059  0.617647  0.750000  0.794118  0.720588  0.794118  0.705882   
6      0.720588  0.661765  0.838235  0.764706  0.558824  0.558824  0.573529   
7      0.764706  0.794118  0.794118  0.588235  0.500000  0.411765  0.573529   
8      0.632353  0.588235  0.750000  0.691176  0.661765  0.661765  0.470588   
9      0.558824  0.588235  0.691176  0.647059  0.485294  0.455882  0.500000   
10     0.455882  0.411765  0.426471  0.544118  0.573529  0.617647  0.558824   
11     0.352941  0.352941  0.352941  0.529412  0.529412  0.367647  0.352941   
12     0.705882  0.750000  0.750000  0.470588  0.500000  0.617647  0.647059   
1      0.602941  0.573529  0.485294  0.573529  0.750000  0.750000  0.779412   
2      0.455882  0.588235  0.647059  0.735294  0.514706  0.573529  0.588235   
3      0.514706  0.617647  0.647059  0.720588  0.735