In [8]:
import sys
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import plotly.io as pio
# import seaborn as sns
import matplotlib.pyplot as plt
import datetime as dt
import math
sys.path.insert(1, '../')
np.random.seed(0)

# pio.templates.default = "simple_white"
# pio.templates.default = "plotly_white"
pio.renderers.default = "notebook"

In [9]:
exchange_rate = {'CNY': 6.9588, 'ZAR': 13.8691, 'KRW': 1120.456, 'SGD': 1.3718,
                 'THB': 32.9627, 'ARS': 37.7315, 'TWD': 30.8448, 'SAR': 3.752,
                 'USD': 1.0, 'MYR': 4.1797, 'SEK': 9.109, 'NZD': 1.4542,
                 'HKD': 7.8236, 'VND': 4.4e-05, 'IDR': 14330.4526,
                 'AUD': 1.3672, 'NOK': 8.5922, 'GBP': 0.7841, 'EUR': 0.8833,
                 'JPY': 113.4941, 'INR': 69.6475, 'PHP': 52.411, 'AED': 3.673,
                 'RUB': 67.0545, 'BHD': 0.377, 'CHF': 0.9984, 'OMR': 0.3851,
                 'UAH': 28.3135, 'CAD': 1.3296, 'TRY': 5.2118, 'PLN': 3.7887,
                 'ILS': 3.7211, 'PKR': 134.1847, 'DKK': 6.5934, 'RON': 4.1121,
                 'LKR': 179.3797, 'JOD': 0.7095, 'KWD': 0.3042, 'QAR': 3.6409,
                 'CZK': 22.9453, 'HUF': 285.9137, 'BRL': 3.8672,
                 'EGP': 17.9113, 'FJD': 2.1133, 'MXN': 20.3997, 'BDT': 84.1851,
                 'KZT': 375.1462, 'NGN': 365.7219, 'XPF': 105.4033,
                 'KHR': 0.00025, 'LAK': 8.6e-05}

def currency_convert_to_USD(value, currency):
    """
    Receives a money amount and a currency and converts it to USD
    :param value: The amount of money to convert
    :param currency: The original currency
    :return: The worth of this money in USD
    """
    if (currency not in exchange_rate):
        raise ValueError("The value of " + currency + " is not defined.")
    return value / exchange_rate[currency]


In [18]:
categorical_features = [
    'hotel_country_code',
    'accommadation_type_name',
    'charge_option',
    'customer_nationality',
    'guest_nationality_country_name',
    'origin_country_code',
    'language',
    'original_payment_method',
    'original_payment_type',
    'original_payment_currency',
    'cancellation_policy_code'
]

# column names in the data that has date type
date_type_cols = [
    "booking_datetime",
    "checkin_date",
    "checkout_date",
    "hotel_live_date",
    "cancellation_datetime"
]

# the features of the preprocessed data
selected_features = [
    "hotel_star_rating",
    "guest_is_not_the_customer",

    "no_of_adults",
    "no_of_children",
    "no_of_extra_bed",
    "no_of_room",

    "original_selling_amount",
    "original_payment_currency",
    
    "is_user_logged_in",
    "is_first_booking",
    
    "request_nonesmoke",
    "request_latecheckin",
    "request_highfloor",
    "request_largebed",
    "request_twinbeds",
    "request_airport",
    "request_earlycheckin",

    "booking_month",
    "booking_dayofweek",
    "no_of_nights",
    "checkin_since_booking",

    "noshow_penalty",
    "first_penalty",
    "first_ndays",
    "second_penalty",
    "second_ndays",
    
    "original_payment_type_Credit Card",
    "original_payment_type_Gift Card",
    "original_payment_type_Invoice",
    
    "charge_option_Pay Later",
    "charge_option_Pay Now",
    
    "accommadation_type_name_Apartment",
    "accommadation_type_name_Guest House / Bed & Breakfast",
    "accommadation_type_name_Hostel",
    "accommadation_type_name_Hotel",
    "accommadation_type_name_Resort",
    "accommadation_type_name_Serviced Apartment",
    "accommadation_type_name_other_accommadation",
]

In [19]:
X = pd.read_csv("../datasets/agoda_cancellation_train.csv")


In [20]:
money_clumns = ["original_payment_currency", "original_selling_amount"]
print(X[money_clumns])

      original_payment_currency  original_selling_amount
0                           CNY                   267.55
1                           ZAR                   114.15
2                           KRW                   115.79
3                           SGD                    68.74
4                           THB                   127.76
...                         ...                      ...
58654                       IDR                    11.12
58655                       PHP                    89.38
58656                       CNY                   404.52
58657                       MXN                    65.88
58658                       HKD                   206.02

[58659 rows x 2 columns]


In [21]:
# Adding a cloumn of the value of the deal in USD
convert_currency_vector = np.vectorize(currency_convert_to_USD)
X["selling_amount_USD"] = convert_currency_vector(X["original_selling_amount"], X["original_payment_currency"])
money_clumns.append("selling_amount_USD")
print(X[money_clumns])

      original_payment_currency  original_selling_amount  selling_amount_USD
0                           CNY                   267.55           38.447721
1                           ZAR                   114.15            8.230527
2                           KRW                   115.79            0.103342
3                           SGD                    68.74           50.109345
4                           THB                   127.76            3.875896
...                         ...                      ...                 ...
58654                       IDR                    11.12            0.000776
58655                       PHP                    89.38            1.705367
58656                       CNY                   404.52           58.130712
58657                       MXN                    65.88            3.229459
58658                       HKD                   206.02           26.333146

[58659 rows x 3 columns]


In [65]:
wealthy = pd.read_csv("../challenge/wealthy_countries.csv")
print(wealthy)

                     Location   Median
0                  Luxembourg  259,899
1                   Australia  238,072
2                     Iceland  231,462
3                     Belgium  230,548
4                   Hong Kong  173,768
..                        ...      ...
163                Mozambique      345
164                   Burundi      281
165                   Lesotho      264
166  Central African Republic      212
167                     Haiti      193

[168 rows x 2 columns]


In [71]:
country_code = pd.read_csv("../challenge/country_code.csv")
print(country_code)

            Country code
0       Afghanistan   AF
1           Albania   AL
2           Algeria   DZ
3    American Samoa   AS
4           Andorra   AD
..              ...  ...
244  Western Sahara   EH
245           Yemen   YE
246          Zambia   ZM
247        Zimbabwe   ZW
248   Åland Islands   AX

[249 rows x 2 columns]


In [54]:
# wealthy_dict = dict()
# for loc in wealthy[Location]:
#     wealthy_dict[loc] = 

In [78]:
wealthy[wealthy.Location =="Iceland"].Median

2    231,462
Name: Median, dtype: object