In [2]:
import os
import numpy as np
import pandas as pd
from datetime import datetime as dt
import openpyxl

from masakali_data.utils import open_csv, save_csv, open_parquet, current_directory
from masakali_data.exchange_rates import convert_columns_to_usd, convert_columns_to_idr

from masakali_data.smoobu import get_reservations

current_directory = current_directory()

In [3]:
df = open_parquet(current_directory, 'data/booking_revenue_usd.parquet')

In [4]:
df.head()

Unnamed: 0,booking_id,check_in,check_out,guest_name,gross_income,commission,payment_fee,vat_taxes,net_income,platform,cleaning_fee,taxable_amount,nights,villa_name
0,3059410468,2022-04-23,2022-04-25,Anne-kristell Guiot,125.3,-18.8,-2.88,-2.38,101.24,booking,27.84,73.4,2,surya
1,3903383056,2022-04-19,2022-04-20,Artem Nelipovich,45.11,-6.77,-1.04,-0.86,36.45,booking,27.84,8.6,1,jala
2,3551951695,2022-04-17,2022-04-19,MARE GOWD GR,139.3,-20.89,-3.2,-2.65,112.55,booking,27.86,84.69,2,surya
3,2418817562,2022-04-17,2022-04-19,Konkula Sudeerkumar,139.3,-20.89,-3.2,-2.65,112.55,booking,27.86,84.69,2,chandra
4,3834883830,2022-06-06,2022-06-12,sancamillo danilo,368.95,-55.34,-8.49,-7.02,298.1,booking,27.33,270.77,6,jala


In [20]:
df = pd.read_csv('C:\\Users\\kevin\\coding\\masakali_data\\booking\\booking_2022.csv')
df = df[df['Type'] == 'Reservation']

# Grab relevant columns
df = df[['Reference number', 'Check-in', 'Checkout', 'Guest name', 'Amount', 'Commission', 'Payment charge', 'VAT for online platform services', 'Net']]

# Rename columns
df = df.rename(columns={
'Platform': 'Booking.com',
'Reference number': 'booking_id', 
'Check-in': 'check_in', 
'Checkout':'check_out', 
'Guest name': 'guest_name',
'Amount': 'gross_income', 
'Commission': 'commission', 
'Payment charge': 'payment_fee', 
'VAT for online platform services': 'vat_taxes', 
'Net': 'net_income',
})

df['platform'] = 'booking'

# Turn check_in and check_out to datetime
df['check_in'] = pd.to_datetime(df.check_in)
df['check_out'] = pd.to_datetime(df.check_out)

# Add new columns
df['cleaning_fee'] = 400000
df['taxable_amount'] = df.net_income - df.cleaning_fee
df['nights'] = (df.check_out - df.check_in).dt.days

# Get all the smoobu bookings
smoobu_bookings = get_reservations()[['reference_id', 'villa_name']]
smoobu_bookings.to_csv('bookings.csv')
smoobu_bookings = smoobu_bookings.rename(columns={'reference_id': 'booking_id'})


df['booking_id'] = df.booking_id.astype('string')

smoobu_bookings[smoobu_bookings.booking_id == '3059410468']

df = df.merge(smoobu_bookings, on='booking_id', how='left')

df = df.astype({
    'booking_id': 'uint32',
    'check_in': 'datetime64[ns]',
    'check_out': 'datetime64[ns]',
    'guest_name': 'string',
    'gross_income': 'float64',
    'commission': 'float64',
    'payment_fee': 'float64',
    'vat_taxes': 'float64',
    'net_income': 'float64',
    'platform': 'category',
    'cleaning_fee': 'float64',
    'taxable_amount': 'float64',
    'nights': 'uint16',
    'villa_name': 'category'
})

df


Unnamed: 0,booking_id,check_in,check_out,guest_name,gross_income,commission,payment_fee,vat_taxes,net_income,platform,cleaning_fee,taxable_amount,nights,villa_name
0,3059410468,2022-04-23,2022-04-25,Anne-kristell Guiot,1800000.0,-270000.0,-41400.0,-34254.0,1454346.0,booking,400000.0,1054346.0,2,surya
1,3903383056,2022-04-19,2022-04-20,Artem Nelipovich,648000.0,-97200.0,-14904.0,-12331.0,523565.0,booking,400000.0,123565.0,1,jala
2,3551951695,2022-04-17,2022-04-19,MARE GOWD GR,2000000.0,-300000.0,-46000.0,-38060.0,1615940.0,booking,400000.0,1215940.0,2,surya
3,2418817562,2022-04-17,2022-04-19,Konkula Sudeerkumar,2000000.0,-300000.0,-46000.0,-38060.0,1615940.0,booking,400000.0,1215940.0,2,chandra
4,3834883830,2022-06-06,2022-06-12,sancamillo danilo,5400000.0,-810000.0,-124200.0,-102762.0,4363038.0,booking,400000.0,3963038.0,6,jala
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68,2982473193,2022-12-20,2022-12-22,Katia Veloza,2600000.0,-390000.0,-59800.0,-49478.0,2100722.0,booking,400000.0,1700722.0,2,jala
69,3415749091,2022-12-20,2022-12-21,Tashna Oconnor,2100000.0,-315000.0,-48300.0,-39963.0,1696737.0,booking,400000.0,1296737.0,1,surya
70,3663819019,2022-12-16,2022-12-19,Danni Oates,3900000.0,-585000.0,-89700.0,-74217.0,3151083.0,booking,400000.0,2751083.0,3,jala
71,3633426848,2022-12-12,2022-12-15,Ayano Okabe,3900000.0,-585000.0,-89700.0,-74217.0,3151083.0,booking,400000.0,2751083.0,3,jala


In [13]:
smoobu_bookings = get_reservations()[['reference_id', 'villa_name']]
smoobu_bookings.to_csv('bookings.csv')
smoobu_bookings = smoobu_bookings.rename(columns={'reference_id': 'booking_id'})


df['booking_id'] = df.booking_id.astype('string')

smoobu_bookings[smoobu_bookings.booking_id == '3059410468']

merged = df.merge(smoobu_bookings, on='booking_id', how='left')






Unnamed: 0,booking_id,check_in,check_out,guest_name,gross_income,commission,payment_fee,vat_taxes,net_income,platform,cleaning_fee,taxable_amount,nights,villa_name
