In [1]:
import pandas as pd

In [2]:
# Load the raw errands and orders extracts (errands first, then orders).
errands_df, orders_df = (
    pd.read_parquet(parquet_path)
    for parquet_path in ('Data/errands.parquet', 'Data/orders.parquet')
)

In [7]:
# List the columns of the orders table.
orders_df.columns

Index(['order_id', 'PNR', 'order_created_at', 'booking_system', 'Site_Country',
       'Brand', 'Order_Amount', 'currency', 'Revenue', 'Partner',
       'Customer_Group_Type', 'Device', 'client_entry_type',
       'booking_system_source_type', 'Origin_Country', 'Destination_Country',
       'Journey_Type_ID', 'Is_Changed', 'Is_Canceled', 'cancel_reason',
       'change_reason'],
      dtype='object')

In [6]:
# List the columns of the errands table.
# NOTE(review): the saved output shows 'order_id', which only exists after the
# rename in the following cell (this cell's execution count is higher than that
# cell's). On a fresh top-to-bottom run the key column is presumably still
# called 'order_number' at this point -- re-run in order to refresh the output.
errands_df.columns

Index(['order_id', 'errand_id', 'created', 'errand_category', 'errand_type',
       'errand_action', 'errand_channel', 'is_test_errand'],
      dtype='object')

In [4]:
# Errand order numbers are base-36 strings; decode them to integers and expose
# them under 'order_id' so the errands can be joined on that key.
# The guard keeps the cell re-runnable: once the column has been renamed,
# 'order_number' no longer exists and an unguarded second run raises KeyError.
if 'order_number' in errands_df.columns:
    errands_df = (
        errands_df
        .assign(order_number=lambda frame: frame['order_number'].apply(lambda code: int(code, 36)))
        .rename(columns={'order_number': 'order_id'})
    )

In [5]:
# Left join: every order is kept; an order with several errands yields one row
# per errand, and an order with none gets missing values in the errand columns.
merged_df = orders_df.merge(errands_df, how='left', on='order_id')

In [8]:
# Lookup from the currency names used in the 'currency' column to three-letter
# currency codes. convert_prices_to_usd reports any name missing from this
# table as "Unmapped currencies".
currency_name_to_code = {
    'Euro': 'EUR',
    'Australian Dollar': 'AUD',
    'Brazilian Real': 'BRL',
    'US Dollar': 'USD',
    'Danish Krone': 'DKK',
    'Saudi Riyal': 'SAR',
    'Mexican Peso': 'MXN',
    'Pound Sterling': 'GBP',
    'Zloty': 'PLN',
    'Norwegian Krone': 'NOK',
    'Canadian Dollar': 'CAD',
    'United Arab Emirates dirham': 'AED',
    'Swedish Krona': 'SEK',
    'Chilean Peso': 'CLP',
    'Peso Uruguayo': 'UYU',
    'Nuevo Sol Peru': 'PEN',
    'South Korean Won': 'KRW',
    'Malaysian Ringgit': 'MYR',
    'Argentine Peso': 'ARS',
    'Thai Baht': 'THB',
    'Czech Koruna': 'CZK',
    'Colombian Peso Colombia': 'COP',
    'Kuwaiti Dinar': 'KWD',
    'Swiss Franc': 'CHF',
    'Hryvnia Ukraine': 'UAH',
    'South African Rand': 'ZAR',
    'Japanese yen': 'JPY',
    'Jordanian Dinar': 'JOD',
    'Bahraini Dinar': 'BHD',
    'New Zealand Dollar': 'NZD',
    'Indian Rupee': 'INR',
    'Egyptian Pound': 'EGP',
    'Bulgarian Lev': 'BGN',
    'Rupiah Indonesia': 'IDR',
    'Turkish Lira': 'TRY',
    'Qatari Rial': 'QAR',
    'Singapore Dollar': 'SGD',
    'Hong Kong Dollar': 'HKD',
    'Philippine Peso': 'PHP',
    'New Taiwan Dollar': 'TWD',
    'Rial Omani Oman': 'OMR',
    'Forint': 'HUF',
    'Yuan Renminbi': 'CNY',
    'Vietnamese dong': 'VND',
    'Iceland Krona': 'ISK',
    'Tenge Kazakhstan': 'KZT',
    'Uzbekistan Som': 'UZS'
}
# Static exchange rates: each value is the multiplier applied to an amount in
# that currency to express it in USD. A real analysis should fetch current
# rates from an API instead; easy-exchange-rates and forex-python have been
# tried so far -- read their documentation to see why the exchange has problems.
# NOTE(review): no as-of date is recorded for these values -- confirm they all
# come from one consistent snapshot before relying on the converted amounts.
exchange_rates_to_usd = {
    'USD': 1,
    'EUR': 1.08,
    'AUD': 0.64,
    'BRL': 0.19,
    'DKK': 0.14,
    'SAR': 0.27,
    'MXN': 0.052,
    'GBP': 1.37,
    'PLN': 0.26,
    'NOK': 0.11,
    'CAD': 0.75,
    'AED': 0.27,
    'SEK': 0.095,
    'CLP': 0.0012,
    'UYU': 0.023,
    'PEN': 0.27,
    'KRW': 0.00076,
    'MYR': 0.23,
    'ARS': 0.010,
    'THB': 0.032,
    'CZK': 0.048,
    'COP': 0.00026,
    'KWD': 3.28,
    'CHF': 1.09,
    'UAH': 0.036,
    'ZAR': 0.055,
    'JPY': 0.0074,
    'JOD': 1.41,
    'BHD': 2.65,
    'NZD': 0.63,
    'INR': 0.013,
    'EGP': 0.032,
    'BGN': 0.58,
    'IDR': 0.000066,
    'TRY': 0.054,
    'QAR': 0.27,
    'SGD': 0.74,
    'HKD': 0.13,
    'PHP': 0.018,
    'TWD': 0.032,
    'OMR': 2.60,
    'HUF': 0.0031,
    'CNY': 0.15,
    'VND': 0.000043,
    'ISK': 0.0075,
    'KZT': 0.0023,
    'UZS': 0.000091
}



def _insert_after(frame, anchor, name, values):
    """Put column ``name`` directly after column ``anchor`` in ``frame`` (modifies ``frame``)."""
    if name in frame.columns:
        # Replace a stale copy of the column instead of letting insert() raise.
        del frame[name]
    frame.insert(frame.columns.get_loc(anchor) + 1, name, values)


def convert_prices_to_usd(df, name_to_code=None, rates_to_usd=None):
    """Return a copy of ``df`` with ``Order_Amount`` and ``Revenue`` converted to USD.

    The currency name in ``df['currency']`` is translated to a currency code,
    the code is translated to an exchange rate, and both amount columns are
    multiplied by that rate.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``currency``, ``Order_Amount`` and ``Revenue``.
        It is not modified.
    name_to_code : dict, optional
        Currency name -> currency code. Defaults to the module-level
        ``currency_name_to_code``.
    rates_to_usd : dict, optional
        Currency code -> multiplier that turns an amount into USD. Defaults to
        the module-level ``exchange_rates_to_usd``.

    Returns
    -------
    pandas.DataFrame
        A new frame in which
        * ``currency_code`` sits directly after ``currency``;
        * ``Order_Amount`` is replaced, in the same position, by ``Order_Amount_in_usd``;
        * ``Revenue`` is replaced, in the same position, by ``Revenue_in_usd``.

    Notes
    -----
    Rows whose currency has no code, or whose code has no rate, get a converted
    value of 0 (not NaN). Both situations are printed so they do not go
    unnoticed; exclude such rows before aggregating if zeros would distort the
    result.
    Passing an already-converted frame is a no-op for the amount columns, so the
    calling cell can be re-run safely.
    """
    if name_to_code is None:
        name_to_code = currency_name_to_code
    if rates_to_usd is None:
        rates_to_usd = exchange_rates_to_usd

    # Work on a copy so the caller's frame is never changed behind its back.
    converted = df.copy()

    codes = converted['currency'].map(name_to_code)
    _insert_after(converted, 'currency', 'currency_code', codes)

    unmapped = converted.loc[codes.isna(), 'currency'].unique()
    if len(unmapped) > 0:
        print("Unmapped currencies:", unmapped)

    # Vectorized lookup: one rate per row, NaN where the code is unknown.
    rates = codes.map(rates_to_usd)
    codes_without_rate = codes[rates.isna() & codes.notna()].unique()
    if len(codes_without_rate) > 0:
        print("Currency codes without an exchange rate:", codes_without_rate)
    # Unknown currencies convert to 0, matching the historical behaviour.
    rates = rates.fillna(0)

    for source_col in ('Order_Amount', 'Revenue'):
        usd_col = f'{source_col}_in_usd'
        if source_col not in converted.columns and usd_col in converted.columns:
            # Already converted on a previous call; nothing left to do.
            continue
        _insert_after(converted, source_col, usd_col, converted[source_col] * rates)
        converted = converted.drop(columns=source_col)

    return converted


# Swap Order_Amount and Revenue in the merged frame for their USD equivalents.
merged_df = convert_prices_to_usd(merged_df)


In [9]:
# List the merged frame's columns to check the USD columns replaced the originals.
merged_df.columns

Index(['order_id', 'PNR', 'order_created_at', 'booking_system', 'Site_Country',
       'Brand', 'Order_Amount_in_usd', 'currency', 'currency_code',
       'Revenue_in_usd', 'Partner', 'Customer_Group_Type', 'Device',
       'client_entry_type', 'booking_system_source_type', 'Origin_Country',
       'Destination_Country', 'Journey_Type_ID', 'Is_Changed', 'Is_Canceled',
       'cancel_reason', 'change_reason', 'errand_id', 'created',
       'errand_category', 'errand_type', 'errand_action', 'errand_channel',
       'is_test_errand'],
      dtype='object')

In [12]:
# Drop rows that belong to test errands. Because the merge is a left join,
# orders without any errand carry a missing 'is_test_errand'; a plain `== 0`
# comparison is False for missing values and would discard every errand-free
# order, so missing values are kept explicitly.
not_test_errand = merged_df['is_test_errand'].isna() | (merged_df['is_test_errand'] == 0)
merged_df = merged_df[not_test_errand]
# keep=False removes *every* order_id that occurs more than once, i.e. orders
# with several errands are excluded entirely rather than collapsed to one row.
# NOTE(review): confirm this is intended; keep='first' would retain one row per order.
merged_df = merged_df.drop_duplicates(subset='order_id', keep=False)


In [None]:
# 1 when the row has an errand_id, 0 when it is missing.
# assign() builds a new frame, which avoids the SettingWithCopyWarning that
# in-place column assignment can raise on a frame produced by boolean filtering.
merged_df = merged_df.assign(has_errand=merged_df['errand_id'].notna().astype(int))
