In [24]:
import pandas as pd
import numpy as np

In [25]:
def _replace_emails_with_user_ids(emails, id_by_email):
    """Swap each e-mail for its user id, keeping unmatched e-mails and nulls.

    Parameters
    ----------
    emails : pandas.Series
        E-mail addresses; missing entries stay missing.
    id_by_email : pandas.Series
        User ids indexed by (unique) e-mail string.

    Returns
    -------
    pandas.Series
        Object-dtype series aligned with ``emails``: the user id where the
        e-mail is found in ``id_by_email`` (and the id is not null), otherwise
        the original e-mail, otherwise null.
    """
    emails = emails.astype(object)
    # Stringify only the real values so a missing e-mail never becomes the
    # literal text 'None' / 'nan'.
    keys = emails.where(emails.isna(), emails.astype(str))
    user_ids = keys.map(id_by_email)
    return keys.where(user_ids.isna(), user_ids)


def build_transaction_table(transaction_csv_path, users_csv_path):
    """Build the target transaction table from a transaction CSV and a users CSV.

    Parameters
    ----------
    transaction_csv_path : str or path-like
        CSV with the columns 'Transaction identifier', 'Date', 'Payment type',
        'Products price', 'Delivery price', 'Total', 'Payment method',
        'Transaction owner email' and 'TransactionCoownerEmail'.
    users_csv_path : str or path-like
        CSV with at least the columns 'Email' and 'id'.

    Returns
    -------
    pandas.DataFrame
        One row per input transaction, in input order, with the columns listed
        in ``columns`` below.

        * Only an owner e-mail present: 'Owner Account User ID' is set.
        * Owner and co-owner e-mails both present: the co-owner goes to
          'Cowner Account User ID' and the owner e-mail to 'Sales Man'.
        * E-mails found in the users file are replaced by that user's ``id``;
          unmatched e-mails are kept as-is; rows without an e-mail stay null.

    Raises
    ------
    KeyError
        If one of the columns named above is missing from its CSV.
    """
    # low_memory=False makes pandas infer each column's dtype from the whole
    # file at once, which avoids the mixed-type DtypeWarning on large exports.
    transactions_raw = pd.read_csv(transaction_csv_path, low_memory=False)
    users = pd.read_csv(users_csv_path)

    # Parse dates, coercing unparseable values to NaT, and drop the time of day.
    # NOTE(review): ambiguous day/month strings are read with pandas' default
    # (month-first) inference - confirm this matches the export format.
    transaction_date = pd.to_datetime(
        transactions_raw['Date'],
        errors='coerce',
    ).dt.normalize()

    # Map known 'Payment type' labels to integer codes; unknown labels pass through.
    payment_type = transactions_raw['Payment type']
    payment_type_number = np.select(
        [
            payment_type == 'Internet',
            payment_type == 'CashDesk',
            payment_type == 'Api',
        ],
        [1, 2, 4],
        default=payment_type,
    )

    # Target column order of the returned DataFrame.
    columns = [
        'Roboticket ID', 'Owner Account User ID', 'Cowner Account User ID',
        'Payment Date', 'Finish Date', 'Created At', 'Updated On', 'Base Price',
        'Delivery Price', 'Price',
        'Payment Type Number', 'Payment Gate', 'Payment Method', 'Status',
        'Sales Man', 'Created By'
    ]

    owner_email = transactions_raw['Transaction owner email']
    coowner_email = transactions_raw['TransactionCoownerEmail']
    owner_only = owner_email.notna() & coowner_email.isna()
    both_present = owner_email.notna() & coowner_email.notna()

    # E-mail -> user id lookup. Users without an e-mail are ignored and the
    # first occurrence wins for duplicated e-mails.
    known_users = users.dropna(subset=['Email'])
    id_by_email = (
        known_users
        .assign(Email=known_users['Email'].astype(str))
        .drop_duplicates(subset='Email')
        .set_index('Email')['id']
    )

    # Series.where leaves every row outside the mask as a real null.
    owner_account = _replace_emails_with_user_ids(
        owner_email.where(owner_only), id_by_email
    )
    coowner_account = _replace_emails_with_user_ids(
        coowner_email.where(both_present), id_by_email
    )
    sales_man = owner_email.where(both_present)

    transactions_new = pd.DataFrame({
        'Roboticket ID': transactions_raw['Transaction identifier'],
        'Owner Account User ID': owner_account,
        'Cowner Account User ID': coowner_account,
        'Payment Date': transaction_date,
        'Finish Date': transaction_date,
        'Created At': transaction_date,
        'Updated On': transaction_date,
        'Base Price': transactions_raw['Products price'],
        'Delivery Price': transactions_raw['Delivery price'],
        'Price': transactions_raw['Total'],
        'Payment Type Number': payment_type_number,
        'Payment Gate': None,
        'Payment Method': transactions_raw['Payment method'],
        'Status': None,
        'Sales Man': sales_man,
        'Created By': None,
    })

    # Select explicitly so the output always follows the target column order.
    return transactions_new[columns]


In [26]:
# Input exports: the transaction report first, the users report second.
transactions_report_csv = 'TransactionReportCsv_From_2024-05-01_To_2025-06-25.csv'
users_report_csv = 'StatisticReport_2025-06-23-2.csv'

final_df = build_transaction_table(transactions_report_csv, users_report_csv)

# Leave the frame as the cell's last expression so the notebook renders it.
final_df

  transactions_raw = pd.read_csv(transaction_csv_path)


Unnamed: 0,Roboticket ID,Owner Account User ID,Cowner Account User ID,Payment Date,Finish Date,Created At,Updated On,Base Price,Delivery Price,Price,Payment Type Number,Payment Gate,Payment Method,Status,Sales Man,Created By
0,3274343.0,11457.0,,2024-05-02,2024-05-02,2024-05-02,2024-05-02,150.0,0.0,150.0,2,,Cash,,,
1,3274344.0,11635.0,,2024-05-02,2024-05-02,2024-05-02,2024-05-02,100.0,0.0,100.0,2,,Cash,,,
2,3274345.0,11635.0,,2024-05-02,2024-05-02,2024-05-02,2024-05-02,100.0,0.0,100.0,2,,Cash,,,
3,3274346.0,11905.0,,2024-05-02,2024-05-02,2024-05-02,2024-05-02,150.0,0.0,150.0,2,,Cash,,,
4,3274347.0,11905.0,,2024-05-02,2024-05-02,2024-05-02,2024-05-02,150.0,0.0,150.0,2,,Cash,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69138,3747026.0,,11745.0,2025-06-23,2025-06-23,2025-06-23,2025-06-23,0.0,0.0,0.0,2,,Cash,,shoham_hapoel,
69139,3747033.0,,13820.0,2025-06-24,2025-06-24,2025-06-24,2025-06-24,0.0,0.0,0.0,2,,Cash,,shoham_hapoel,
69140,3747034.0,,31370.0,2025-06-24,2025-06-24,2025-06-24,2025-06-24,0.0,0.0,0.0,2,,Cash,,shoham_hapoel,
69141,3747036.0,,48965.0,2025-06-24,2025-06-24,2025-06-24,2025-06-24,-850.0,0.0,-850.0,2,,Pelecard_Transfer,,shoham_hapoel,


In [28]:
# Write the assembled table to an Excel workbook at a path relative to the working directory.
# NOTE(review): to_excel also writes the DataFrame index as the first column
# by default - confirm the consumer of this file expects that.
excel_output_path = 'all_transactions2425_2526.xlsx'
final_df.to_excel(excel_output_path)