In [1]:
import pandas as pd
import numpy as np

In [23]:
seasontickets = pd.read_csv('seasontickets.csv')
chair_mapping = pd.read_excel('chair_mapping_20240801.xlsx')
sf = pd.read_csv('extract.csv')

installments = pd.read_csv('HapoelInstallmentsReportCsv_2025-08-13.csv')
installments = installments.rename(columns={'InstallmentTransactionId':'Transaction',
                                            'Installment Ticket Id': 'Return ID'})

## Separate data by type:

In [24]:
print('seasontickets shape:', seasontickets.shape)

seasontickets shape: (6446, 52)


In [25]:
seasontickets = seasontickets.loc[~seasontickets['Product'].str.contains('|'.join(['חניה', 'חניית', 'חיצונית']), na=False)]

return_dat = seasontickets[(seasontickets['Type'] == 'Return') & (seasontickets['Status'] == 'Canceled')]
print('return_dat shape:', return_dat.shape)

sale_dat = seasontickets[(seasontickets['Type'] == 'Sale') & (seasontickets['Status'] == 'Canceled')]
print('sale_dat shape:', sale_dat.shape)


return_dat shape: (701, 52)
sale_dat shape: (1062, 52)


In [27]:
return_dat.to_excel('return_dat_seasontickets.xlsx')

In [18]:
all_products = seasontickets.loc[~seasontickets['Product'].str.contains('|'.join(['חניה', 'חניית', 'חיצונית']), na=False), 'Product'].unique()

In [19]:
print('Installments shape before:', installments.shape)
installments = installments[installments['InstallmentProducts'].isin(all_products)]
print('Installments shape after:', installments.shape)

Installments shape before: (161593, 10)
Installments shape after: (11104, 10)


## Separate to different use cases:

In [20]:
df = return_dat.copy()

# add count column that counts the occurrences of each product in transaction
df['count'] = (
    df.groupby(['Transaction', 'Product'])['Transaction']
      .transform('size')
)

multi = df[df['count'] > 1]
print('Rows with more than one product:', multi.shape)

single = df[df['count'] == 1]
print('Rows with exactly one product:', single.shape)

Rows with more than one product: (575, 53)
Rows with exactly one product: (126, 53)


## Merge single table with return_dat:

In [21]:
single['Transaction'] = single['Transaction'].astype(int)
installments['Transaction'] = installments['Transaction'].astype(int)
return_single_table = single.merge(installments[['Return ID', 'Transaction']], on='Transaction', how='left')

return_single_table

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  single['Transaction'] = single['Transaction'].astype(int)


Unnamed: 0,Product Id,Product,Status,Id,Fan / Company,User Id,assign using ID number,First name,Last name,Email,...,Delivery type,Ticket Note,Transaction Note,Role,ReturnRequestStatus,Voucher number,Voucher value,Unnamed: 51,count,Return ID
0,62,מנוי לעונת 2024/2025,Canceled,815226,עדו מידןטסט,1330192,043655885,עדו,מידןטסט,idomd123@gmail.com,...,Virtual Card,,,Administrator,,,,,1,234826.0
1,62,מנוי לעונת 2024/2025,Canceled,861357,בדיקה סנטי,1356988,999024300,בדיקה,סנטי,almogbl+6@moveo.co.il,...,Virtual Card,,,Administrator,,1412313/6507,,,1,234824.0
2,62,מנוי לעונת 2024/2025,Canceled,861671,בדיקה סנטי,1356988,999024300,בדיקה,סנטי,almogbl+6@moveo.co.il,...,Virtual Card,,,Administrator,,1412313/6507,,,1,234822.0
3,62,מנוי לעונת 2024/2025,Canceled,861670,בדיקה סנטי,1356988,999024300,בדיקה,סנטי,almogbl+6@moveo.co.il,...,Virtual Card,,,Administrator,,1412313/6507,,,1,234807.0
4,62,מנוי לעונת 2024/2025,Canceled,861672,בדיקה סנטי,1356988,999024300,בדיקה,סנטי,almogbl+6@moveo.co.il,...,Virtual Card,,,Administrator,,1412313/6507,,,1,234804.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121,62,מנוי לעונת 2024/2025,Canceled,811499,דניאל חולי,35278,203082805,דניאל,חולי,danielhuli11@gmail.com,...,Virtual Card,,,Administrator,,,,,1,176463.0
122,62,מנוי לעונת 2024/2025,Canceled,807879,גבי פרץ,12140,61605879,גבי,פרץ,gabipe@walla.com,...,Virtual Card,,,Administrator,,,,,1,176359.0
123,62,מנוי לעונת 2024/2025,Canceled,804910,מיכאל חייט,26168,312185655,מיכאל,חייט,hayatmiki2@gmail.com,...,Virtual Card,,,Cashier,,,,,1,176333.0
124,62,מנוי לעונת 2024/2025,Canceled,804897,דורון הרשקו,24745,205461965,דורון,הרשקו,doronhershko@gmail.com,...,Virtual Card,,,Cashier,,,,,1,176331.0


### Summing the number of tickets and comparing it to the number of unique prices:

In [22]:
print('return_dat.shape:', return_dat.shape)
print('multi.shape:', multi.shape)

return_dat.shape: (701, 52)
multi.shape: (575, 53)


In [45]:
seasontickets = seasontickets[seasontickets['Type'] == 'Return']
    
# Rename columns
seasontickets = seasontickets.rename(columns={
    'Product Id': 'Product ID',
    'Product': 'Asset Name',
    'Id': 'Roboticket ID',
    'Price area': 'Price Area Name',
    'Ticket price types': 'Price Name',
    'Base price': 'Base Price',
    'Type': 'Operation Name',
    'Area': 'Sector',
    'Date.1': 'Product Date',
    'User Id': 'Owner User ID',
    'assign using  ID number': 'Owner Identifier'
})

# Add missing columns
new_columns = [
    'Season ID', 'From', 'To', 'Stadium ID', 'Product Type', 'Product Name', 'Operation', 'Return Type', 'Return ID',
    'Platform', 'Entrance Text', 'Seat ID', 'Price Modifier Value', 'Access Token', 'Ticket ID', 'Transaction Owner',
    'Description', 'Account', 'Account ID', 'Contact', 'Product', 'Purchaser Account',
    'Return Asset', 'Status', 'Is Ticket', 'Is Season Ticket', 'Created By', 'Last Modified By'
]
for col in new_columns:
    if col not in seasontickets.columns:
        seasontickets[col] = None

# Reorder columns
desired_columns = [
    'Product ID', 'Season ID', 'Asset Name', 'Roboticket ID', 'Price', 'Price Name', 'Price Area Name', 'Base Price', 'From', 'To',
    'Stadium ID', 'Sector', 'Row', 'Number', 'Product Type', 'Operation Name', 'Product Name', 'Product Date',
    'Operation', 'Return Type', 'Return ID', 'Owner User ID', 'Owner Identifier', 'Platform', 'Entrance Text',
    'Seat ID', 'Price Modifier Value', 'Access Token', 'Ticket ID', 'Transaction Owner', 'Purchaser Account', 'Stadium',
    'Description', 'Account', 'Account ID', 'Contact', 'Product', 'Transaction', 'Return Asset', 'Status', 'Is Ticket',
    'Is Season Ticket', 'Created By', 'Last Modified By'
]
for col in desired_columns:
    if col not in seasontickets.columns:
        seasontickets[col] = None
seasontickets = seasontickets[desired_columns]

# Set default values
seasontickets['Season ID'] = 24
seasontickets['Product Name'] = seasontickets['Asset Name']
seasontickets['Product Type'] = 'Match'
seasontickets['Purchaser Account'] = seasontickets['Owner User ID']
seasontickets['Is Ticket'] = False
seasontickets['Is Season Ticket'] = True
seasontickets['Stadium ID'] = np.where(
    seasontickets['Stadium'] == 'פיס ארנה', 1, seasontickets['Stadium ID']
)

# Define mapping function for Hebrew sectors
def map_sector_to_section_name(sector):
    sector = str(sector).strip()
    if sector.startswith('אולם'):
        number = ''.join(filter(str.isdigit, sector))
        return f'Court_{number}' if number else None
    elif 'תא צפוני' in sector:
        return 'Suite_21'
    elif 'תא צפיה' in sector:
        number = ''.join(filter(str.isdigit, sector))
        if number and 1 <= int(number) <= 16:
            return f'Suite_{number}'
    elif 'פרקט דרומי' in sector:
        return 'Court_side_17'
    elif 'פרקט צפוני' in sector:
        return 'Court_side_19'
    elif 'פרקט מרכזי' in sector or 'פרקט מרכז' in sector:
        return 'Court_side_18'
    elif 'גלריה' in sector:
        number = ''.join(filter(str.isdigit, sector))
        return f'Gallery_{number}' if number else None
    elif 'Courtyard' in sector:
        return 'Unmarked_12'
    return None

# Map section name
seasontickets['section_name'] = seasontickets['Sector'].apply(map_sector_to_section_name)
seasontickets['Row'] = seasontickets['Row'].astype(str).str.strip()
seasontickets['Number'] = seasontickets['Number'].astype(str).str.strip()

# Load chair mapping
chair_mapping['row_number'] = chair_mapping['row_number'].astype(str).str.strip()
chair_mapping['seat_number'] = chair_mapping['seat_number'].astype(str).str.strip()

# Merge to get Seat ID
seasontickets = seasontickets.merge(
    chair_mapping[['seat_id', 'section_name', 'row_number', 'seat_number']],
    how='left',
    left_on=['section_name', 'Row', 'Number'],
    right_on=['section_name', 'row_number', 'seat_number']
)

seasontickets['Seat ID'] = seasontickets['seat_id']
seasontickets.drop(columns=['seat_id', 'section_name', 'row_number', 'seat_number'], inplace=True)

# Load SF data
seasontickets_ids = seasontickets[['Owner Identifier', 'Account ID']].copy()
merged_ids = seasontickets_ids.merge(
    sf[['id__c', 'AccountId']].astype(str).apply(lambda x: x.str.strip()),
    how='left',
    left_on='Owner Identifier',
    right_on='id__c'
)
merged_ids['Account ID'] = merged_ids['Account ID'].fillna(merged_ids['AccountId'])
merged_ids.drop(columns=['id__c', 'AccountId'], inplace=True)
seasontickets['Account ID'] = merged_ids['Account ID']

seasontickets

Unnamed: 0,Product ID,Season ID,Asset Name,Roboticket ID,Price,Price Name,Price Area Name,Base Price,From,To,...,Account ID,Contact,Product,Transaction,Return Asset,Status,Is Ticket,Is Season Ticket,Created By,Last Modified By
0,62,24,מנוי לעונת 2024/2025,815226,0,Child (renew),F,0,,,...,001Ps00000o67BjIAI,,,3734390,,Canceled,False,True,,
1,62,24,מנוי לעונת 2024/2025,861357,0,Adult,Gold,0,,,...,001Ps00000O7zKHIAZ,,,3734385,,Canceled,False,True,,
2,62,24,מנוי לעונת 2024/2025,861671,0,Adult,F,0,,,...,001Ps00000O7zKHIAZ,,,3734379,,Canceled,False,True,,
3,62,24,מנוי לעונת 2024/2025,861667,0,Community,A,0,,,...,001Ps00000O7zKHIAZ,,,3734370,,Canceled,False,True,,
4,62,24,מנוי לעונת 2024/2025,861669,0,Adult,Silver Up,0,,,...,001Ps00000O7zKHIAZ,,,3734370,,Canceled,False,True,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
696,62,24,מנוי לעונת 2024/2025,811269,-1550,Adult (renew),B,0,,,...,001Ps00000aAiz8IAC,,,3476816,,Canceled,False,True,,
697,62,24,מנוי לעונת 2024/2025,807879,-2100,Adult (renew),A,0,,,...,001Hu00003AEw1WIAT,,,3474808,,Canceled,False,True,,
698,62,24,מנוי לעונת 2024/2025,804910,-1050,Adult (renew),C,0,,,...,001Hu00003AEzPRIA1,,,3473754,,Canceled,False,True,,
699,62,24,מנוי לעונת 2024/2025,804897,-1050,Adult (renew),C,0,,,...,001Hu00003AEyDqIAL,,,3473700,,Canceled,False,True,,


In [46]:
installments = pd.read_csv('HapoelInstallmentsReportCsv_2025-08-13.csv')

installments = installments.rename(columns={'InstallmentTransactionId':'Transaction',
                                            'Installment Ticket Id': 'Return ID'})
installments

Unnamed: 0,Return ID,Transaction,InstallmentDate,InstallmentValueDate,InstallmentProducts,InstallmentPaymentPrice,InstallmentProductPrice,InstallmentPaymentExtRef,InstallmentProductExtRef,Installments
0,,3596914,01/01/2025,01/01/2025,Other Payment,0.0,,Other,,
1,3163501.0,3596914,01/01/2025,,🏠 מחזור 13 (רדיוס בחולון): מכבי עירוני רמת גן,,0.0,,4114.0,
2,3163500.0,3596914,01/01/2025,,🏠 מחזור 13 (רדיוס בחולון): מכבי עירוני רמת גן,,0.0,,4114.0,
3,,3596915,01/01/2025,01/01/2025,Other Payment,0.0,,Other,,
4,3163497.0,3596915,01/01/2025,,🏠 מחזור 13 (רדיוס בחולון): מכבי עירוני רמת גן,,0.0,,4114.0,
...,...,...,...,...,...,...,...,...,...,...
161588,3163489.0,3596912,31/12/2024,,🏠 מחזור 13 (רדיוס בחולון): מכבי עירוני רמת גן,,0.0,,4114.0,
161589,3163490.0,3596912,31/12/2024,,🏠 מחזור 13 (רדיוס בחולון): מכבי עירוני רמת גן,,0.0,,4114.0,
161590,3163491.0,3596912,31/12/2024,,🏠 מחזור 13 (רדיוס בחולון): מכבי עירוני רמת גן,,0.0,,4114.0,
161591,,3596913,31/12/2024,31/12/2024,Other Payment,0.0,,Other,,


In [None]:
# --- align + standardize ---
tickets  = seasontickets.copy()
installs = installments.copy()

tickets['Transaction']  = pd.to_numeric(tickets['Transaction'],  errors='coerce').astype('Int64')
tickets['Roboticket ID'] = pd.to_numeric(tickets['Roboticket ID'], errors='coerce').astype('Int64')
installs['Transaction'] = pd.to_numeric(installs['Transaction'], errors='coerce').astype('Int64')

# normalize ReturnID column name on installments
ret_cols = ['Return ID', 'Installment Ticket Id', 'Installment Ticket ID', 'ReturnID']
ret_col  = next(c for c in ret_cols if c in installs.columns)
installs = installs.rename(columns={ret_col: 'ReturnID'})

# --- keep only installments for transactions that exist in tickets ---
tx_df     = tickets[['Transaction']].drop_duplicates()
inst_kept = installs.merge(tx_df, on='Transaction', how='inner')

# --- assign within-transaction index on both sides and pair ---
tickets_ord = (
    tickets.sort_values(['Transaction','Roboticket ID'])
           .drop_duplicates(['Transaction','Roboticket ID'])
           .assign(k=lambda d: d.groupby('Transaction').cumcount())
)

inst_ord = (
    inst_kept.sort_values(['Transaction','ReturnID','InstallmentDate'])
             .assign(k=lambda d: d.groupby('Transaction').cumcount())
             [['Transaction','k','ReturnID']]   # add more cols here if you want them carried over
)

paired = tickets_ord.merge(inst_ord, on=['Transaction','k'], how='left', validate='one_to_one') \
                    .drop(columns=['k'])

paired['ReturnID'] = paired['ReturnID'].astype('Int64')

### Validation:

In [48]:
dups_global = (paired.dropna(subset=['Return ID'])
                     .loc[lambda d: d.duplicated('Return ID', keep=False)]
                     .sort_values('Return ID'))
print("Global duplicates in paired:", dups_global['Return ID'].nunique())


Global duplicates in paired: 0


In [49]:
paired.iloc[0]

Product ID                                62
Season ID                                 24
Asset Name              מנוי לעונת 2024/2025
Roboticket ID                         802190
Price                                  -1150
Price Name                             Adult
Price Area Name                            F
Base Price                                 0
From                                    None
To                                      None
Stadium ID                              None
Sector                               3 גלריה
Row                                       11
Number                                     4
Product Type                           Match
Operation Name                        Return
Product Name            מנוי לעונת 2024/2025
Product Date                      2024-07-01
Operation                               None
Return Type                             None
Return ID                               None
Owner User ID                          13885
Owner Iden

In [50]:
paired.to_excel('seasontickets2425_with_returnID.xlsx')