In [1]:
import pandas as pd

In [3]:
def process_and_merge_games(file_paths):

    processed_dfs = []
    
    for file_path in file_paths:
        df = pd.read_csv(file_path)
        
        # 'Source' column and filter for "tickets"
        df['Source'] = df.apply(lambda row: "tickets" if row['Type'] == 'Ticket' and pd.isna(row['STRefID']) else "members", axis=1)
        df = df[df['Source'] == 'tickets']
        
        # remove duplicates and drop rows with missing key columns
        df = df.drop_duplicates(subset=['assign using  ID number'])
        df = df.dropna(subset=['assign using  ID number', 'First name', 'Last name', 'Email', 'Phone No.'])
        
        # relevant columns
        df = df[['assign using  ID number', 'First name', 'Last name', 'Email', 'Phone No.']]
        
        # append the processed DataFrame to the list
        processed_dfs.append(df)
    
    # merge
    final_df = processed_dfs[0]
    for i, df in enumerate(processed_dfs[1:], start=1):
        final_df = pd.merge(
            final_df, 
            df, 
            on='assign using  ID number', 
            how='inner',  # find common attendees
            suffixes=(f'', f'_game{i+1}')
        )
    
    # drop unnecessary columns dynamically
    columns_to_drop = [col for col in final_df.columns if '_game' in col or col == 'Event name']
    final_df = final_df.drop(columns=columns_to_drop)
    
    # rename columns ending with '_game1' to their original names
    final_df.columns = [col.replace('_game1', '') if col.endswith('_game1') else col for col in final_df.columns]
    
    return final_df

file_paths = [
    'AuthorizedAttendanceReportCsv_🏠 מחזור11_ הפועל ״בנק יהב״ ירושלים -מכבי ת"א.csv',
    'AuthorizedAttendanceReportCsv_Round 2_ Hapoel Tel Aviv 🚗.csv',
    'AuthorizedAttendanceReportCsv_גמר ליגת ווינר משחק 2_ מכבי תל אביב 🏠.csv',
    'AuthorizedAttendanceReportCsv_חצי גמר משחק 2_ הפועל תל אביב 🏠.csv'
]

final_df = process_and_merge_games(file_paths)

final_df.to_excel('big_games_attendees.xlsx')

final_df

  df = pd.read_csv(file_path)


Unnamed: 0,assign using ID number,First name,Last name,Email,Phone No.
0,318591658,גיא,כהן,cguymichael@gmail.com,508230306
1,217286798,אור,מילביצקי,ormil2008@gmail.com,5152572013
2,217527126,יואב,סמואל,yoavsamuel8@gmail.com,585907569
3,331547927,אילעי,יחזקאל,ylyyhzql9@gmail.com,507190069
4,324817345,נדב,אבן ארי,nadavea@gmail.com,526927178
5,36455863,איציק,אלפסי,iatk1644@gmail.com,522608894
6,206341323,אלי,ברקו,eliberko@gmail.com,542508748
7,335598553,אלון,לוי,natachametzger@hotmail.com,544335614
8,217524032,ידידיה,ברוש,yedidyabrosh@gmail.com,555692677
9,216255323,עידו,ששון,idosa3108@gmail.com,586420200
