In [216]:
import pandas as pd

def process_ticket_history():
    fans_ticket_history = pd.read_excel('fans_ticket_history_counter.xlsx')
    members_24_25 = pd.read_csv('List_Report_20250429103538.csv')

    # filter active members
    members_24_25 = members_24_25[members_24_25['Status'] == 'Active']
    # prepare the column - tickets_24_25 and the dataframe
    members_24_25['tickets_24_25'] = 1
    members_24_25 = members_24_25[['User Id', 'tickets_24_25']]

    # ensure data types are consistent
    print('fans_ticket_history dtype:', fans_ticket_history['client_number'].dtype)
    print('members_24_25 dtype:', members_24_25['User Id'].dtype)
    print('Shape before processing:', fans_ticket_history.shape)

    # replace values greater than 1 with 1
    columns_to_check = [
        'tickets_23_24', 'tickets_22_23', 'tickets_21_22',
        'tickets_19_20', 'tickets_18_19', 'tickets_17_18',
        'tickets_16_17', 'tickets_15_16', 'tickets_14_15'
    ]
    fans_ticket_history.loc[:, columns_to_check] = fans_ticket_history[columns_to_check].applymap(lambda x: 1 if x > 1 else x)

    # merge
    merged = fans_ticket_history.merge(
        members_24_25,
        left_on='client_number',
        right_on='User Id',
        how='left'
    )

    # identify unmatched members
    unmatched_members = members_24_25[~members_24_25['User Id'].isin(fans_ticket_history['client_number'])]

    # add missing columns to unmatched_members to match merged structure
    for col in merged.columns:
        if col not in unmatched_members.columns:
            unmatched_members[col] = None
    unmatched_members = unmatched_members[merged.columns]

    # append unmatched members
    final_df = pd.concat([merged, unmatched_members], ignore_index=True)

    print('Shape after merging:', final_df.shape)

    # update 'client_number' column
    final_df['client_number'] = final_df.apply(
        lambda row: row['client_number'] if pd.notna(row['client_number']) else row['User Id'],
        axis=1
    )

    # drop the 'User Id' column
    final_df = final_df.drop(columns=['User Id'])

    print('Shape after updating client_number:', final_df.shape)

    # fill None/NaN values with 0
    columns_to_fill = [
        'tickets_24_25', 'tickets_23_24', 'tickets_22_23',
        'tickets_21_22', 'tickets_19_20', 'tickets_18_19', 'tickets_17_18',
        'tickets_16_17', 'tickets_15_16', 'tickets_14_15'
    ]
    final_df[columns_to_fill] = final_df[columns_to_fill].fillna(0)

    final_df['tickets_24_25'] = final_df['tickets_24_25'].fillna(0).astype(int)
    final_df['client_number'] = final_df['client_number'].astype(int)

    # reorder columns
    final_df = final_df[['client_number'] + columns_to_fill]

    # save the final DataFrame
    final_df.to_csv('fans_ticket_history_counter_2024_25.csv', index=False)

    return final_df

In [217]:
final_df = process_ticket_history()

fans_ticket_history dtype: int64
members_24_25 dtype: int64
Shape before processing: (9763, 10)
Shape after merging: (12001, 12)
Shape after updating client_number: (12001, 11)


  fans_ticket_history.loc[:, columns_to_check] = fans_ticket_history[columns_to_check].applymap(lambda x: 1 if x > 1 else x)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unmatched_members[col] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unmatched_members[col] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unmatched_members[co

In [218]:
final_df

Unnamed: 0,client_number,tickets_24_25,tickets_23_24,tickets_22_23,tickets_21_22,tickets_19_20,tickets_18_19,tickets_17_18,tickets_16_17,tickets_15_16,tickets_14_15
0,10003,1,1,0,0,0,1,1,0,0,1
1,10010,1,1,1,1,1,1,1,1,1,1
2,10011,1,1,1,1,1,1,1,1,1,1
3,10011,1,1,1,1,1,1,1,1,1,1
4,10011,1,1,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...
11996,26178,1,0,0,0,0,0,0,0,0,0
11997,1330719,1,0,0,0,0,0,0,0,0,0
11998,1330686,1,0,0,0,0,0,0,0,0,0
11999,1330653,1,0,0,0,0,0,0,0,0,0
