In [3]:
import pandas as pd
import numpy as np
from datetime import date, timedelta

## Creation of the report:

### NO NEED TO READ THE USERS FILE & ST_2425 AGAIN!

In [4]:
# Load the users report Excel file
users = pd.read_excel('Users report-2025-07-02-02-14-55.xlsx')

# Drop the first 13 rows (probably header or metadata rows)
users = users.drop(index=range(0, 13))

# Remove duplicates based on "ID"
users = users.drop_duplicates(subset=["ID"])

# Remove duplicates based on "HJBC ID"
users = users.drop_duplicates(subset=["HJBC ID"])

# Convert "Marketing Allowed" from numeric to Boolean
users["Marketing Allowed"] = users["Marketing Allowed"].map({
    1.0: True,
    0.0: False
})

# Drop any rows where "ID" is missing
users = users.dropna(subset=["ID"])

# Keep only relevant columns
users = users[[
    'Account Name',
    'ID',
    'HJBC ID',
    'Phone',
    'Email',
    'Created Date',
    'Marketing Allowed',
]]

### Users who have price = 0 and we can call them & users who used vouchers:

In [None]:
st_2425 = pd.read_csv('List_Report_20250609080140.csv')

def prepare_cleaned_season_data(
    st_csv_path: str,
    vouchers_csv_path: str,
    vouchers_to_exclude: list,
    max_user_rows: int = 10
) -> pd.DataFrame:
    """
    Load, clean, and merge season ticket data with voucher usage, applying specific filters per product and type.
    """

    # Load CSVs
    st = pd.read_csv(st_csv_path)
    vouchers = pd.read_csv(vouchers_csv_path)

    # Convert price columns to numeric
    st["Price"] = pd.to_numeric(st["Price"], errors="coerce").astype("Int64")
    st["Base price"] = pd.to_numeric(st["Base price"], errors="coerce").astype("Int64")

    # --------------------
    # Identify conditions for removing duplicate Sales
    # --------------------
    active_sale = (st["Status"] == "Active") & (st["Type"] == "Sale")
    canceled_not_sale = (st["Status"] == "Canceled") & (st["Type"] != "Sale")

    users_with_both = set(st[active_sale]["User Id"]) & set(st[canceled_not_sale]["User Id"])

    # Remove Active Sales for those users
    st_filtered = st[~(active_sale & st["User Id"].isin(users_with_both))].reset_index(drop=True)

    # Remove duplicates
    st_filtered = st_filtered.drop_duplicates()

    # Prepare vouchers DataFrame
    vouchers = vouchers[["Voucher number", "User first name", "User last name", "Package name"]]

    # Merge vouchers
    st_filtered = st_filtered.merge(
        vouchers,
        on="Voucher number",
        how="left"
    )

    # Keep only rows where Voucher number is missing
    st_filtered = st_filtered[st_filtered["Voucher number"].isna()]

    # Normalize text for comparison
    st_filtered["Status"] = st_filtered["Status"].str.lower()
    st_filtered["Type"] = st_filtered["Type"].str.lower()

    # Remove duplicate rows for the same "Fan / Company"
    same_fan = st_filtered["Fan / Company"] == st_filtered["Fan / Company"].shift(1)
    not_canceled = st_filtered["Status"] != "canceled"
    is_sale = st_filtered["Type"] == "sale"

    to_drop = same_fan & not_canceled & is_sale
    st_filtered = st_filtered[~to_drop].reset_index(drop=True)

    # Remove specific voucher codes
    st_filtered = st_filtered[~st_filtered["Voucher number"].isin(vouchers_to_exclude)]

    # --------------------
    # Product-based filtering
    # --------------------
    # For 2024/2025
    mask_2425 = (
        (st["Product"] == "מנוי לעונת 2024/2025") &
        (
            (st["Status"] == "Active") |
            ((st["Status"] == "Canceled") & (st["Type"] == "SeatChange")) |
            (st["Type"].isin(["SeasonTicketExchange", "Return", "Sale"]))
        )
    )

    # For 2025/2026
    mask_2526 = (
        (st["Product"] == "מנוי לעונת 2025/2026") &
        (st["Type"] != "Return")
    )

    # For other products
    mask_other = ~st["Product"].isin(["מנוי לעונת 2024/2025", "מנוי לעונת 2025/2026"])

    # Combine masks
    keep_rows = mask_2425 | mask_2526 | mask_other

    st = st[keep_rows].reset_index(drop=True)

    # --------------------
    # Define st_active again after filtering
    # --------------------
    st_active = st[
        (st["Status"] == "Active") |
        ((st["Status"] == "Canceled") & (st["Type"] == "SeatChange")) |
        (st["Type"].isin(["SeasonTicketExchange", "Return", "Sale"]))
    ]

    # Merge filtered info back onto Active
    df = st_active.merge(st_filtered, on="User Id", how="left")

    # Limit users with too many rows
    df = df[df["User Id"].map(df["User Id"].value_counts()) < max_user_rows]

    # Drop duplicates
    df = df.drop_duplicates(subset="User Id")

    return df

In [6]:
vouchers_to_exclude = [
    "irit1", "Dawi1", "FIXST", "irit2", "iheartdudu", "Testa1",
    "aharoniST2", "Ngordon1", "MDST2", "FIXST3",
    "Karol1", "Testa2", "dudu1", "Ngordon2", "Dawi2"
]

df = prepare_cleaned_season_data(
    st_csv_path="List_Report_20250609080140.csv",
    vouchers_csv_path="VoucherUseReport_2025-07-03.csv",
    vouchers_to_exclude=vouchers_to_exclude
)

In [7]:
def combine_x_y_columns(df, drop_original=False):
    merged_cols = [col for col in df.columns if col.endswith('_x')]
    for merged_col in merged_cols:
        base = merged_col.rsplit('_', 1)[0]
        ongoing_col = base + '_y'
        if ongoing_col in df.columns:
            df[base] = df[merged_col].combine_first(df[ongoing_col])
            if drop_original:
                df.drop(columns=[merged_col, ongoing_col], inplace=True)
    return df

df = combine_x_y_columns(df, drop_original=True)
df

  df[base] = df[merged_col].combine_first(df[ongoing_col])
  df[base] = df[merged_col].combine_first(df[ongoing_col])
  df[base] = df[merged_col].combine_first(df[ongoing_col])
  df[base] = df[merged_col].combine_first(df[ongoing_col])


Unnamed: 0,User Id,User first name,User last name,Package name,Product Id,Product,Status,Id,Fan / Company,assign using ID number,...,Payment type,Payment method,Delivery type,Ticket Note,Transaction Note,Role,ReturnRequestStatus,Voucher number,Voucher value,Unnamed: 51
0,1373314,,,,62,מנוי לעונת 2024/2025,Active,878881,יואב אולשינקה,339557373,...,Cash,Cash,Online ticket,,,Administrator,,Youthd,,
1,41088,,,,62,מנוי לעונת 2024/2025,Active,878880,יובל אולשינקה,336909007,...,Cash,Cash,Online ticket,,,Administrator,,Youthd,,
4954,1420313,,,,62,מנוי לעונת 2024/2025,Active,877429,פורטרס בדיקה,999820962,...,Cash,Cash,Virtual Card,,,Administrator,,1412313/6507,,
4955,1420297,,,,62,מנוי לעונת 2024/2025,Active,877428,גיל מליחי,319030359,...,Cash,Cash,Virtual Card,,,Administrator,,1410795/9430,,
128756,1331180,,,,62,מנוי לעונת 2024/2025,Active,876077,יוסי אוברגוט,056629033,...,Cash,Cash,Virtual Card,,,Administrator,,1412313/6507,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34651635,1330719,,,,62,מנוי לעונת 2024/2025,Canceled,801858,Testing ROBO,336008404,...,Pelecard_PayType_Pelecard_Credit Card,Pelecard_Credit Card,Virtual Card,,,User,,1409087/9782,,
34654113,1330686,,,,62,מנוי לעונת 2024/2025,Active,801760,Test ROBO,324100221,...,Pelecard_PayType_Pelecard_Credit Card,Pelecard_Credit Card,Virtual Card,,,User,,1409089/7532,,
34654114,1330653,,,,62,מנוי לעונת 2024/2025,Canceled,801727,Test twelve,221443211,...,Pelecard_PayType_Pelecard_Credit Card,Pelecard_Credit Card,Virtual Card,,,User,,1409087/9782,,
34654116,1330620,,,,62,מנוי לעונת 2024/2025,Canceled,801660,eleven test,229580105,...,Pelecard_PayType_Pelecard_Credit Card,Pelecard_Credit Card,Virtual Card,,,User,,1409080/5266,,


In [8]:
# 🟢 Exclude rows with specific OldPriceType values
values_to_drop = ["מחלקת נוער", "Community", "עסקי", "מחלקת נוער בי\\"]
df = df[~df['Ticket price types'].isin(values_to_drop)]

# 🟢 Keep relevant columns
df = df[[
    'Product', 'Fan / Company', 'User Id',
    'assign using  ID number', 'Email', 'Phone',
    'Base price', 'Price', 'Price area', 'Type'
]]

# 🟢 Convert IDs to numeric
users["ID"] = pd.to_numeric(users["ID"], errors="coerce")
df['assign using  ID number'] = pd.to_numeric(df['assign using  ID number'], errors="coerce").astype("Int64")
df["User Id"] = pd.to_numeric(df["User Id"], errors="coerce").astype("Int64")
users['HJBC ID'] = pd.to_numeric(users['HJBC ID'], errors="coerce").astype("Int64")

# 🟢 Merge users and df
base_df = users.merge(
    df,
    left_on="HJBC ID",
    right_on="User Id",
    how="left"
)

# 🟢 Combine columns only within base_df
base_df["Fan / Company"] = base_df["Fan / Company"].combine_first(base_df["Account Name"])
base_df["User Id"] = base_df["User Id"].combine_first(base_df["HJBC ID"])
base_df["assign using  ID number"] = base_df["assign using  ID number"].combine_first(base_df["ID"])
base_df["Phone"] = base_df["Phone_x"].combine_first(base_df["Phone_y"])
base_df["Email"] = base_df["Email_x"].combine_first(base_df["Email_y"])

# Drop old columns
base_df = base_df.drop(columns=[
    "HJBC ID", "ID", "Phone_x", "Phone_y", "Email_x", "Email_y"
])

base_df

Unnamed: 0,Account Name,Created Date,Marketing Allowed,Product,Fan / Company,User Id,assign using ID number,Base price,Price,Price area,Type,Phone,Email
0,שי קציר,5/7/2024,False,מנוי לעונת 2024/2025,שי קציר,10010,40175192.0,1550,1550,B,Sale,0502877926,katzirs30@gmail.com
1,שלמה קוטלר,5/7/2024,True,מנוי לעונת 2024/2025,שלמה קוטלר,10011,94938.0,3300,3300,Silver Up,Sale,0544755212,rachel@kotler-adika.co.il
2,יונתן אנסלמן,5/7/2024,True,,יונתן אנסלמן,10528,38818340.0,,,,,0524567167,yonie84@gmail.com
3,רונן נהרי,5/7/2024,False,מנוי לעונת 2024/2025,רונן נהרי,10529,59723486.0,2100,2100,A,Sale,0528523030,48ronen@gmail.com
4,עידו פלדור,5/7/2024,False,מנוי לעונת 2024/2025,עידו פלדור,10536,33486960.0,1050,1050,C,Sale,(050) 894-6047,paldor1@gmail.com
...,...,...,...,...,...,...,...,...,...,...,...,...,...
22564,מיכל אהלי,7/1/2025,False,,מיכל אהלי,1430149,209490556.0,,,,,,
22565,יוחנן שיפמן,7/1/2025,False,,יוחנן שיפמן,11085,11085.0,,,,,0507874290,
22566,עומר קרני,7/1/2025,False,,עומר קרני,31779,205417157.0,,,,,0523142105,omerkarni24@gmail.com
22567,אפרים בורשטיין,7/1/2025,False,,אפרים בורשטיין,13542,13542.0,,,,,0526966877,


In [9]:
# 🟢 Load renewal report
renewal = pd.read_csv('SeasonTicketRenewReport_2025-07-02.csv').drop_duplicates(subset="User")
renewal['User'] = pd.to_numeric(renewal['User'], errors="coerce").astype("Int64")

# 🟢 Merge renewal info ON IDs
merged = base_df.merge(
    renewal,
    left_on="assign using  ID number",
    right_on="User",
    how="left"
)

# Combine phone and email within merged DataFrame only
merged["Phone"] = merged["Phone_x"].combine_first(merged["Phone_y"])
merged["Email"] = merged["Email_x"].combine_first(merged["Email_y"])

# 🟢 Seat builders
def clean_number(x):
    if pd.isna(x):
        return ''
    if isinstance(x, float) and x.is_integer():
        return str(int(x))
    return str(x)

def build_old_seat(row):
    if pd.isna(row['OldSector']) or pd.isna(row['OldRow']) or pd.isna(row['OldNumber']):
        return None
    return (
        str(row['OldSector']) +
        " שורה " +
        clean_number(row['OldRow']) +
        " כיסא " +
        clean_number(row['OldNumber'])
    )

def build_renew_seat(row):
    if pd.isna(row['RenewSector']) or pd.isna(row['RenewRow']) or pd.isna(row['RenewNumber']):
        return None
    return (
        str(row['RenewSector']) +
        " שורה " +
        clean_number(row['RenewRow']) +
        " כיסא " +
        clean_number(row['RenewNumber'])
    )

merged['Old Seat'] = merged.apply(build_old_seat, axis=1)
merged['Renew Seat'] = merged.apply(build_renew_seat, axis=1)

# 🟢 Renew flags
merged['RenewSeasonTicket'] = np.where(
    merged['RenewSeasonTicket'].isna(),
    False,
    True
)

merged.loc[merged['RenewSeasonTicket'] == 'TRUE', 'Status'] = 'חידש'

# 🟢 Create Full Name
merged['Full Name'] = merged['First name'] + ' ' + merged['Last name']
merged["TZ"] = merged["assign using  ID number"].combine_first(merged["User"])

merged["OldSector"] = merged["OldSector"].astype(str).str.strip()

# 🟢 Filter sectors
mask = merged["OldSector"].str.contains('גלריה|פרקט', na=False)

# Filter them out
merged = merged[~mask]

# Keep only the columns you want
columns_to_keep = [
    'Product',
    'Account Name',
    'Full Name',
    'User Id',
    'TZ',
    'Email',
    'School',
    'Phone',
    'OldSeasonTicketId',
    'Old Season Ticket (All Owners)',
    'Base price',
    'Price',
    'Old Seat',
    'Price area',
    'OldTransactionDate',
    'RenewSeasonTicketId',
    'RenewSeasonTicket',
    'Renew Seat',
    'RenewTransactionDate',
    'RenewType',
    'Marketing Allowed'
    ]
merged = merged[columns_to_keep]

merged

  merged = base_df.merge(


Unnamed: 0,Product,Account Name,Full Name,User Id,TZ,Email,School,Phone,OldSeasonTicketId,Old Season Ticket (All Owners),...,Price,Old Seat,Price area,OldTransactionDate,RenewSeasonTicketId,RenewSeasonTicket,Renew Seat,RenewTransactionDate,RenewType,Marketing Allowed
0,מנוי לעונת 2024/2025,שי קציר,שי קציר,10010,40175192.0,katzirs30@gmail.com,,0502877926,859905.0,מנוי לעונת 2024/2025,...,1550,אולם 2 שורה 19 כיסא 13,B,2024-12-05 14:43:58,899548.0,True,אולם 2 שורה 19 כיסא 13,2025-06-10 09:35:20,SameSeat,False
1,מנוי לעונת 2024/2025,שלמה קוטלר,שלמה קוטלר,10011,94938.0,rachel@kotler-adika.co.il,,0544755212,849584.0,מנוי לעונת 2024/2025,...,3300,אולם 9 שורה 18 כיסא 10,Silver Up,2024-10-10 15:27:23,,False,,,Empty,True
2,,יונתן אנסלמן,יונתן אנסלמן,10528,38818340.0,yonie84@gmail.com,,0524567167,820053.0,מנוי לעונת 2024/2025,...,,אולם 10 שורה 15 כיסא 12,,2024-07-21 20:09:08,,False,,,Empty,True
3,מנוי לעונת 2024/2025,רונן נהרי,רונן נהרי,10529,59723486.0,48ronen@gmail.com,,0528523030,805315.0,מנוי לעונת 2024/2025,...,2100,אולם 10 שורה 6 כיסא 1,A,2024-09-03 20:43:06,897504.0,True,אולם 10 שורה 6 כיסא 1,2025-06-08 17:30:53,SameSeat,False
4,מנוי לעונת 2024/2025,עידו פלדור,עידו פלדור,10536,33486960.0,paldor1@gmail.com,,(050) 894-6047,813774.0,מנוי לעונת 2024/2025,...,1050,1C שורה 4 כיסא 30,C,2024-08-09 16:29:40,881084.0,True,1C שורה 4 כיסא 30,2025-06-05 11:58:44,SameSeat,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22750,,מיכל אהלי,,1430149,209490556.0,,,,,,...,,,,,,False,,,,False
22751,,יוחנן שיפמן,,11085,11085.0,,,0507874290,,,...,,,,,,False,,,,False
22752,,עומר קרני,,31779,205417157.0,omerkarni24@gmail.com,,0523142105,,,...,,,,,,False,,,,False
22753,,אפרים בורשטיין,,13542,13542.0,,,0526966877,,,...,,,,,,False,,,,False


In [10]:
# 🟢 Load ongoing status Excel
ongoing = pd.read_excel('מאסטר קמפיין מנויים 2025_26.xlsx', sheet_name='חדש')
merged['User Id'] = pd.to_numeric(merged['User Id'], errors="coerce").astype("Int64")
ongoing['User Id'] = pd.to_numeric(ongoing['User Id'], errors="coerce").astype("Int64")

# 🟢 Merge merged dataset with ongoing status
final_df = merged.merge(
    ongoing,
    on="User Id",
    how="left",
    suffixes=("_merged", "_ongoing")
)

assert len(final_df) == len(merged), "Row count changed!"

# 🟢 Combine merged/ongoing columns safely within merged DataFrame
def combine_merged_ongoing_columns(df, drop_original=False):
    merged_cols = [col for col in df.columns if col.endswith('_merged')]
    for merged_col in merged_cols:
        base = merged_col.rsplit('_', 1)[0]
        ongoing_col = base + '_ongoing'
        if ongoing_col in df.columns:
            df[base] = df[merged_col].combine_first(df[ongoing_col])
            if drop_original:
                df.drop(columns=[merged_col, ongoing_col], inplace=True)
    return df

final_df = combine_merged_ongoing_columns(final_df, drop_original=True)

# 🟢 Fill Full Name if missing
final_df["Full Name"] = final_df["Full Name"].combine_first(final_df["Account Name"])

# 🟢 Remove duplicates
final_df = final_df.drop_duplicates(subset="User Id")

final_df


  df[base] = df[merged_col].combine_first(df[ongoing_col])


Unnamed: 0,Product,Account Name,User Id,School,Old Seat,Price area,RenewSeasonTicketId,RenewSeasonTicket,Renew Seat,RenewTransactionDate,...,Full Name,TZ,Email,Phone,OldSeasonTicketId,Old Season Ticket (All Owners),Base price,Price,OldTransactionDate,Marketing Allowed
0,מנוי לעונת 2024/2025,שי קציר,10010,,אולם 2 שורה 19 כיסא 13,B,899548.0,True,אולם 2 שורה 19 כיסא 13,2025-06-10 09:35:20,...,שי קציר,40175192.0,katzirs30@gmail.com,0502877926,859905.0,מנוי לעונת 2024/2025,1550.0,1550.0,2024-12-05 14:43:58,False
1,מנוי לעונת 2024/2025,שלמה קוטלר,10011,,אולם 9 שורה 18 כיסא 10,Silver Up,,False,,,...,שלמה קוטלר,94938.0,rachel@kotler-adika.co.il,0544755212,849584.0,מנוי לעונת 2024/2025,3300.0,3300.0,2024-10-10 15:27:23,True
2,,יונתן אנסלמן,10528,,אולם 10 שורה 15 כיסא 12,,,False,,,...,יונתן אנסלמן,38818340.0,yonie84@gmail.com,0524567167,820053.0,מנוי לעונת 2024/2025,1550.0,1240.0,2024-07-21 20:09:08,True
3,מנוי לעונת 2024/2025,רונן נהרי,10529,,אולם 10 שורה 6 כיסא 1,A,897504.0,True,אולם 10 שורה 6 כיסא 1,2025-06-08 17:30:53,...,רונן נהרי,59723486.0,48ronen@gmail.com,0528523030,805315.0,מנוי לעונת 2024/2025,2100.0,2100.0,2024-09-03 20:43:06,False
4,מנוי לעונת 2024/2025,עידו פלדור,10536,,1C שורה 4 כיסא 30,C,881084.0,True,1C שורה 4 כיסא 30,2025-06-05 11:58:44,...,עידו פלדור,33486960.0,paldor1@gmail.com,(050) 894-6047,813774.0,מנוי לעונת 2024/2025,1050.0,1050.0,2024-08-09 16:29:40,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22132,,מיכל אהלי,1430149,,,,,False,,,...,מיכל אהלי,209490556.0,,,,,,,,False
22133,,יוחנן שיפמן,11085,,,,,False,,,...,יוחנן שיפמן,11085.0,,0507874290,,"2016/2017, 2015/2016, 2014/2015",,,,False
22134,,עומר קרני,31779,,,,,False,,,...,עומר קרני,205417157.0,omerkarni24@gmail.com,0523142105,,2019/2020,,,,False
22135,,אפרים בורשטיין,13542,,,,,False,,,...,אפרים בורשטיין,13542.0,,0526966877,,"2022/2023, 2021/2022, 2019/2020, 2018/2019...",,,,False


In [11]:
# Create "Final ID" by prioritizing assign using ID number, then User from merged, then User from ongoing
final_df["Old Season Ticket (All Owners)"] = final_df["Old Season Ticket (All Owners)"].combine_first(final_df["Product"])

# Remove duplicates based on User Id
final_df = final_df.drop_duplicates(subset="User Id")

# Fill in Full Name if missing, using Account Name
final_df["Full Name"] = final_df["Full Name"].combine_first(final_df["Account Name"])

# Final columns to keep
columns_to_keep = [
    'Assignment',
    'Status',
    'Comments',
    'קמפיין',
    'Full Name',
    'TZ',
    'User Id', 
    'Email',
    'Phone',
    'Old Season Ticket (All Owners)',
    'OldTransactionDate',
    'Old Seat',
    'Base price',
    'Price',
    'Price area',
    'OldPriceType',
    'OldSeasonTicketId',
    'RenewSeasonTicket',
    'Renew Seat',
    'RenewTransactionDate',
    'RenewType',
    'RenewSeasonTicketId',
    'Marketing Allowed'
]
final_df = final_df[columns_to_keep]
final_df

Unnamed: 0,Assignment,Status,Comments,קמפיין,Full Name,TZ,User Id,Email,Phone,Old Season Ticket (All Owners),...,Price,Price area,OldPriceType,OldSeasonTicketId,RenewSeasonTicket,Renew Seat,RenewTransactionDate,RenewType,RenewSeasonTicketId,Marketing Allowed
0,,,,,שי קציר,40175192.0,10010,katzirs30@gmail.com,0502877926,מנוי לעונת 2024/2025,...,1550.0,B,,859905.0,True,אולם 2 שורה 19 כיסא 13,2025-06-10 09:35:20,SameSeat,899548.0,False
1,עידן,,,מחדשי 2024/25 רגילים,שלמה קוטלר,94938.0,10011,rachel@kotler-adika.co.il,0544755212,מנוי לעונת 2024/2025,...,3300.0,Silver Up,Adult,849584.0,False,,,Empty,,True
2,עדי,לא לגעת,מתלבט בגלל מילואים,מחדשי 2024/25 רגילים,יונתן אנסלמן,38818340.0,10528,yonie84@gmail.com,0524567167,מנוי לעונת 2024/2025,...,1240.0,,Adult (renew),820053.0,False,,,Empty,,True
3,,,,,רונן נהרי,59723486.0,10529,48ronen@gmail.com,0528523030,מנוי לעונת 2024/2025,...,2100.0,A,,805315.0,True,אולם 10 שורה 6 כיסא 1,2025-06-08 17:30:53,SameSeat,897504.0,False
4,,,,,עידו פלדור,33486960.0,10536,paldor1@gmail.com,(050) 894-6047,מנוי לעונת 2024/2025,...,1050.0,C,,813774.0,True,1C שורה 4 כיסא 30,2025-06-05 11:58:44,SameSeat,881084.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22132,,,,,מיכל אהלי,209490556.0,1430149,,,,...,,,,,False,,,,,False
22133,B,,,מנויי עבר ללא מנוי השנה - חוזרים הביתה,יוחנן שיפמן,11085.0,11085,,0507874290,"2016/2017, 2015/2016, 2014/2015",...,,,,,False,,,,,False
22134,B,,,מנויי עבר ללא מנוי השנה - חוזרים הביתה,עומר קרני,205417157.0,31779,omerkarni24@gmail.com,0523142105,2019/2020,...,,,,,False,,,,,False
22135,B,,,מנויי עבר ללא מנוי השנה - חוזרים הביתה,אפרים בורשטיין,13542.0,13542,,0526966877,"2022/2023, 2021/2022, 2019/2020, 2018/2019...",...,,,,,False,,,,,False


In [12]:
cols_to_check = ['Status', 'Assignment', 'קמפיין', 'Comments']

# For collecting discrepancies
discrepancies = []

# Loop over ongoing rows
for idx, row in ongoing.iterrows():
    user_id = row['User Id']
    
    # Skip rows with no User Id
    if pd.isna(user_id):
        continue
    
    # Look up this user in final_df
    match = final_df[final_df['User Id'] == user_id]
    
    if match.empty:
        # No row for this user
        continue
    
    # We expect only one row per User Id
    match_row = match.iloc[0]
    
    # Compare each column
    for col in cols_to_check:
        val_ongoing = row[col]
        val_final = match_row[col]
        
        # If both are NaN, consider them equal
        if pd.isna(val_ongoing) and pd.isna(val_final):
            continue
        
        # If they differ
        if val_ongoing != val_final:
            discrepancies.append({
                'User Id': user_id,
                'Column': col,
                'Ongoing Value': val_ongoing,
                'Final Value': val_final
            })

In [13]:
if discrepancies:
    print("⚠️ Discrepancies found between ongoing and final_df:")
    for d in discrepancies:
        print(
            f"- User Id {d['User Id']} | Column '{d['Column']}': "
            f"Ongoing='{d['Ongoing Value']}' vs Final='{d['Final Value']}'"
        )
else:
    print("✅ All values match exactly per User Id.")

✅ All values match exactly per User Id.


#### If something was missing:
##### Then check again

In [None]:
for d in discrepancies:
    uid = d['User Id']
    col = d['Column']
    correct_value = d['Ongoing Value']
    
    final_df.loc[final_df['User Id'] == uid, col] = correct_value

In [14]:
# Exclude rows with specific OldPriceType values
values_to_drop = [
    "מחלקת נוער",
    "Community",
    "עסקי",
    "Box",
    'מחלקת נוער בי\\'
]
final_df = final_df[
    ~final_df["OldPriceType"].isin(values_to_drop)
]

final_df = final_df[~final_df['Full Name'].str.contains(('test|Toptix Ltd|בדיקה|test'), case=False, na=False)]

### Add st_2526 new members:

In [15]:
st_2526 = prepare_cleaned_season_data(
    st_csv_path="List_Report_20250703140618.csv",
    vouchers_csv_path="VoucherUseReport_2025-07-03.csv",
    vouchers_to_exclude=vouchers_to_exclude
)
st_2526 = combine_x_y_columns(st_2526, drop_original=True)
st_2526

  df[base] = df[merged_col].combine_first(df[ongoing_col])
  df[base] = df[merged_col].combine_first(df[ongoing_col])
  df[base] = df[merged_col].combine_first(df[ongoing_col])
  df[base] = df[merged_col].combine_first(df[ongoing_col])


Unnamed: 0,User Id,User first name,User last name,Package name,Product Id,Product,Status,Id,Fan / Company,assign using ID number,...,Payment type,Payment method,Delivery type,Ticket Note,Transaction Note,Role,ReturnRequestStatus,Voucher number,Voucher value,Unnamed: 51
0,1348638,אוברגוט,302506571,0,194,מנוי לעונת 2025/2026,Active,902982,חנה לפיד,011595162,...,Pelecard_PayType_Pelecard_Credit Card,Pelecard_Credit Card,Virtual Card,,,Cashier,,,,
1,1335364,אוברגוט,302506571,0,194,מנוי לעונת 2025/2026,Active,902981,אלדד לפיד,55920086,...,Pelecard_PayType_Pelecard_Credit Card,Pelecard_Credit Card,Virtual Card,,,Cashier,,,,
2,11226,אוברגוט,302506571,0,194,מנוי לעונת 2025/2026,Active,902969,אליק הוכנר,60742327,...,PayType_External Payment Subscriptions,External Payment Subscriptions,Virtual Card,,,Administrator,,,,
6,10547,אוברגוט,302506571,0,194,מנוי לעונת 2025/2026,Active,902964,מאשקה ליטבק,1597624,...,Pelecard_PayType_Pelecard_Credit Card,Pelecard_Credit Card,Virtual Card,,,Administrator,,,,
7,14076,אוברגוט,302506571,0,194,מנוי לעונת 2025/2026,Active,902951,אמנון בודנרו,50328491,...,Pelecard_PayType_Pelecard_Credit Card,Pelecard_Credit Card,Virtual Card,,,Cashier,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
661000,1335439,אוברגוט,302506571,0,194,מנוי לעונת 2025/2026,Active,881070,איתן אדר,024957136,...,Pelecard_PayType_Pelecard_Credit Card,Pelecard_Credit Card,Virtual Card,,,User,,,,
661001,16172,אוברגוט,302506571,0,194,מנוי לעונת 2025/2026,Active,881069,דנה שרון אדר,25110867,...,Pelecard_PayType_Pelecard_Credit Card,Pelecard_Credit Card,Virtual Card,,,User,,,,
661002,1335438,אוברגוט,302506571,0,194,מנוי לעונת 2025/2026,Active,881071,נועם אדר,214554974,...,Pelecard_PayType_Pelecard_Credit Card,Pelecard_Credit Card,Virtual Card,,,User,,,,
661003,12927,אוברגוט,302506571,0,194,מנוי לעונת 2025/2026,Active,881065,עמית בלינקוב,203147582,...,Pelecard_PayType_Pelecard_Credit Card,Pelecard_Credit Card,Virtual Card,,,Administrator,,,,


In [16]:
def build_renew_seat(row):
    if pd.isna(row['Area']) or pd.isna(row['Row']) or pd.isna(row['Number']):
        return None
    return (
        str(row['Area']) +
        " שורה " +
        clean_number(row['Row']) +
        " כיסא " +
        clean_number(row['Number'])
    )

st_2526['Renew Seat'] = st_2526.apply(build_renew_seat, axis=1)
st_2526 = st_2526.rename(columns={
'Id': 'RenewSeasonTicketId',
'Ticket price types': 'RenewPriceType',
'Price area': 'Renew Price Area',
"Fan / Company": "Full Name",
"assign using  ID number": "TZ",
"Date": "RenewTransactionDate",
"Price": 'RenewPrice', 
"Base price": "Renew Base Price"
})

st_2526 = st_2526[st_2526['RenewPriceType'] != 'Complementary']

# Select and rename columns
st_2526 = st_2526[[
    "Full Name",
    "TZ",
    "User Id",
    "Email",
    "Phone",
    "Renew Base Price",
    "RenewPrice",
    "Renew Price Area",
    "RenewPriceType",
    "RenewSeasonTicketId",
    "Renew Seat",
    "RenewTransactionDate"
]]

st_2526 = st_2526.drop_duplicates(subset='User Id')

st_2526

Unnamed: 0,Full Name,TZ,User Id,Email,Phone,Renew Base Price,RenewPrice,Renew Price Area,RenewPriceType,RenewSeasonTicketId,Renew Seat,RenewTransactionDate
0,חנה לפיד,011595162,1348638,chlapid@gmail.com,050000000,1600,1280,B,Adult (renew),902982,אולם 4 שורה 18 כיסא 2,2025-07-03 1:44 PM
1,אלדד לפיד,55920086,1335364,lapideldad7@gmail.com,,1600,1600,B,Adult (renew),902981,אולם 4 שורה 18 כיסא 3,2025-07-03 1:44 PM
2,אליק הוכנר,60742327,11226,alik.hochner@gmail.com,0545795006,5000,5000,Silver,Adult,902969,אולם 3 שורה 8 כיסא 19,2025-07-03 12:24 PM
6,מאשקה ליטבק,1597624,10547,mashka@negba.org.il,0505750953,1650,1320,B,Adult - New promo,902964,אולם 2 שורה 11 כיסא 16,2025-07-03 12:13 PM
7,אמנון בודנרו,50328491,14076,budneroamnon@gmail.com,0542425501,1395,1395,A,Adult (renew),902951,אולם 10 שורה 8 כיסא 10,2025-07-03 10:27 AM
...,...,...,...,...,...,...,...,...,...,...,...,...
660999,מידב צמח,25330,25330,meidav123@icloud.com,0549013883,1300,1300,C,Playoff finals - Adult,881075,אולם 5 שורה 17 כיסא 31,2025-06-05 11:58 AM
661000,איתן אדר,024957136,1335439,,,850,850,D,Playoff finals - Adult,881070,12 אולם שורה 17 כיסא 20,2025-06-05 11:56 AM
661001,דנה שרון אדר,25110867,16172,dana.sharonadar@gmail.com,0507324716,850,850,D,Playoff finals - Adult,881069,12 אולם שורה 17 כיסא 21,2025-06-05 11:56 AM
661002,נועם אדר,214554974,1335438,,,700,700,D,Child,881071,12 אולם שורה 17 כיסא 19,2025-06-05 11:56 AM


In [17]:
merged_df = final_df.merge(
    st_2526,
    on="User Id",
    how="left",
    suffixes=("", "_2526")
)

def combine_2526_columns(df, drop_original=False):
    merged_cols = [col for col in df.columns if col.endswith('_2526')]
    
    # Start with a Series of all False
    renew_flag = pd.Series(False, index=df.index)

    for merged_col in merged_cols:
        base_col = merged_col.rsplit('_', 1)[0]
        
        if base_col in df.columns:
            # Update the flag if base_col is null and merged_col is not null
            renew_flag |= df[base_col].isna() & df[merged_col].notna()
            # Combine
            df[base_col] = df[base_col].combine_first(df[merged_col])
        else:
            # If no base col, rename
            df.rename(columns={merged_col: base_col}, inplace=True)
            # Any non-null here means it came from st_2526
            renew_flag |= df[base_col].notna()
        
        # Drop if requested
        if drop_original:
            df.drop(columns=[merged_col], inplace=True)
    
    # Add RenewSeasonTicket flag
    df["RenewSeasonTicket"] = renew_flag

    return df

merged_df = combine_2526_columns(merged_df, drop_original=True)

merged_df

Unnamed: 0,Assignment,Status,Comments,קמפיין,Full Name,TZ,User Id,Email,Phone,Old Season Ticket (All Owners),...,RenewSeasonTicket,Renew Seat,RenewTransactionDate,RenewType,RenewSeasonTicketId,Marketing Allowed,Renew Base Price,RenewPrice,Renew Price Area,RenewPriceType
0,,,,,שי קציר,40175192.0,10010,katzirs30@gmail.com,0502877926,מנוי לעונת 2024/2025,...,False,אולם 2 שורה 19 כיסא 13,2025-06-10 09:35:20,SameSeat,899548.0,False,1650,1650,B,Playoff finals - Adult
1,עידן,,,מחדשי 2024/25 רגילים,שלמה קוטלר,94938.0,10011,rachel@kotler-adika.co.il,0544755212,מנוי לעונת 2024/2025,...,False,,,Empty,,True,,,,
2,עדי,לא לגעת,מתלבט בגלל מילואים,מחדשי 2024/25 רגילים,יונתן אנסלמן,38818340.0,10528,yonie84@gmail.com,0524567167,מנוי לעונת 2024/2025,...,False,,,Empty,,True,,,,
3,,,,,רונן נהרי,59723486.0,10529,48ronen@gmail.com,0528523030,מנוי לעונת 2024/2025,...,False,אולם 10 שורה 6 כיסא 1,2025-06-08 17:30:53,SameSeat,897504.0,False,2100,1470,A,Playoff finals - Adult
4,,,,,עידו פלדור,33486960.0,10536,paldor1@gmail.com,(050) 894-6047,מנוי לעונת 2024/2025,...,False,1C שורה 4 כיסא 30,2025-06-05 11:58:44,SameSeat,881084.0,False,1450,1450,C,Adult
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21915,,,,,מיכל אהלי,209490556.0,1430149,,,,...,True,אולם 2 שורה 21 כיסא 13,2025-07-01 2:19 PM,,902628.0,False,1650,1650,B,Adult - New promo
21916,B,,,מנויי עבר ללא מנוי השנה - חוזרים הביתה,יוחנן שיפמן,11085.0,11085,,0507874290,"2016/2017, 2015/2016, 2014/2015",...,False,,,,,False,,,,
21917,B,,,מנויי עבר ללא מנוי השנה - חוזרים הביתה,עומר קרני,205417157.0,31779,omerkarni24@gmail.com,0523142105,2019/2020,...,True,אולם 3 שורה 20 כיסא 17,2025-07-01 3:22 PM,,902638.0,False,2200,1760,A,Adult - New promo
21918,B,,,מנויי עבר ללא מנוי השנה - חוזרים הביתה,אפרים בורשטיין,13542.0,13542,,0526966877,"2022/2023, 2021/2022, 2019/2020, 2018/2019...",...,False,,,,,False,,,,


### Add organization for all users:

In [19]:
robo_users = pd.read_csv('UserCreatedReport_2025-07-01 16_33.csv')
robo_users['Full Name'] = robo_users['firstname'] + " " + robo_users['lastname']
robo_users = robo_users[['Full Name', 'userid', 'isorganization']]

robo_users['userid'] = pd.to_numeric(robo_users['userid'], errors="coerce").astype("Int64")

robo_users['isorganization'] = robo_users['isorganization'].map({'Yes': True, 'No': False})

assert robo_users['userid'].is_unique, "Error: duplicate user IDs in robo_users!"

  robo_users = pd.read_csv('UserCreatedReport_2025-07-01 16_33.csv')


AssertionError: Error: duplicate user IDs in robo_users!

#### If fails:

In [20]:
robo_users = robo_users.drop_duplicates()
assert robo_users['userid'].is_unique, "Still duplicates!"

#### Continue:

In [21]:
merged_df = merged_df.merge(
    robo_users,
    left_on='User Id',
    right_on='userid',
    how='left',
    suffixes=('', '_robo')
)

assert len(merged_df) == len(final_df), "Row count changed!"

merged_df["Full Name"] = np.where(
    merged_df["Full Name"] == "relationship lite user",
    merged_df["Full Name_robo"],
    merged_df["Full Name"]
)

merged_df = merged_df.drop(columns=['Full Name_robo', 'userid'])
merged_df

Unnamed: 0,Assignment,Status,Comments,קמפיין,Full Name,TZ,User Id,Email,Phone,Old Season Ticket (All Owners),...,Renew Seat,RenewTransactionDate,RenewType,RenewSeasonTicketId,Marketing Allowed,Renew Base Price,RenewPrice,Renew Price Area,RenewPriceType,isorganization
0,,,,,שי קציר,40175192.0,10010,katzirs30@gmail.com,0502877926,מנוי לעונת 2024/2025,...,אולם 2 שורה 19 כיסא 13,2025-06-10 09:35:20,SameSeat,899548.0,False,1650,1650,B,Playoff finals - Adult,True
1,עידן,,,מחדשי 2024/25 רגילים,שלמה קוטלר,94938.0,10011,rachel@kotler-adika.co.il,0544755212,מנוי לעונת 2024/2025,...,,,Empty,,True,,,,,True
2,עדי,לא לגעת,מתלבט בגלל מילואים,מחדשי 2024/25 רגילים,יונתן אנסלמן,38818340.0,10528,yonie84@gmail.com,0524567167,מנוי לעונת 2024/2025,...,,,Empty,,True,,,,,False
3,,,,,רונן נהרי,59723486.0,10529,48ronen@gmail.com,0528523030,מנוי לעונת 2024/2025,...,אולם 10 שורה 6 כיסא 1,2025-06-08 17:30:53,SameSeat,897504.0,False,2100,1470,A,Playoff finals - Adult,False
4,,,,,עידו פלדור,33486960.0,10536,paldor1@gmail.com,(050) 894-6047,מנוי לעונת 2024/2025,...,1C שורה 4 כיסא 30,2025-06-05 11:58:44,SameSeat,881084.0,False,1450,1450,C,Adult,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21915,,,,,מיכל אהלי,209490556.0,1430149,,,,...,אולם 2 שורה 21 כיסא 13,2025-07-01 2:19 PM,,902628.0,False,1650,1650,B,Adult - New promo,
21916,B,,,מנויי עבר ללא מנוי השנה - חוזרים הביתה,יוחנן שיפמן,11085.0,11085,,0507874290,"2016/2017, 2015/2016, 2014/2015",...,,,,,False,,,,,
21917,B,,,מנויי עבר ללא מנוי השנה - חוזרים הביתה,עומר קרני,205417157.0,31779,omerkarni24@gmail.com,0523142105,2019/2020,...,אולם 3 שורה 20 כיסא 17,2025-07-01 3:22 PM,,902638.0,False,2200,1760,A,Adult - New promo,False
21918,B,,,מנויי עבר ללא מנוי השנה - חוזרים הביתה,אפרים בורשטיין,13542.0,13542,,0526966877,"2022/2023, 2021/2022, 2019/2020, 2018/2019...",...,,,,,False,,,,,False


### Add riseabove:

In [22]:
riseabove = pd.read_csv('riseabove.csv')
riseabove = riseabove.drop_duplicates(subset=['User Id'])
riseabove = riseabove[['Product', 'User Id']]
riseabove['User Id'] = pd.to_numeric(riseabove['User Id'], errors="coerce").astype("Int64")

# Save the row count before merge
initial_rows = len(merged_df)

# Merge
merged_df = merged_df.merge(riseabove, on='User Id', how='left')

# Check if shape changed
if len(merged_df) != initial_rows:
    raise ValueError(
        f"Merge increased the number of rows! Before: {initial_rows}, After: {len(merged_df)}"
    )

# Create flag
merged_df['RiseAbove2425'] = merged_df['Product'].notna()

# Update old season ticket
merged_df.loc[merged_df['RiseAbove2425'], 'Old Season Ticket (All Owners)'] = merged_df.loc[merged_df['RiseAbove2425'], 'Product']

# Ensure flag is boolean
merged_df['RiseAbove2425'] = merged_df['RiseAbove2425'].fillna(False)

merged_df = merged_df.drop(columns=['Product'])

merged_df

Unnamed: 0,Assignment,Status,Comments,קמפיין,Full Name,TZ,User Id,Email,Phone,Old Season Ticket (All Owners),...,RenewTransactionDate,RenewType,RenewSeasonTicketId,Marketing Allowed,Renew Base Price,RenewPrice,Renew Price Area,RenewPriceType,isorganization,RiseAbove2425
0,,,,,שי קציר,40175192.0,10010,katzirs30@gmail.com,0502877926,מנוי לעונת 2024/2025,...,2025-06-10 09:35:20,SameSeat,899548.0,False,1650,1650,B,Playoff finals - Adult,True,False
1,עידן,,,מחדשי 2024/25 רגילים,שלמה קוטלר,94938.0,10011,rachel@kotler-adika.co.il,0544755212,מנוי לעונת 2024/2025,...,,Empty,,True,,,,,True,False
2,עדי,לא לגעת,מתלבט בגלל מילואים,מחדשי 2024/25 רגילים,יונתן אנסלמן,38818340.0,10528,yonie84@gmail.com,0524567167,מנוי לעונת 2024/2025,...,,Empty,,True,,,,,False,False
3,,,,,רונן נהרי,59723486.0,10529,48ronen@gmail.com,0528523030,מנוי לעונת 2024/2025,...,2025-06-08 17:30:53,SameSeat,897504.0,False,2100,1470,A,Playoff finals - Adult,False,False
4,,,,,עידו פלדור,33486960.0,10536,paldor1@gmail.com,(050) 894-6047,מנוי לעונת 2024/2025,...,2025-06-05 11:58:44,SameSeat,881084.0,False,1450,1450,C,Adult,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21915,,,,,מיכל אהלי,209490556.0,1430149,,,,...,2025-07-01 2:19 PM,,902628.0,False,1650,1650,B,Adult - New promo,,False
21916,B,,,מנויי עבר ללא מנוי השנה - חוזרים הביתה,יוחנן שיפמן,11085.0,11085,,0507874290,"2016/2017, 2015/2016, 2014/2015",...,,,,False,,,,,,False
21917,B,,,מנויי עבר ללא מנוי השנה - חוזרים הביתה,עומר קרני,205417157.0,31779,omerkarni24@gmail.com,0523142105,2019/2020,...,2025-07-01 3:22 PM,,902638.0,False,2200,1760,A,Adult - New promo,False,False
21918,B,,,מנויי עבר ללא מנוי השנה - חוזרים הביתה,אפרים בורשטיין,13542.0,13542,,0526966877,"2022/2023, 2021/2022, 2019/2020, 2018/2019...",...,,,,False,,,,,False,False


### ONE TEAM ONE FAMILY:

In [23]:
names_to_exclude = [
    "בטי דאוי",
    "אריה דאוי",
    "תומר דאוי",
    "עדן דאוי",
    'בועז זוסמן',
    'מיקה זוסמן',
    'עופר ברעם',
    'איתי ברק',
    'יהונתן קפלן',
    'תהילה קפלן',
    'הראל רפאל טלקר',
    'אורי חיון',
    'שרית זוסמן',
    'צבי זוסמן',
    'עילי שוורצמן',
    'אורן שוורצמן',
    'רונן נהרי',
    'בר נהרי',
    'עומר נהרי'
]

emails_to_exclude = [
    "msleon123@gmail.com",
    'hareven@gmail.com',
    'avalid59@gmail.com',
    'gordonsarah01@gmail.com',
    'yishai757@gmail.com',
    'laor.print@gmail.com',
    'rois@nvidia.com',
    'roisasson6@gmail.com'
]

merged_df = merged_df[~merged_df['Email'].isin(emails_to_exclude)]
merged_df = merged_df[~merged_df['Full Name'].isin(names_to_exclude)]

merged_df

Unnamed: 0,Assignment,Status,Comments,קמפיין,Full Name,TZ,User Id,Email,Phone,Old Season Ticket (All Owners),...,RenewTransactionDate,RenewType,RenewSeasonTicketId,Marketing Allowed,Renew Base Price,RenewPrice,Renew Price Area,RenewPriceType,isorganization,RiseAbove2425
0,,,,,שי קציר,40175192.0,10010,katzirs30@gmail.com,0502877926,מנוי לעונת 2024/2025,...,2025-06-10 09:35:20,SameSeat,899548.0,False,1650,1650,B,Playoff finals - Adult,True,False
1,עידן,,,מחדשי 2024/25 רגילים,שלמה קוטלר,94938.0,10011,rachel@kotler-adika.co.il,0544755212,מנוי לעונת 2024/2025,...,,Empty,,True,,,,,True,False
2,עדי,לא לגעת,מתלבט בגלל מילואים,מחדשי 2024/25 רגילים,יונתן אנסלמן,38818340.0,10528,yonie84@gmail.com,0524567167,מנוי לעונת 2024/2025,...,,Empty,,True,,,,,False,False
4,,,,,עידו פלדור,33486960.0,10536,paldor1@gmail.com,(050) 894-6047,מנוי לעונת 2024/2025,...,2025-06-05 11:58:44,SameSeat,881084.0,False,1450,1450,C,Adult,False,False
5,,,,,דרור מיג'אן,54906474.0,10538,drormijan@gmail.com,(050) 627-2772,מנוי לעונת 2024/2025,...,2025-06-25 12:34:47,SameSeat,900441.0,True,,,,,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21915,,,,,מיכל אהלי,209490556.0,1430149,,,,...,2025-07-01 2:19 PM,,902628.0,False,1650,1650,B,Adult - New promo,,False
21916,B,,,מנויי עבר ללא מנוי השנה - חוזרים הביתה,יוחנן שיפמן,11085.0,11085,,0507874290,"2016/2017, 2015/2016, 2014/2015",...,,,,False,,,,,,False
21917,B,,,מנויי עבר ללא מנוי השנה - חוזרים הביתה,עומר קרני,205417157.0,31779,omerkarni24@gmail.com,0523142105,2019/2020,...,2025-07-01 3:22 PM,,902638.0,False,2200,1760,A,Adult - New promo,False,False
21918,B,,,מנויי עבר ללא מנוי השנה - חוזרים הביתה,אפרים בורשטיין,13542.0,13542,,0526966877,"2022/2023, 2021/2022, 2019/2020, 2018/2019...",...,,,,False,,,,,False,False


### Seniority:

In [None]:
seniority = pd.read_excel('Output Segments.xlsx')

# Make sure IDs are numeric
seniority["client_number"] = pd.to_numeric(seniority["client_number"], errors="coerce").astype("Int64")

# Function to get concatenated column names
def concat_columns_with_ones(row):
    cols_with_one = [col for col in row.index if row[col] in (1, 1.0)]
    return ", ".join(cols_with_one) if cols_with_one else None

# Build mapping
seniority_mapping = seniority.set_index("client_number").apply(concat_columns_with_ones, axis=1).to_dict()

# Map to merged_df
merged_df["Seniority_String"] = merged_df["User Id"].map(seniority_mapping)

# Mask: override whenever we have a mapped value
mask = merged_df["Seniority_String"].notna()

# Update (always overwrite)
merged_df.loc[mask, "Old Season Ticket (All Owners)"] = merged_df.loc[mask, "Seniority_String"]

# Clean up
merged_df = merged_df.drop(columns=["Seniority_String"])

# Create ST_Count by splitting on commas
merged_df["ST_Count"] = (
    merged_df["Old Season Ticket (All Owners)"]
    .fillna("")
    .apply(lambda x: len([item for item in x.split(",") if item.strip()]) if x else 0)
)

merged_df

Unnamed: 0,Assignment,Status,Comments,קמפיין,Full Name,TZ,User Id,Email,Phone,Old Season Ticket (All Owners),...,RenewType,RenewSeasonTicketId,Marketing Allowed,Renew Base Price,RenewPrice,Renew Price Area,RenewPriceType,isorganization,RiseAbove2425,ST_Count
0,,,,,שי קציר,40175192.0,10010,katzirs30@gmail.com,0502877926,מנוי לעונת 2024/2025,...,SameSeat,899548.0,False,1650,1650,B,Playoff finals - Adult,True,False,1
1,עידן,,,מחדשי 2024/25 רגילים,שלמה קוטלר,94938.0,10011,rachel@kotler-adika.co.il,0544755212,מנוי לעונת 2024/2025,...,Empty,,True,,,,,True,False,1
2,עדי,לא לגעת,מתלבט בגלל מילואים,מחדשי 2024/25 רגילים,יונתן אנסלמן,38818340.0,10528,yonie84@gmail.com,0524567167,מנוי לעונת 2024/2025,...,Empty,,True,,,,,False,False,1
4,,,,,עידו פלדור,33486960.0,10536,paldor1@gmail.com,(050) 894-6047,מנוי לעונת 2024/2025,...,SameSeat,881084.0,False,1450,1450,C,Adult,False,False,1
5,,,,,דרור מיג'אן,54906474.0,10538,drormijan@gmail.com,(050) 627-2772,מנוי לעונת 2024/2025,...,SameSeat,900441.0,True,,,,,True,False,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21915,,,,,מיכל אהלי,209490556.0,1430149,,,,...,,902628.0,False,1650,1650,B,Adult - New promo,,False,0
21916,B,,,מנויי עבר ללא מנוי השנה - חוזרים הביתה,יוחנן שיפמן,11085.0,11085,,0507874290,"2016/2017, 2015/2016, 2014/2015",...,,,False,,,,,,False,3
21917,B,,,מנויי עבר ללא מנוי השנה - חוזרים הביתה,עומר קרני,205417157.0,31779,omerkarni24@gmail.com,0523142105,2019/2020,...,,902638.0,False,2200,1760,A,Adult - New promo,False,False,1
21918,B,,,מנויי עבר ללא מנוי השנה - חוזרים הביתה,אפרים בורשטיין,13542.0,13542,,0526966877,"2022/2023, 2021/2022, 2019/2020, 2018/2019...",...,,,False,,,,,False,False,6


In [None]:
merged_df["RenewSeasonTicket"] = np.where(
    merged_df["RenewTransactionDate"].notna(),
    'חידש',
    'None'
)

### Fix lots of duplicates on איתי בר-אב:

In [None]:
# # 1️⃣ Load sf_users
# sf_users = pd.read_excel('Users report-2025-07-05-23-44-53.xlsx')
# print("✅ Loaded sf_users:", sf_users.shape)

# # 2️⃣ Ensure IDs are numeric
# enriched_df["User Id"] = pd.to_numeric(enriched_df["User Id"], errors="coerce").astype("Int64")
# sf_users["HJBC ID"] = pd.to_numeric(sf_users["HJBC ID"], errors="coerce").astype("Int64")

# # 3️⃣ Copy before merge to preserve original names
# pre_merge = enriched_df.copy()

# # 4️⃣ Mask rows with Full Name = איתי בר-אב
# mask_itai_barav = pre_merge["Full Name"] == "איתי בר-אב"

# # 5️⃣ Get User Ids
# user_ids_to_replace = pre_merge.loc[mask_itai_barav, "User Id"].unique()
# print("✅ User Ids to replace:", user_ids_to_replace)

# # 6️⃣ Merge
# test = enriched_df.merge(
#     sf_users,
#     how="left",
#     left_on="User Id",
#     right_on="HJBC ID",
#     suffixes=("", "_user")
# )

# # 7️⃣ Build mask after merge
# mask_final = test["User Id"].isin(user_ids_to_replace)
# print("✅ Rows matching mask after merge:", mask_final.sum())

# # 8️⃣ Show preview before replacing
# if mask_final.sum() > 0:
#     print("✅ Sample rows BEFORE replacement:")
#     display(test.loc[mask_final, ["User Id", "Full Name", "Email", "Phone"]].head())

# # 9️⃣ Overwrite Full Name, Email, Phone, TZ
# columns_to_update = [
#     ("Full Name", "Account Name"),
#     ("Email", "Email_user"),
#     ("Phone", "Phone_user"),
#     ("TZ", "ID")
# ]

# for target_col, source_col in columns_to_update:
#     if source_col in test.columns:
#         test.loc[mask_final, target_col] = test.loc[mask_final, source_col]
#         print(f"✅ Overwrote column: {target_col}")

# # 🔟 Clear other columns
# columns_to_fill_none = [
#     "Assignment",
#     "Status",
#     "Comments",
#     "קמפיין",
#     "Old Season Ticket (All Owners)",
#     "OldTransactionDate",
#     "Old Seat",
#     "Base price",
#     "Price",
#     "Price area",
#     "OldPriceType",
#     "OldSeasonTicketId",
#     "RenewSeasonTicket",
#     "Renew Seat",
#     "RenewTransactionDate",
#     "RenewType",
#     "RenewSeasonTicketId",
#     "Renew Base Price",
#     "RenewPrice",
#     "Renew Price Area",
#     "RenewPriceType",
#     "RiseAbove2425"
# ]

# for col in columns_to_fill_none:
#     if col in test.columns:
#         test.loc[mask_final, col] = "None"
# print(f"✅ Cleared columns: {len(columns_to_fill_none)}")

# # 1️⃣1️⃣ Flags
# test.loc[mask_final, "isorganization"] = False
# test.loc[mask_final, "ST_Count"] = 0

# # 1️⃣2️⃣ Drop redundant columns
# cols_to_drop = [
#     "Marketing Allowed_user",
#     "Phone_user",
#     "Email_user",
#     "Account Name",
#     "ID",
#     "HJBC ID",
#     "Age",
#     "City",
#     "Street",
#     "Last Activity",
#     "Created Date",
#     "Is Lite User",
#     "Is SSO User"
# ]
# test = test.drop(columns=cols_to_drop, errors="ignore")
# print(f"✅ Dropped columns: {len(cols_to_drop)}")

# # 1️⃣3️⃣ Validate results
# final_rows = test[test["User Id"].isin(user_ids_to_replace)]
# print("✅ Final sample rows AFTER update:")
# display(final_rows[["User Id", "Full Name", "Email", "Phone", "ST_Count"]])

# # Optional: assign back to enriched_df
# enriched_df = test.copy()

### FIX BASE PRICE & PRICE OF ST_2425:

In [None]:
# st_2425 = pd.read_csv('List_Report_20250609080140.csv')

# # 1️⃣ Filter initial rows with missing prices in מנוי לעונת 2024/2025
# mask_season = enriched_df["Old Season Ticket (All Owners)"] == "מנוי לעונת 2024/2025"
# mask_prices = enriched_df["Base price"].isna() | enriched_df["Price"].isna()
# mask = mask_season & mask_prices

# missing_price_rows = enriched_df.loc[mask, ["User Id", "Full Name", "Base price", "Price"]]

# print("✅ Found rows with missing Base price or Price:", len(missing_price_rows))
# if not missing_price_rows.empty:
#     display(missing_price_rows)

# # 2️⃣ Make sure IDs are numeric
# enriched_df["User Id"] = pd.to_numeric(enriched_df["User Id"], errors="coerce").astype("Int64")
# st_2425["User Id"] = pd.to_numeric(st_2425["User Id"], errors="coerce").astype("Int64")

# # 3️⃣ Build price lookup by User Id
# price_lookup = (
#     st_2425
#     .drop_duplicates(subset=["User Id"])
#     .set_index("User Id")[["Base price", "Price"]]
# )

# # 4️⃣ Fill by User Id
# enriched_df["Base price"] = enriched_df["Base price"].combine_first(
#     enriched_df["User Id"].map(price_lookup["Base price"])
# )
# enriched_df["Price"] = enriched_df["Price"].combine_first(
#     enriched_df["User Id"].map(price_lookup["Price"])
# )

# # 5️⃣ Recheck what is still missing after User Id filling
# mask_missing_after_id = mask_season & (enriched_df["Base price"].isna() | enriched_df["Price"].isna())

# still_missing_prices = enriched_df.loc[
#     mask_missing_after_id,
#     ["User Id", "Full Name", "Base price", "Price"]
# ]

# filled_count = len(missing_price_rows) - len(still_missing_prices)

# print("✅ Filled by User Id:", filled_count)
# print("✅ Remaining after User Id filling:", len(still_missing_prices))
# if not still_missing_prices.empty:
#     display(still_missing_prices)

# # 6️⃣ Further narrow down: exclude those Renewed
# mask_not_renewed = enriched_df["RenewSeasonTicket"] != "חידש"
# final_mask = mask_season & (enriched_df["Base price"].isna() | enriched_df["Price"].isna()) & mask_not_renewed

# problematic_rows = enriched_df.loc[
#     final_mask,
#     ["User Id", "Full Name", "Base price", "Price", "RenewSeasonTicket"]
# ]

# print("✅ Rows missing prices and not renewed:", len(problematic_rows))
# if not problematic_rows.empty:
#     display(problematic_rows)

# # 7️⃣ Fallback: fill by Full Name
# # Make sure st_2425 has 'Full Name' column
# if "Fan / Company" in st_2425.columns:
#     st_2425 = st_2425.rename(columns={"Fan / Company": "Full Name"})

# fallback_lookup = (
#     st_2425
#     .drop_duplicates(subset=["Full Name"])
#     .set_index("Full Name")[["Base price", "Price"]]
# )

# # Fill Base price by Full Name
# before_fallback_base = enriched_df["Base price"].isna().sum()
# before_fallback_price = enriched_df["Price"].isna().sum()

# enriched_df["Base price"] = enriched_df["Base price"].combine_first(
#     enriched_df["Full Name"].map(fallback_lookup["Base price"])
# )
# enriched_df["Price"] = enriched_df["Price"].combine_first(
#     enriched_df["Full Name"].map(fallback_lookup["Price"])
# )

# after_fallback_base = enriched_df["Base price"].isna().sum()
# after_fallback_price = enriched_df["Price"].isna().sum()

# print("✅ Filled Base price by Full Name:", before_fallback_base - after_fallback_base)
# print("✅ Filled Price by Full Name:", before_fallback_price - after_fallback_price)

# # 8️⃣ Recheck final missing prices
# mask_final_missing = mask_season & (enriched_df["Base price"].isna() | enriched_df["Price"].isna()) & mask_not_renewed

# still_missing_after_fallback = enriched_df.loc[
#     mask_final_missing,
#     ["User Id", "Full Name", "Base price", "Price"]
# ]

# print("✅ Final remaining rows still missing prices:", len(still_missing_after_fallback))
# if not still_missing_after_fallback.empty:
#     display(still_missing_after_fallback)

### Add tickets count for new users:

In [None]:
# # Load data
# home = pd.read_csv("home.csv")
# home["User Id"] = pd.to_numeric(home["User Id"], errors="coerce").astype("Int64")
# home = home[[
#     "Product",
#     "Status",
#     "User Id",
#     "Type",
#     "Base price",
#     "Price"
# ]]

# away = pd.read_csv("away.csv")
# away["User Id"] = pd.to_numeric(away["User Id"], errors="coerce").astype("Int64")
# away = away[[
#     "Product",
#     "Status",
#     "User Id",
#     "Type",
#     "Base price",
#     "Price"
# ]]

# # Define the function
# def count_unique_products(df, prefix=""):
#     """
#     Counts unique products per User Id in the given DataFrame,
#     split into free and paid tickets, considering only rows with:
#     Status == "Active" and Type == "Sale".
#     """
#     filtered = df[
#         (df["Status"] == "Active") &
#         (df["Type"] == "Sale")
#     ]
    
#     free = filtered[filtered["Price"] == 0]
#     paid = filtered[filtered["Price"] != 0]
    
#     free_counts = (
#         free
#         .groupby("User Id")["Product"]
#         .nunique()
#         .reset_index()
#         .rename(columns={"Product": f"Count Free {prefix} Ticket"})
#     )
    
#     paid_counts = (
#         paid
#         .groupby("User Id")["Product"]
#         .nunique()
#         .reset_index()
#         .rename(columns={"Product": f"Count Paid {prefix} Ticket"})
#     )
    
#     counts_df = pd.merge(
#         free_counts,
#         paid_counts,
#         how="outer",
#         on="User Id"
#     )
    
#     return counts_df

# home_new = home[home["User Id"].isin(new_user_ids)]
# away_new = away[away["User Id"].isin(new_user_ids)]

# home_counts_new = count_unique_products(home_new, prefix="Home")
# away_counts_new = count_unique_products(away_new, prefix="Away")

# enriched_df = enriched_df.merge(
#     home_counts_new,
#     how="left",
#     on="User Id"
# )

# enriched_df = enriched_df.merge(
#     away_counts_new,
#     how="left",
#     on="User Id"
# )

# # Fill NA only in the count columns and convert to int
# count_cols = [
#     "Count Free Home Ticket",
#     "Count Paid Home Ticket",
#     "Count Free Away Ticket",
#     "Count Paid Away Ticket"
# ]

# # Find which of these columns actually exist
# existing_count_cols = [col for col in count_cols if col in enriched_df.columns]

# print(f"✅ Found {len(existing_count_cols)} count columns to process: {existing_count_cols}")

# # Fill NA only for existing columns
# for col in existing_count_cols:
#     enriched_df[col] = enriched_df[col].fillna(0).astype(int)

### Fix members who didn't have price using all st_2526 and not only todays:

In [None]:
# # Load the file from robo
# st_2526 = pd.read_csv('List_Report_20250707091028.csv')

# # Make sure it's clean and deduplicated
# st_2526_unique = (
#     st_2526
#     .sort_values("Date", ascending=False)   # or any column to define priority
#     .drop_duplicates(subset=["Fan / Company"])
# )

# # 1️⃣ Mask: which rows need fixing
# mask_missing = (
#     (enriched_df["RenewSeasonTicket"] == "חידש") &
#     (enriched_df["Renew Base Price"].isna())
# )

# # 2️⃣ How many need fixing
# print("✅ Rows needing fixing:", mask_missing.sum())

# # 3️⃣ Subset the rows to fix
# to_fix = enriched_df.loc[mask_missing, ["Full Name"]].merge(
#     st_2526_unique[[
#         "Fan / Company",
#         "User Id",
#         "assign using  ID number",
#         "Email",
#         "Phone",
#         "Base price",
#         "Price",
#         "Price area",
#         "Ticket price types"
#     ]],
#     left_on="Full Name",
#     right_on="Fan / Company",
#     how="left"
# )

# # 4️⃣ Validation: did any rows fail to find a match?
# missing_matches = to_fix["User Id"].isna().sum()
# print("⚠️ Rows with no matching st_2526 data:", missing_matches)
# if missing_matches > 0:
#     print(to_fix[to_fix["User Id"].isna()])

# # 5️⃣ Show how many rows will be updated
# print("✅ Rows with matching data to update:", len(to_fix) - missing_matches)

# # 6️⃣ Iterate and update
# updated_count = 0

# for _, row in to_fix.iterrows():
#     name = row["Fan / Company"]

#     # Skip rows with no match
#     if pd.isna(row["User Id"]):
#         continue

#     # Mask in enriched_df
#     mask = (enriched_df["Full Name"] == name)

#     # Overwrite fields
#     enriched_df.loc[mask, "User Id"] = row["User Id"]
#     enriched_df.loc[mask, "TZ"] = row["assign using  ID number"]
#     enriched_df.loc[mask, "Email"] = row["Email"]
#     enriched_df.loc[mask, "Phone"] = row["Phone"]

#     enriched_df.loc[mask, "Renew Base Price"] = row["Base price"]
#     enriched_df.loc[mask, "RenewPrice"] = row["Price"]
#     enriched_df.loc[mask, "Renew Price Area"] = row["Price area"]
#     enriched_df.loc[mask, "RenewPriceType"] = row["Ticket price types"]

#     updated_count += mask.sum()

# # 7️⃣ Summary
# print(f"\n✅ Finished updating {updated_count} enriched_df rows.")

# # 8️⃣ Show any rows still missing Renew Base Price
# remaining_missing = enriched_df[
#     (enriched_df["RenewSeasonTicket"] == "חידש") &
#     (enriched_df["Renew Base Price"].isna())
# ]
# print("\n✅ Remaining rows still missing Renew Base Price after fill:", len(remaining_missing))
# if not remaining_missing.empty:
#     display(remaining_missing[["Full Name", "Renew Base Price", "RenewPrice"]])

# enriched_df.loc[mask_missing]

### Add community/closelink:

In [None]:
# 1. Read your file
working_games_with_ids = pd.read_excel('working_games_with_ids.xlsx')

# Create Community column first
working_games_with_ids["Community"] = (
    working_games_with_ids["Type"].astype(str) + " " + working_games_with_ids["Name"].astype(str)
)

# Group by Full Name
aggregated_df = (
    working_games_with_ids
    .groupby("Full Name")
    .agg({
        "Community": lambda x: " | ".join(x),           # Concatenate all Communities
        "assign using  ID number": "first",              # Keep one ID
        "Age": "first",                                  # Keep one Age (or you can use list)
        "Type": "count"                                  # Count of rows
    })
    .rename(columns={"Type": "Community Count"})
    .reset_index()
)


enriched_df = enriched_df.merge(aggregated_df[['assign using  ID number', 'Community', 'Community Count']],
                                left_on='TZ',
                                right_on='assign using  ID number',
                                how='left')

enriched_df = enriched_df.drop(columns=['assign using  ID number'])

def deduplicate_values(cell):
    if pd.isna(cell):
        return cell
    parts = [part.strip() for part in cell.split('|')]
    unique_parts = list(dict.fromkeys(parts))  # Preserves order and removes duplicates
    return ' | '.join(unique_parts)

enriched_df['Community'] = enriched_df['Community'].apply(deduplicate_values)

enriched_df

### Add bus:

In [None]:
bus = pd.read_excel('מאסטר קמפיין מנויים 2025_26.xlsx', sheet_name='הסעות עונת 2425')

In [None]:
bus = bus.rename(columns={"שם": "Full Name",
                           "הסעות": "הסעות עונת 2425",
                           "הערות הסעות": "Bus notes",
                           "פלאפון": "Phone"})


bus['Phone'] = bus['Phone'].str.replace(r'^972-', '0', regex=True)

bus = bus.drop_duplicates(subset=['Phone'])

enriched_df = enriched_df.merge(bus[['Full Name', 'Phone', 'הסעות עונת 2425']], on='Phone', how='left', suffixes=('', '_bus'))

enriched_df = enriched_df.drop(columns=['Full Name_bus'])
enriched_df

In [27]:
merged_df.to_excel("new.xlsx")

# Update the report:

## Start Here:

In [1]:
import pandas as pd
import numpy as np
from datetime import date, timedelta
from IPython.display import display

## Read all files:

In [16]:
merged_df = pd.read_excel('מאסטר קמפיין מנויים 2025_26.xlsx', sheet_name='היום')

# Load users file of the last day from Salesforce saved reports
users_new = pd.read_excel('Users report-2025-08-25-23-19-19.xlsx')

# Load the st members file of the last day from Roboticket
st_2526 = pd.read_csv('List_Report_20250826091819.csv')

# Load renew st members file of the last day from Roboticket
renew = pd.read_csv('SeasonTicketRenewReport_2025-08-26.csv')

# Load and clean users file of the last day from Roboticket
users = (
    pd.read_csv('UserCreatedReport_2025-08-26 09_18.csv')
    .drop_duplicates(subset=['userid'])
)

### Add users that created in sf yesterday and today:

In [15]:
users_new.columns

Index(['Account Name', 'Unnamed: 1', 'ID', 'HJBC ID', 'Phone', 'Email', 'Age',
       'City', 'Street', 'Last Activity', 'Created Date', 'Is Lite User',
       'Is SSO User', 'Marketing Allowed', 'Season Ticket Revenue'],
      dtype='object')

In [17]:
# Assumes you already have `merged_df` and `users_new` in memory
# and pandas imported as pd

# =========================
# 0) Setup & Diagnostics
# =========================
enriched_df = merged_df.copy()   # work on a copy so original stays intact

print("✅ merged_df shape BEFORE:", merged_df.shape)

cols_to_check = ["Phone", "Email", "Full Name", "TZ", "Marketing Allowed"]
print("\n✅ Missing values in merged_df BEFORE:")
for col in cols_to_check:
    print(f"   - {col}: {merged_df[col].isna().sum()} missing")

# Also report on city/City if present
if "city" in merged_df.columns:
    print(f"   - city: {merged_df['city'].isna().sum()} missing")
if "City" in merged_df.columns:
    print(f"   - City: {merged_df['City'].isna().sum()} missing")

# =========================
# 1) Align key dtypes
# =========================
enriched_df["User Id"] = pd.to_numeric(enriched_df["User Id"], errors="coerce").astype("Int64")
users_new["HJBC ID"]   = pd.to_numeric(users_new["HJBC ID"], errors="coerce").astype("Int64")
# users_new["ID"]        = pd.to_numeric(users_new["ID"], errors="coerce").astype("Int64")  # uncomment if needed

# =========================
# 2) Normalize Marketing Allowed to boolean
# =========================
users_new["Marketing Allowed"] = (
    users_new["Marketing Allowed"]
        .map({1: True, 1.0: True, 0: False, 0.0: False, True: True, False: False})
)

# =========================
# 3) Build a one-to-one lookup from users_new (deduplicated by HJBC ID)
# =========================
keep_cols = [
    "HJBC ID",
    "Phone",
    "Email",
    "Account Name",       # for Full Name
    "ID",                 # for TZ
    "Marketing Allowed",
    "Created Date",       # not merged now, but available
]
# Add City if it exists in users_new (capital C as you mentioned)
if "City" in users_new.columns:
    keep_cols.append("City")

users_keyed = (
    users_new[keep_cols]
    .dropna(subset=["HJBC ID"])
    .drop_duplicates("HJBC ID", keep="last")
    .set_index("HJBC ID")
)

# Convenience Series for mapping
phone_lu   = users_keyed.get("Phone")
email_lu   = users_keyed.get("Email")
name_lu    = users_keyed.get("Account Name")
tz_lu      = users_keyed.get("ID")
mkt_lu     = users_keyed.get("Marketing Allowed")
city_lu    = users_keyed.get("City") if "City" in users_keyed.columns else None

# =========================
# 4) Optional: count "new" users in users_new that are NOT in merged_df
# =========================
existing_ids = set(enriched_df["User Id"].dropna().unique())
new_ids = set(users_keyed.index.dropna().unique()) - existing_ids
print(f"\nℹ️ Found {len(new_ids)} user(s) present in users_new but not in merged_df.")

# =========================
# 5) Fill ONLY missing values in enriched_df from lookup (no shape change)
# =========================
id_key = enriched_df["User Id"]

# Phone
if phone_lu is not None:
    to_fill = enriched_df["Phone"].isna()
    mapped = id_key.map(phone_lu)
    print(f"✅ Phone: filling {int((to_fill & mapped.notna()).sum())} rows")
    enriched_df.loc[to_fill, "Phone"] = mapped

# Email
if email_lu is not None:
    to_fill = enriched_df["Email"].isna()
    mapped = id_key.map(email_lu)
    print(f"✅ Email: filling {int((to_fill & mapped.notna()).sum())} rows")
    enriched_df.loc[to_fill, "Email"] = mapped

# Full Name <- Account Name
if name_lu is not None:
    to_fill = enriched_df["Full Name"].isna()
    mapped = id_key.map(name_lu)
    print(f"✅ Full Name: filling {int((to_fill & mapped.notna()).sum())} rows")
    enriched_df.loc[to_fill, "Full Name"] = mapped

# TZ <- users_new['ID']
if tz_lu is not None:
    to_fill = enriched_df["TZ"].isna()
    mapped = id_key.map(tz_lu)
    print(f"✅ TZ: filling {int((to_fill & mapped.notna()).sum())} rows")
    enriched_df.loc[to_fill, "TZ"] = mapped

# Marketing Allowed
if mkt_lu is not None:
    to_fill = enriched_df["Marketing Allowed"].isna()
    mapped = id_key.map(mkt_lu)
    print(f"✅ Marketing Allowed: filling {int((to_fill & mapped.notna()).sum())} rows")
    enriched_df.loc[to_fill, "Marketing Allowed"] = mapped

# City -> prefer existing 'city' column, fill its nulls from:
#   1) existing 'City' column (if present in enriched_df),
#   2) users_new['City'] mapped via HJBC ID.
if city_lu is not None or ("City" in enriched_df.columns):
    mapped_city = id_key.map(city_lu) if city_lu is not None else None

    if "city" in enriched_df.columns and "City" in enriched_df.columns:
        before = int(enriched_df["city"].isna().sum())
        enriched_df["city"] = enriched_df["city"].combine_first(enriched_df["City"])
        if mapped_city is not None:
            enriched_df["city"] = enriched_df["city"].combine_first(mapped_city)
        after = int(enriched_df["city"].isna().sum())
        print(f"✅ city: filled {before - after} rows (combined existing 'City' + users_new mapping), keeping 'city' as canonical")
        # drop the extra 'City' column to avoid confusion
        enriched_df = enriched_df.drop(columns=["City"])

    elif "city" in enriched_df.columns:
        before = int(enriched_df["city"].isna().sum())
        if mapped_city is not None:
            enriched_df["city"] = enriched_df["city"].combine_first(mapped_city)
        # If only 'City' exists but not mapped_city (rare), try combine_first from enriched_df['City']
        if "City" in enriched_df.columns:
            enriched_df["city"] = enriched_df["city"].combine_first(enriched_df["City"])
            enriched_df = enriched_df.drop(columns=["City"])
        after = int(enriched_df["city"].isna().sum())
        print(f"✅ city: filled {before - after} rows from available sources")

    elif "City" in enriched_df.columns:
        # No lowercase 'city' yet; keep capitalized but still fill from mapping, then rename to canonical 'city'
        before = int(enriched_df["City"].isna().sum())
        if mapped_city is not None:
            enriched_df["City"] = enriched_df["City"].combine_first(mapped_city)
        after = int(enriched_df["City"].isna().sum())
        print(f"✅ City: filled {before - after} rows from users_new['City']; renaming to 'city'")
        enriched_df = enriched_df.rename(columns={"City": "city"})

    else:
        # Neither exists yet — create canonical 'city' from mapping
        enriched_df["city"] = mapped_city
        print("✅ city: created from users_new['City'] (no existing city/City column found)")
else:
    # If no City in users_new and no City column in enriched_df, do nothing
    if "City" in enriched_df.columns and "city" not in enriched_df.columns:
        # if there's only 'City' in enriched_df, standardize to 'city'
        enriched_df = enriched_df.rename(columns={"City": "city"})
        print("ℹ️ Standardized existing 'City' column to 'city' (no City data in users_new).")

# =========================
# 5b) ADD any "new" users not already in enriched_df
# =========================
if len(new_ids) > 0:
    missing_users = users_keyed.loc[list(new_ids)].reset_index()

    # Decide canonical city column name for the target df
    city_target_col = "city"

    # Rename to match enriched_df column names
    rename_map = {
        "HJBC ID": "User Id",
        "Account Name": "Full Name",
        "ID": "TZ",
    }
    if "City" in missing_users.columns:
        rename_map["City"] = city_target_col

    missing_users = missing_users.rename(columns=rename_map)

    # Ensure column order / fill any missing cols with NaN
    for col in enriched_df.columns:
        if col not in missing_users.columns:
            missing_users[col] = pd.NA
    missing_users = missing_users[enriched_df.columns]

    # Append
    enriched_df = pd.concat([enriched_df, missing_users], ignore_index=True)
    print(f"✅ Added {missing_users.shape[0]} new user(s) from users_new (with '{city_target_col}' populated).")
else:
    print("ℹ️ No new users to add.")

# =========================
# 6) Post-diagnostics
# =========================
print("\n✅ enriched_df shape AFTER:", enriched_df.shape)
print("✅ Missing values in enriched_df AFTER:")
for col in cols_to_check:
    print(f"   - {col}: {enriched_df[col].isna().sum()} missing")
if "city" in enriched_df.columns:
    print(f"   - city: {enriched_df['city'].isna().sum()} missing")

# Preview
print("\n✅ Preview of enriched data:")
enriched_df.head(10)


✅ merged_df shape BEFORE: (21800, 39)

✅ Missing values in merged_df BEFORE:
   - Phone: 7779 missing
   - Email: 6309 missing
   - Full Name: 0 missing
   - TZ: 0 missing
   - Marketing Allowed: 0 missing
   - city: 7030 missing

ℹ️ Found 329 user(s) present in users_new but not in merged_df.
✅ Phone: filling 0 rows
✅ Email: filling 0 rows
✅ Full Name: filling 0 rows
✅ TZ: filling 0 rows
✅ Marketing Allowed: filling 0 rows
✅ city: filled 0 rows from available sources
✅ Added 329 new user(s) from users_new (with 'city' populated).

✅ enriched_df shape AFTER: (22129, 39)
✅ Missing values in enriched_df AFTER:
   - Phone: 8021 missing
   - Email: 6637 missing
   - Full Name: 0 missing
   - TZ: 0 missing
   - Marketing Allowed: 0 missing
   - city: 7316 missing

✅ Preview of enriched data:


  enriched_df = pd.concat([enriched_df, missing_users], ignore_index=True)
  enriched_df = pd.concat([enriched_df, missing_users], ignore_index=True)


Unnamed: 0,Assignment,Status,Comments,קמפיין,הערות קמפיין,Full Name,TZ,User Id,Email,Phone,...,isorganization,RiseAbove2425,ST_Count,Count Free Home Ticket,Count Paid Home Ticket,Count Free Away Ticket,Count Paid Away Ticket,Community,Community Count,הסעות עונת 2425
0,נדב,לא יחדש,"לא יחדש, מעדיף לשים את הכסף והזמן על הקבוצת רגל.",ירושלמי - רוכשי כרטיסים ללא מנוי,,גלעד גריבסקי,205461999,10008,3gilad3@gmail.com,,...,False,False,5,0,0,0,0,,0,ללא
1,,חידש,,,,שי קציר,40175192,10010,katzirs30@gmail.com,502877926.0,...,False,False,10,5,3,0,3,,0,ללא
2,עידן,חידש,,מחדשי 2024/25 רגילים,,שלמה קוטלר,94938,10011,rachel@kotler-adika.co.il,544755212.0,...,False,False,10,6,4,0,0,,0,ללא
3,עדי,חידש,מתלבט בגלל מילואים,מחדשי 2024/25 רגילים,,יונתן אנסלמן,38818340,10528,yonie84@gmail.com,524567167.0,...,False,False,10,4,1,0,1,,0,ללא
4,נדב,לא יחדש,עבר לאתונה,מנויי עבר ללא מנוי השנה - חוזרים הביתה,,רז שמעון,10531,10531,,547995767.0,...,False,False,9,0,0,0,1,,0,ללא
5,ליבי,לא זמין,המספר לא מחובר (שלחנו הודעה),מנויי עבר ללא מנוי השנה - חוזרים הביתה,,קובי קמר,10534,10534,,547750464.0,...,False,False,1,0,1,0,0,,0,ללא
6,,חידש,,,,עידו פלדור,33486960,10536,paldor1@gmail.com,508946047.0,...,False,False,10,1,1,0,0,,0,ללא
7,,חידש,,,,דרור מיג'אן,54906474,10538,drormijan@gmail.com,506272772.0,...,False,False,10,17,6,0,6,,0,ללא
8,,חידש,,,,ירון פריד,59640961,10541,yaryar@012.net.il,546373022.0,...,False,False,10,2,1,0,5,,0,ללא
9,,חידש,,,,חיים קיסר,59776435,10542,keisarh@walla.co.il,542009358.0,...,False,False,8,1,1,0,2,,0,ללא


### Ensure those columns "Assignment", "Status", "Comments", "קמפיין" passed correctly:

In [19]:
# Columns to check
cols_to_check = ["Assignment", "Status", "Comments", "קמפיין"]

print("✅ Comparing non-empty counts in merged_df vs enriched_df:\n")

for col in cols_to_check:
    count_start = merged_df[col].notna().sum()
    count_after = enriched_df[col].notna().sum()
    
    print(f"🔹 Column '{col}':")
    print(f"   - In merged_df:   {count_start} non-empty")
    print(f"   - In enriched_df: {count_after} non-empty")
    
    if count_start == count_after:
        print("   ✅ Counts MATCH.\n")
    else:
        print("   ⚠️ Counts DIFFER!\n")

✅ Comparing non-empty counts in merged_df vs enriched_df:

🔹 Column 'Assignment':
   - In merged_df:   4006 non-empty
   - In enriched_df: 4006 non-empty
   ✅ Counts MATCH.

🔹 Column 'Status':
   - In merged_df:   7537 non-empty
   - In enriched_df: 7537 non-empty
   ✅ Counts MATCH.

🔹 Column 'Comments':
   - In merged_df:   4838 non-empty
   - In enriched_df: 4838 non-empty
   ✅ Counts MATCH.

🔹 Column 'קמפיין':
   - In merged_df:   4187 non-empty
   - In enriched_df: 4187 non-empty
   ✅ Counts MATCH.



### Add people that were missing from renewal & excluding ONE TEAM ONE FAMILY:

In [20]:
# Save original mapping for reference
st_2526_reference = st_2526[['User Id', 'Fan / Company', 'assign using  ID number']].drop_duplicates().copy()

names_to_exclude = [
    "בטי דאוי",
    "אריה דאוי",
    "תומר דאוי",
    "עדן דאוי",
    'בועז זוסמן',
    'מיקה זוסמן',
    'עופר ברעם',
    'איתי ברק',
    'יהונתן קפלן',
    'תהילה קפלן',
    'הראל רפאל טלקר',
    'אורי חיון',
    'שרית זוסמן',
    'צבי זוסמן',
    'עילי שוורצמן',
    'אורן שוורצמן',
    'רונן נהרי',
    'בר נהרי',
    'עומר נהרי',
    'טקס קהילה 11/9',
    'Toptix Ltd'
]

emails_to_exclude = [
    "msleon123@gmail.com",
    'hareven@gmail.com',
    'avalid59@gmail.com',
    'gordonsarah01@gmail.com',
    'yishai757@gmail.com',
    'laor.print@gmail.com',
    'rois@nvidia.com',
    'roisasson6@gmail.com'
]

# Define exclusions if needed
names_to_exclude = []  
emails_to_exclude = []  

print("✅ Loaded renew:", renew.shape)

# Convert RenewTransactionDate to datetime (allow mixed formats)
renew["RenewTransactionDate"] = pd.to_datetime(
    renew["RenewTransactionDate"],
    errors="coerce",
    format="mixed"
)
print("✅ Parsed RenewTransactionDate, null dates:", renew["RenewTransactionDate"].isna().sum())

# Filter to today or tomorrow
# today = date.today()
# tomorrow = today + timedelta(days=1)

# before_date_filter = renew.shape[0]
# renew = renew[
#     renew["RenewTransactionDate"].dt.date.isin([today, tomorrow])
# ]
# after_date_filter = renew.shape[0]
# print(f"✅ Filtered to today/tomorrow: {after_date_filter} rows (was {before_date_filter})")

# # Remove duplicate users
# before_dedup = renew.shape[0]
renew = renew.drop_duplicates(subset="User").copy()
# after_dedup = renew.shape[0]
# print(f"✅ Dropped {before_dedup - after_dedup} duplicate User rows")

# Make sure emails are clean
renew["Email"] = renew["Email"].astype(str).str.strip().str.lower()
enriched_df["Email"] = enriched_df["Email"].astype(str).str.strip().str.lower()

# Ensure names are strings
renew["First name"] = renew["First name"].astype(str)
renew["Last name"] = renew["Last name"].astype(str)

# Filter out rows where names contain 'בדיקה'
mask_bad_name = (
    renew["First name"].str.contains("בדיקה", na=False) |
    renew["Last name"].str.contains("בדיקה", na=False)
)
print("✅ Rows containing 'בדיקה':", mask_bad_name.sum())
renew = renew[~mask_bad_name].copy()

# Filter out disallowed OldPriceType
mask_bad_price = renew["OldPriceType"].isin(["מחלקת נוער בי\\", "מחלקת נוער"])
print("✅ Rows with disallowed OldPriceType:", mask_bad_price.sum())
renew = renew[~mask_bad_price].copy()

# Exclude sectors containing 'גלריה'
renew["OldSector"] = renew["OldSector"].astype(str)
mask_gallery = renew["OldSector"].str.contains("גלריה", na=False)
print("✅ Rows with 'גלריה':", mask_gallery.sum())
renew = renew[~mask_gallery].copy()

# Exclude rows with bad emails
mask_bad_email = renew["Email"].str.contains("roboticket|hapoel|adfam", na=False)
print("✅ Rows with bad email patterns:", mask_bad_email.sum())
renew = renew[~mask_bad_email].copy()

# Exclude rows by name or email if you have exclusion lists
renew["Full Name"] = renew["First name"].fillna('') + " " + renew["Last name"].fillna('')
renew = renew[~renew["Full Name"].isin(names_to_exclude)]
renew = renew[~renew["Email"].isin(emails_to_exclude)]
print("✅ After filtering renew rows:", renew.shape)

# Set RenewSeasonTicket flag
renew["RenewSeasonTicket"] = "חידש"

# Rename User to TZ
renew = renew.rename(columns={"User": "TZ"})

# Convert TZ to numeric
renew["TZ"] = pd.to_numeric(renew["TZ"], errors="coerce").astype("Int64")
st_2526_reference["assign using  ID number"] = pd.to_numeric(
    st_2526_reference["assign using  ID number"], errors="coerce"
).astype("Int64")

# Merge to fill User Id
renew = renew.merge(
    st_2526_reference,
    left_on="TZ",
    right_on="assign using  ID number",
    how="left",
    suffixes=("", "_from_reference")
)

# Rename User Id column
renew = renew.rename(columns={
    "User Id_from_reference": "User Id"
})

# Check missing User Ids
still_missing_user_ids = renew["User Id"].isna().sum()
filled_user_ids = renew.shape[0] - still_missing_user_ids
print(f"✅ Filled User Ids from reference: {filled_user_ids}")
print(f"⚠️ Remaining rows missing User Id after merge: {still_missing_user_ids}")

# Build seat strings
def clean_number(x):
    if pd.isna(x):
        return ''
    if isinstance(x, float) and x.is_integer():
        return str(int(x))
    return str(x)

def build_old_seat(row):
    if pd.isna(row['OldSector']) or pd.isna(row['OldRow']) or pd.isna(row['OldNumber']):
        return None
    return (
        str(row['OldSector']) +
        " שורה " +
        clean_number(row['OldRow']) +
        " כיסא " +
        clean_number(row['OldNumber'])
    )

def build_renew_seat(row):
    if pd.isna(row['RenewSector']) or pd.isna(row['RenewRow']) or pd.isna(row['RenewNumber']):
        return None
    return (
        str(row['RenewSector']) +
        " שורה " +
        clean_number(row['RenewRow']) +
        " כיסא " +
        clean_number(row['RenewNumber'])
    )

renew['Old Seat'] = renew.apply(build_old_seat, axis=1)
renew['Renew Seat'] = renew.apply(build_renew_seat, axis=1)
print("✅ Built Old Seat and Renew Seat columns")

# Drop unneeded columns
renew = renew.drop(columns=[
    "Fan / Company",
    "assign using  ID number",
    "School",
    "Last name",
    "First name",
    "OldSector",
    "OldRow",
    "OldNumber",
    "RenewSector",
    "RenewRow",
    "RenewNumber"
])

print("✅ Renew after cleanup:", renew.shape)

# Merge renew into enriched_df to update ALL columns
enriched_df = enriched_df.merge(
    renew,
    on="User Id",
    how="left",
    suffixes=("", "_renew")
)

# Ensure RenewTransactionDate columns are datetime
for col in ["RenewTransactionDate", "RenewTransactionDate_renew"]:
    if col in enriched_df.columns:
        enriched_df[col] = pd.to_datetime(
            enriched_df[col], errors="coerce", format="mixed"
        )

# Combine all fields
renew_suffix_cols = [c for c in enriched_df.columns if c.endswith("_renew")]
for col in renew_suffix_cols:
    base_col = col.replace("_renew", "")
    enriched_df[base_col] = enriched_df[col].combine_first(enriched_df[base_col])

# Drop helper columns
enriched_df = enriched_df.drop(columns=renew_suffix_cols)

print("✅ All renewal data merged and updated.")

enriched_df.head(10)

✅ Loaded renew: (4636, 20)
✅ Parsed RenewTransactionDate, null dates: 1568
✅ Rows containing 'בדיקה': 3
✅ Rows with disallowed OldPriceType: 504
✅ Rows with 'גלריה': 108
✅ Rows with bad email patterns: 14
✅ After filtering renew rows: (3147, 21)
✅ Filled User Ids from reference: 2490
⚠️ Remaining rows missing User Id after merge: 665
✅ Built Old Seat and Renew Seat columns
✅ Renew after cleanup: (3155, 15)
✅ All renewal data merged and updated.


Unnamed: 0,Assignment,Status,Comments,קמפיין,הערות קמפיין,Full Name,TZ,User Id,Email,Phone,...,isorganization,RiseAbove2425,ST_Count,Count Free Home Ticket,Count Paid Home Ticket,Count Free Away Ticket,Count Paid Away Ticket,Community,Community Count,הסעות עונת 2425
0,נדב,לא יחדש,"לא יחדש, מעדיף לשים את הכסף והזמן על הקבוצת רגל.",ירושלמי - רוכשי כרטיסים ללא מנוי,,גלעד גריבסקי,205461999,10008,3gilad3@gmail.com,,...,False,False,5,0,0,0,0,,0,ללא
1,,חידש,,,,שי קציר,40175192,10010,katzirs30@gmail.com,502877926.0,...,False,False,10,5,3,0,3,,0,ללא
2,עידן,חידש,,מחדשי 2024/25 רגילים,,שלמה קוטלר,94938,10011,rachel@kotler-adika.co.il,544755212.0,...,False,False,10,6,4,0,0,,0,ללא
3,עדי,חידש,מתלבט בגלל מילואים,מחדשי 2024/25 רגילים,,יונתן אנסלמן,38818340,10528,yonie84@gmail.com,524567167.0,...,False,False,10,4,1,0,1,,0,ללא
4,נדב,לא יחדש,עבר לאתונה,מנויי עבר ללא מנוי השנה - חוזרים הביתה,,רז שמעון,10531,10531,,547995767.0,...,False,False,9,0,0,0,1,,0,ללא
5,ליבי,לא זמין,המספר לא מחובר (שלחנו הודעה),מנויי עבר ללא מנוי השנה - חוזרים הביתה,,קובי קמר,10534,10534,,547750464.0,...,False,False,1,0,1,0,0,,0,ללא
6,,חידש,,,,עידו פלדור,33486960,10536,paldor1@gmail.com,508946047.0,...,False,False,10,1,1,0,0,,0,ללא
7,,חידש,,,,דרור מיג'אן,54906474,10538,drormijan@gmail.com,506272772.0,...,False,False,10,17,6,0,6,,0,ללא
8,,חידש,,,,ירון פריד,59640961,10541,yaryar@012.net.il,546373022.0,...,False,False,10,2,1,0,5,,0,ללא
9,,חידש,,,,חיים קיסר,59776435,10542,keisarh@walla.co.il,542009358.0,...,False,False,8,1,1,0,2,,0,ללא


### Check new members of st_2526:

In [21]:
print("✅ Loaded st_2526:", st_2526.shape)

# Clean seat numbers
def clean_number(x):
    if pd.isna(x):
        return ''
    if isinstance(x, float) and x.is_integer():
        return str(int(x))
    return str(x)

# Build Renew Seat
def build_renew_seat(row):
    if pd.isna(row['Area']) or pd.isna(row['Row']) or pd.isna(row['Number']):
        return None
    return (
        str(row['Area']) +
        " שורה " +
        clean_number(row['Row']) +
        " כיסא " +
        clean_number(row['Number'])
    )

# Compute Renew Seat
st_2526['Renew Seat'] = st_2526.apply(build_renew_seat, axis=1)

# Rename columns
st_2526 = st_2526.rename(columns={
    'Id': 'RenewSeasonTicketId',
    'Ticket price types': 'RenewPriceType',
    'Price area': 'Renew Price Area',
    "Fan / Company": "Full Name",
    "assign using  ID number": "TZ",
    "Date": "RenewTransactionDate",
    "Price": 'RenewPrice', 
    "Base price": "Renew Base Price"
})

# Validate unique User Ids before filtering
initial_unique_ids = st_2526['User Id'].nunique()
print("✅ Unique User Ids before filtering:", initial_unique_ids)

# Remove Complementary
# st_2526 = st_2526[st_2526['RenewPriceType'] != 'Complementary']
# print("✅ After removing Complementary:", st_2526.shape)

# Convert RenewTransactionDate to datetime
st_2526["RenewTransactionDate"] = pd.to_datetime(
    st_2526["RenewTransactionDate"], errors="coerce"
)

# Validate date parsing
na_dates = st_2526["RenewTransactionDate"].isna().sum()
print("✅ Rows with unparsable RenewTransactionDate:", na_dates)

# Check remaining rows
if st_2526.empty:
    print("⚠️ No rows remaining after filtering.")
else:
    print("✅ Final rows to process:", st_2526.shape[0])

# Select final columns
st_2526 = st_2526[[
    "Full Name",
    "TZ",
    "User Id",
    "Email",
    "Phone",
    "Renew Base Price",
    "RenewPrice",
    "Renew Price Area",
    "RenewPriceType",
    "RenewSeasonTicketId",
    "Renew Seat",
    "RenewTransactionDate"
]]

# Deduplicate by User Id
before_dedup = st_2526.shape[0]
st_2526 = st_2526.drop_duplicates(subset='User Id')
after_dedup = st_2526.shape[0]
print(f"✅ Dropped {before_dedup - after_dedup} duplicate User Id rows.")

# Preview
print("\n✅ Preview of st_2526:")
st_2526.head()

✅ Loaded st_2526: (3826, 53)
✅ Unique User Ids before filtering: 2971
✅ Rows with unparsable RenewTransactionDate: 3
✅ Final rows to process: 3826
✅ Dropped 854 duplicate User Id rows.

✅ Preview of st_2526:


  st_2526["RenewTransactionDate"] = pd.to_datetime(


Unnamed: 0,Full Name,TZ,User Id,Email,Phone,Renew Base Price,RenewPrice,Renew Price Area,RenewPriceType,RenewSeasonTicketId,Renew Seat,RenewTransactionDate
0,וואלה טורס,513845461,1355074.0,,,20000.0,20000.0,פרקט צד,VIP,914061.0,פרקט דרומי שורה A כיסא 10,2025-08-25 15:29:00
6,נועה דגול עמיעד,43546803,38164.0,noadagul@gmail.com,524356688.0,1150.0,977.5,F,Adult,914027.0,אולם 1 עמידה שורה A כיסא 4,2025-08-25 11:11:00
7,מיכל רוטנר,330236563,1432425.0,michalcgilboa@gmail.com,522030692.0,1450.0,1232.5,C,Adult,913964.0,אולם 5 שורה 11 כיסא 30,2025-08-24 20:47:00
8,ירדן רוטנר,226554103,1433481.0,,,1050.0,850.0,C,Child,913963.0,אולם 5 שורה 11 כיסא 31,2025-08-24 20:47:00
9,עמית בינג,335579447,1333229.0,,,700.0,700.0,D,Child,913951.0,אולם 6 שורה 14 כיסא 38,2025-08-24 17:07:00


### Fix TZ to be 9 digits:

In [22]:
# Convert to string and strip whitespace
enriched_df["TZ"] = enriched_df["TZ"].astype(str).str.strip()

# Remove '.0' suffix if present
enriched_df["TZ"] = enriched_df["TZ"].str.replace(r"\.0$", "", regex=True)

# Replace 'nan' string with proper NA
enriched_df["TZ"] = enriched_df["TZ"].replace("nan", pd.NA)

# Pad to 9 digits where applicable
def safe_zfill(x):
    if pd.isna(x):
        return None
    return x.zfill(9)

enriched_df["TZ"] = enriched_df["TZ"].apply(safe_zfill)

# Compute lengths for validation
tz_lengths = enriched_df["TZ"].dropna().apply(len)

# Count by length
length_counts = tz_lengths.value_counts().sort_index()
print("✅ TZ length distribution:\n", length_counts)

# Identify rows where TZ is shorter than 9 digits
short_tz_mask = tz_lengths < 9
short_tz_rows = enriched_df.loc[short_tz_mask.index[short_tz_mask], ["User Id", "Full Name", "TZ"]]
print("✅ Rows where TZ is still shorter than 9 digits:", len(short_tz_rows))
if not short_tz_rows.empty:
    display(short_tz_rows.head(10))

enriched_df = enriched_df.drop_duplicates(subset=["User Id"])

# Count nulls
null_tz_count = enriched_df["TZ"].isna().sum()
print("✅ Rows where TZ is null:", null_tz_count)

✅ TZ length distribution:
 TZ
9     22070
10       20
11       13
12        8
13        2
14        2
15        2
18        1
19        2
20        1
22        3
23        2
27        2
32        4
Name: count, dtype: int64
✅ Rows where TZ is still shorter than 9 digits: 0
✅ Rows where TZ is null: 0


### Add isorganization missing values:

In [23]:
# ids to Int64 (nullable)
users['userid'] = pd.to_numeric(users['userid'], errors='coerce').astype('Int64')
enriched_df['User Id'] = pd.to_numeric(enriched_df['User Id'], errors='coerce').astype('Int64')

# isorganization -> booleans, robust to NaNs / stray values
users['isorganization'] = (
    users['isorganization']
      .astype('string').str.strip().str.lower()
      .map({'yes': True, 'no': False})
      .astype('boolean')
)

enriched_df['isorganization'] = (
    enriched_df['isorganization']
      .replace({1: True, 1.0: True, 0: False, 0.0: False})
      .astype('boolean')
)

# Merge, keep only needed cols from users, and use clear suffixes
enriched_df = enriched_df.merge(
    users[['userid', 'isorganization', 'city']],
    how='left',
    left_on='User Id',
    right_on='userid',
    suffixes=('', '_from_users')
)

# Fill from users when missing
enriched_df['isorganization'] = (
    enriched_df['isorganization'].combine_first(enriched_df['isorganization_from_users'])
).fillna(False)

enriched_df['city'] = enriched_df['city'].combine_first(enriched_df['city_from_users'])

# Drop helpers
enriched_df = enriched_df.drop(columns=['userid', 'isorganization_from_users', 'city_from_users'])

# Preview
print("✅ Final enriched_df shape:", enriched_df.shape)
enriched_df.head()

✅ Final enriched_df shape: (22129, 39)


Unnamed: 0,Assignment,Status,Comments,קמפיין,הערות קמפיין,Full Name,TZ,User Id,Email,Phone,...,isorganization,RiseAbove2425,ST_Count,Count Free Home Ticket,Count Paid Home Ticket,Count Free Away Ticket,Count Paid Away Ticket,Community,Community Count,הסעות עונת 2425
0,נדב,לא יחדש,"לא יחדש, מעדיף לשים את הכסף והזמן על הקבוצת רגל.",ירושלמי - רוכשי כרטיסים ללא מנוי,,גלעד גריבסקי,205461999,10008,3gilad3@gmail.com,,...,False,False,5,0,0,0,0,,0,ללא
1,,חידש,,,,שי קציר,40175192,10010,katzirs30@gmail.com,502877926.0,...,False,False,10,5,3,0,3,,0,ללא
2,עידן,חידש,,מחדשי 2024/25 רגילים,,שלמה קוטלר,94938,10011,rachel@kotler-adika.co.il,544755212.0,...,False,False,10,6,4,0,0,,0,ללא
3,עדי,חידש,מתלבט בגלל מילואים,מחדשי 2024/25 רגילים,,יונתן אנסלמן,38818340,10528,yonie84@gmail.com,524567167.0,...,False,False,10,4,1,0,1,,0,ללא
4,נדב,לא יחדש,עבר לאתונה,מנויי עבר ללא מנוי השנה - חוזרים הביתה,,רז שמעון,10531,10531,,547995767.0,...,False,False,9,0,0,0,1,,0,ללא


## Seniority

In [24]:
seniority = pd.read_excel('Output Segments.xlsx')

# Make sure IDs are numeric
seniority["client_number"] = pd.to_numeric(seniority["client_number"], errors="coerce").astype("Int64")

# Function to get concatenated column names
def concat_columns_with_ones(row):
    cols_with_one = [col for col in row.index if row[col] in (1, 1.0)]
    return ", ".join(cols_with_one) if cols_with_one else None

# Build mapping
seniority_mapping = seniority.set_index("client_number").apply(concat_columns_with_ones, axis=1).to_dict()

# Map to enriched_df
enriched_df["Seniority_String"] = enriched_df["User Id"].map(seniority_mapping)

# Mask: override whenever we have a mapped value
mask = enriched_df["Seniority_String"].notna()

# Update (always overwrite)
enriched_df.loc[mask, "Old Season Ticket (All Owners)"] = enriched_df.loc[mask, "Seniority_String"]

# Clean up
enriched_df = enriched_df.drop(columns=["Seniority_String"])

# Create ST_Count by splitting on commas
enriched_df["ST_Count"] = (
    enriched_df["Old Season Ticket (All Owners)"]
    .fillna("")
    .apply(lambda x: len([item for item in x.split(",") if item.strip()]) if x else 0)
)

enriched_df

Unnamed: 0,Assignment,Status,Comments,קמפיין,הערות קמפיין,Full Name,TZ,User Id,Email,Phone,...,isorganization,RiseAbove2425,ST_Count,Count Free Home Ticket,Count Paid Home Ticket,Count Free Away Ticket,Count Paid Away Ticket,Community,Community Count,הסעות עונת 2425
0,נדב,לא יחדש,"לא יחדש, מעדיף לשים את הכסף והזמן על הקבוצת רגל.",ירושלמי - רוכשי כרטיסים ללא מנוי,,גלעד גריבסקי,205461999,10008,3gilad3@gmail.com,,...,False,False,5,0,0,0,0,,0,ללא
1,,חידש,,,,שי קציר,040175192,10010,katzirs30@gmail.com,0502877926,...,False,False,10,5,3,0,3,,0,ללא
2,עידן,חידש,,מחדשי 2024/25 רגילים,,שלמה קוטלר,000094938,10011,rachel@kotler-adika.co.il,0544755212,...,False,False,10,6,4,0,0,,0,ללא
3,עדי,חידש,מתלבט בגלל מילואים,מחדשי 2024/25 רגילים,,יונתן אנסלמן,038818340,10528,yonie84@gmail.com,0524567167,...,False,False,10,4,1,0,1,,0,ללא
4,נדב,לא יחדש,עבר לאתונה,מנויי עבר ללא מנוי השנה - חוזרים הביתה,,רז שמעון,000010531,10531,,0547995767,...,False,False,9,0,0,0,1,,0,ללא
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22124,,,,,,דוד עזרא,000012239,12239,,547661175.0,...,False,,9,,,,,,,
22125,,,,,,אביה נוי,318907185‬,1390554,,,...,False,,0,,,,,,,
22126,,,,,,אליה גולדמן,226916658,1343458,,,...,False,,0,,,,,,,
22127,,,,,,יענקי סטאריק,yanki.starik@gmail.com,1419241,,,...,False,,0,,,,,,,


### Whoever have both tickets - then ST_Count = 2 not 1:

In [25]:
# Build the mask
mask = (
    (enriched_df["Old Season Ticket (All Owners)"] == "מנוי לעונת 2024/2025") &
    (enriched_df["RenewSeasonTicket"] == "חידש")
)

# Count how many rows match
matched_rows = mask.sum()
print(f"✅ Rows where both conditions are true: {matched_rows}")

# Show sample rows before updating
if matched_rows > 0:
    print("✅ Sample rows BEFORE update:")
    display(enriched_df.loc[mask, ["User Id", "Full Name", "ST_Count"]].head())

# Update ST_Count to 2
enriched_df.loc[mask, "ST_Count"] = 2

# fill na riseabove with false
enriched_df["RiseAbove2425"] = (
    enriched_df["RiseAbove2425"]
        .map({1.0: True, 0.0: False})
        .astype("boolean")
)

# Validate update
print("✅ Confirming ST_Count updated to 2 in matching rows:")
updated_counts = enriched_df.loc[mask, "ST_Count"].value_counts(dropna=False)
print(updated_counts)

✅ Rows where both conditions are true: 8
✅ Sample rows BEFORE update:


Unnamed: 0,User Id,Full Name,ST_Count
232,11080,בועז ברזלי,1
3931,33762,אלון שגיא,1
4517,37199,יהונתן רוזמרין,1
6587,1331209,אריאל ברון,1
6991,1337093,אמרי מן,1


✅ Confirming ST_Count updated to 2 in matching rows:
ST_Count
2    8
Name: count, dtype: int64


### Last check:

In [26]:
columns_to_strip = ["Assignment", "Status", "Comments", "קמפיין"]

# First fill NaN values with empty strings
for col in columns_to_strip:
    enriched_df[col] = enriched_df[col].fillna('')

# Now convert to string and strip whitespace
for col in columns_to_strip:
    enriched_df[col] = enriched_df[col].astype(str).str.strip()

In [27]:
enriched_df['RiseAbove2425'] = enriched_df['RiseAbove2425'].fillna(False)
enriched_df['RenewSeasonTicket'] = enriched_df['RenewSeasonTicket'].fillna('None')
enriched_df['Community'] = enriched_df['Community'].fillna('')
enriched_df['הסעות עונת 2425'] = enriched_df['הסעות עונת 2425'].fillna('ללא')

enriched_df['ST_Count'] = enriched_df['ST_Count'].fillna(0).astype(int)
enriched_df['Community Count'] = enriched_df['Community Count'].fillna(0).astype(int)
enriched_df['Count Free Home Ticket'] = enriched_df['Count Free Home Ticket'].fillna(0).astype(int)
enriched_df['Count Paid Home Ticket'] = enriched_df['Count Paid Home Ticket'].fillna(0).astype(int)
enriched_df['Count Free Away Ticket'] = enriched_df['Count Free Away Ticket'].fillna(0).astype(int)
enriched_df['Count Paid Away Ticket'] = enriched_df['Count Paid Away Ticket'].fillna(0).astype(int)

# enriched_df["Marketing Allowed"] = enriched_df["Marketing Allowed"].map({
#     1: True,
#     0: False
# })

enriched_df.loc[enriched_df["RenewPriceType"].notnull() & (enriched_df["RenewPriceType"] != ""), "RenewSeasonTicket"] = "חידש"

  enriched_df['Community Count'] = enriched_df['Community Count'].fillna(0).astype(int)
  enriched_df['Count Free Home Ticket'] = enriched_df['Count Free Home Ticket'].fillna(0).astype(int)
  enriched_df['Count Paid Home Ticket'] = enriched_df['Count Paid Home Ticket'].fillna(0).astype(int)
  enriched_df['Count Free Away Ticket'] = enriched_df['Count Free Away Ticket'].fillna(0).astype(int)
  enriched_df['Count Paid Away Ticket'] = enriched_df['Count Paid Away Ticket'].fillna(0).astype(int)


In [28]:
desired_order = [
    "Assignment",
    "Status",
    "Comments",
    "קמפיין",
    'הערות קמפיין',
    "Full Name",
    "TZ",
    "User Id",
    "Email",
    "Phone",
    "city",
    "Old Season Ticket (All Owners)",
    "OldTransactionDate",
    "Old Seat",
    "Base price",
    "Price",
    "Price area",
    "OldPriceType",
    "OldSeasonTicketId",
    "RenewSeasonTicket",
    "Renew Seat",
    "RenewTransactionDate",
    "RenewType",
    "RenewSeasonTicketId",
    "Marketing Allowed",
    "Renew Base Price",
    "RenewPrice",
    "Renew Price Area",
    "RenewPriceType",
    "isorganization",
    "RiseAbove2425",
    "ST_Count",
    "Count Free Home Ticket",
    "Count Paid Home Ticket",
    "Count Free Away Ticket",
    "Count Paid Away Ticket",
    "Community",
    "Community Count",
    "הסעות עונת 2425"
]
enriched_df = enriched_df[desired_order]
enriched_df

Unnamed: 0,Assignment,Status,Comments,קמפיין,הערות קמפיין,Full Name,TZ,User Id,Email,Phone,...,isorganization,RiseAbove2425,ST_Count,Count Free Home Ticket,Count Paid Home Ticket,Count Free Away Ticket,Count Paid Away Ticket,Community,Community Count,הסעות עונת 2425
0,נדב,לא יחדש,"לא יחדש, מעדיף לשים את הכסף והזמן על הקבוצת רגל.",ירושלמי - רוכשי כרטיסים ללא מנוי,,גלעד גריבסקי,205461999,10008,3gilad3@gmail.com,,...,False,False,5,0,0,0,0,,0,ללא
1,,חידש,,,,שי קציר,040175192,10010,katzirs30@gmail.com,0502877926,...,False,False,10,5,3,0,3,,0,ללא
2,עידן,חידש,,מחדשי 2024/25 רגילים,,שלמה קוטלר,000094938,10011,rachel@kotler-adika.co.il,0544755212,...,False,False,10,6,4,0,0,,0,ללא
3,עדי,חידש,מתלבט בגלל מילואים,מחדשי 2024/25 רגילים,,יונתן אנסלמן,038818340,10528,yonie84@gmail.com,0524567167,...,False,False,10,4,1,0,1,,0,ללא
4,נדב,לא יחדש,עבר לאתונה,מנויי עבר ללא מנוי השנה - חוזרים הביתה,,רז שמעון,000010531,10531,,0547995767,...,False,False,9,0,0,0,1,,0,ללא
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22124,,,,,,דוד עזרא,000012239,12239,,547661175.0,...,False,False,9,0,0,0,0,,0,ללא
22125,,,,,,אביה נוי,318907185‬,1390554,,,...,False,False,0,0,0,0,0,,0,ללא
22126,,,,,,אליה גולדמן,226916658,1343458,,,...,False,False,0,0,0,0,0,,0,ללא
22127,,,,,,יענקי סטאריק,yanki.starik@gmail.com,1419241,,,...,False,False,0,0,0,0,0,,0,ללא


# The End:

In [29]:
enriched_df.to_excel('Final_Users.xlsx', index=False)

### Attendace for ST_members:

In [None]:
# Load your data
df = pd.read_excel("processed_games.xlsx")

# Define mapping function
def map_values(x):
    if pd.isna(x):
        return None
    if x == 0.0:
        return False
    if x == 1.0:
        return True
    return x

# Apply mapping to all cells
df = df.applymap(map_values)

df

  df = df.applymap(map_values)


Unnamed: 0,Full Name,User Id,Id,Round 2: Hapoel Tel Aviv 🚗,Round 4: Hapoel Holon 🏠,Round 8: Hapoel Afula 🏠,🏠 מחזור 9: הפועל ״בנק יהב״ ירושלים -הפועל גליל עליון,"🏠 מחזור11: הפועל ״בנק יהב״ ירושלים -מכבי תא""",ליגת ווינר סל מחזור 17: גלבוע גליל 🏠,ליגת ווינר סל מחזור 18: הרצליה 🏠,ליגת ווינר סל מחזור 20: הפועל חיפה 🏠,מחזור 22: אליצור עירוני נתניה 🏠,מחזור 24: הפועל באר שבע - דימונה 🏠,מחזור 26: עירוני נס ציונה 🏠,רבע גמר 1: מכבי עירוני רמת גן 🏠,רבע גמר 3: מכבי עירוני רמת גן 🏠,חצי גמר משחק 2: הפועל תל אביב 🏠,Discount rule,גמר ליגת ווינר משחק 2: מכבי תל אביב 🏠
0,five test,1330587,801629,False,False,False,False,False,False,False,False,False,False,False,False,False,,,
1,Test twelve,1330653,801726,False,False,False,False,False,False,False,False,False,False,False,False,False,,,
2,Test ROBO,1330686,801760,False,False,False,False,False,False,False,False,False,False,False,False,False,,,
3,Testing ROBO,1330719,801793,False,False,False,False,False,False,False,False,False,False,False,False,False,,,
4,עירית עינב,26178,801992,False,False,False,False,False,False,False,False,False,False,False,False,False,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6904,,11567,899888,,,,,,,,,,,,,,,,True
6905,,12825,899934,,,,,,,,,,,,,,,,True
6906,,14075,899935,,,,,,,,,,,,,,,,True
6907,,1420313,899936,,,,,,,,,,,,,,,,False
