In [44]:
import pandas as pd
import numpy as np

# Split the installments report into three sheets:
#   * rows of payment block 79991 whose product-code segment contains 4118
#   * the remaining rows of payment block 79991
#   * every row that belongs to any other payment block
#
# The pure helpers below hold the splitting logic so it can be reused and
# tested without touching the CSV/Excel files; the I/O lives under the
# ``__main__`` guard (which is also true inside a notebook cell).

INPUT_CSV = "HapoelInstallmentsReportCsv_2025-08-14.csv"
OUTPUT_XLSX = "HapoelInstallments_split.xlsx"
PAYMENT_REF_COL = "InstallmentPaymentExtRef"
PRODUCT_REF_COL = "InstallmentProductExtRef"
TARGET_PAYMENT_REF = 79991
ADVERTISEMENT_CODE = 4118


def payment_block_key(refs):
    """Return the forward-filled payment reference of each row as a number.

    Args:
        refs: Series holding the raw payment reference column. Blank entries
            (NaN, "", "nan", "NaN") inherit the closest non-blank value
            above them.

    Returns:
        A numeric Series aligned with ``refs``. Values that cannot be parsed
        as numbers, and leading blanks with nothing above them, are NaN.
    """
    filled = refs.replace({"": pd.NA, "nan": pd.NA, "NaN": pd.NA}).ffill()
    # Keep the key numeric: casting to Int64 would raise on a non-integer
    # reference such as 79991.5, and NaN already compares unequal to any
    # target, so no string round trip is needed.
    return pd.to_numeric(filled, errors="coerce")


def segments_containing(codes, wanted):
    """Flag every row whose segment contains ``wanted`` at least once.

    A segment starts at each blank (NaN) entry of ``codes`` and runs until
    the next blank; rows before the first blank form a segment of their own.

    Args:
        codes: numeric Series of product codes, NaN where blank.
        wanted: the code to look for.

    Returns:
        A boolean Series aligned with ``codes``.
    """
    # The running count of blanks is constant inside a segment and increases
    # by one on each blank row, so the blank row opens the segment after it.
    segment_id = codes.isna().cumsum()
    return codes.eq(wanted).groupby(segment_id).transform("any").astype(bool)


if __name__ == "__main__":
    # --- 1) Load
    df = pd.read_csv(INPUT_CSV)

    # --- 2) Split the target payment block from the rest
    block_key = payment_block_key(df[PAYMENT_REF_COL])
    in_target_block = block_key == TARGET_PAYMENT_REF

    only_79991 = df[in_target_block].copy()
    rest = df[~in_target_block].copy()

    # --- 3) Within the target block, split off segments containing 4118
    code = pd.to_numeric(only_79991[PRODUCT_REF_COL], errors="coerce")
    # The numeric version of the column is what ends up in the Excel sheets.
    only_79991[PRODUCT_REF_COL] = code

    seg_has_4118 = segments_containing(code, ADVERTISEMENT_CODE)

    advertisment = only_79991[seg_has_4118].copy()
    without_advertisement = only_79991[~seg_has_4118].copy()

    print("Rows in 79991 advertisment:", advertisment.shape)
    print("Rows in 79991 without_ad:", without_advertisement.shape)
    print("Rows in the rest:", rest.shape)

    # --- 4) Save all three to one Excel
    out = OUTPUT_XLSX
    with pd.ExcelWriter(out, engine="openpyxl") as w:
        without_advertisement.to_excel(w, sheet_name="79991_without_ad", index=False)
        advertisment.to_excel(w, sheet_name="79991_advertisement", index=False)
        rest.to_excel(w, sheet_name="Rest", index=False)

Rows in 79991 advertisment: (320, 10)
Rows in 79991 without_ad: (98, 10)
Rows in the rest: (1449, 10)
