In [None]:
import pandas as pd
from pathlib import Path

def split_by_extref_block(
    input_csv,
    extref_col='InstallmentPaymentExtRef',
    target_value=79991,
    output_xlsx=None,
    preview_rows=20,          # kept for backward compatibility; not used by this function
    save=True                 # set False to skip writing the Excel workbook
):
    """Split a CSV into the rows inside and outside the blocks headed by ``target_value``.

    The ``extref_col`` column is read as a block header: every non-null cell
    starts a new block, and the null cells below it belong to that block.
    A block is selected when its header cell, parsed as a number, equals
    ``target_value``. Several blocks may match; all of them are selected.
    Rows that precede the first non-null header never match and end up in
    the "except" frame.

    Args:
        input_csv: Anything ``pandas.read_csv`` accepts. When ``save`` is true
            and ``output_xlsx`` is None it must also be usable with
            ``pathlib.Path``, because the output name is derived from it.
        extref_col: Name of the header column; matched against the CSV's
            columns ignoring whitespace and letter case.
        target_value: Numeric header value (anything ``float()`` accepts)
            identifying the blocks to extract.
        output_xlsx: Destination workbook path. Defaults to the input path
            with its suffix replaced by ``_split.xlsx``.
        preview_rows: Unused; accepted so existing callers keep working.
        save: When true, write both frames to ``output_xlsx`` as two sheets
            named ``except_<target>`` and ``only_<target>``.

    Returns:
        A ``(df_only, df_except)`` tuple of DataFrame copies: the rows in the
        matching blocks, and every other row.

    Raises:
        KeyError: If no column matches ``extref_col``.
    """
    # Convert first so an unusable target fails before any file is read.
    target = float(target_value)

    df = pd.read_csv(input_csv)

    # --- find the exact column name (ignore spaces/case) ---
    wanted = ''.join(extref_col.split()).lower()
    try:
        col = next(
            c for c in df.columns
            if ''.join(str(c).split()).lower() == wanted
        )
    except StopIteration:
        # `from None` hides the internal StopIteration from the traceback.
        raise KeyError(
            f"Column '{extref_col}' not found. Available: {list(df.columns)}"
        ) from None

    # --- treat the ExtRef column as numeric to avoid '79991' vs '79991.0' mismatches ---
    ext = pd.to_numeric(df[col], errors='coerce')

    # --- make block ids: every non-null in ExtRef starts a new block ---
    # The raw column is used (not `ext`) so a non-numeric header still
    # terminates the previous block.
    block_id = df[col].notna().cumsum()

    # --- which block ids start with the target header? (can be multiple) ---
    target_blocks = block_id[ext == target].unique()

    # --- mask for rows that belong to any target block ---
    mask = block_id.isin(target_blocks)

    df_only = df[mask].copy()
    df_except = df[~mask].copy()

    # --- optionally save to Excel with two sheets ---
    if save:
        if output_xlsx is None:
            output_xlsx = str(Path(input_csv).with_suffix('')) + '_split.xlsx'
        # Label the sheets with the value actually requested; an integral
        # target is shown without a trailing '.0' (79991 -> '79991').
        label = str(int(target)) if target.is_integer() else str(target)
        with pd.ExcelWriter(output_xlsx, engine='openpyxl') as w:
            df_except.to_excel(w, sheet_name=f'except_{label}', index=False)
            df_only.to_excel(w, sheet_name=f'only_{label}', index=False)
        print(f"\nSaved: {output_xlsx}")

    return df_only, df_except

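# Example usage: dry run that returns the two frames without writing a file:
# df_only, df_except = split_by_extref_block("HapoelInstallmentsReportCsv_2025-08-14.csv", save=False)
# Once the split looks right, write the two-sheet workbook:
# split_by_extref_block("HapoelInstallmentsReportCsv_2025-08-14.csv", save=True)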


2025-08-14 16:42:30.976 
  command:

    streamlit run /opt/anaconda3/lib/python3.12/site-packages/ipykernel_launcher.py [ARGUMENTS]
2025-08-14 16:42:30.978 Session state does not function when running a script without `streamlit run`


DeltaGenerator()