In [22]:
import shutil
import tempfile
import time
from concurrent.futures import ProcessPoolExecutor
from datetime import datetime
from pathlib import Path

import pandas as pd

from src.io import read_rpt

In [23]:
# RUN_<n> sub-folders to scan inside every run directory.
run_numbers = ["179", "250"]

# Suffixes of the "#288.<run>" result folders to convert.
runs_of_interest = ["301", "302", "303", "304", "305", "306", "307", "308", "311", "312", "313", "314", "315"]

# Columns copied from each report into its CSV, in output order.
# Columns a given report does not contain are skipped when the CSV is written.
# Every name is listed exactly once: a repeated name would be selected twice
# and written as a duplicate CSV column.
vars_to_keep = [
    "IFRS17_CONTRACT_ID",
    "LFRC_BEL",
    "LFRC_RA",
    "NO_POLS_MS(1:1)",
    "NO_POLS_MS(1:2)",
    "NO_POLS_MS(1:3)",
    "LFRC_BEL_COMPONENTS_I17(1)",
    "LFRC_BEL_COMPONENTS_I17(13)",
    "LFRC_BEL_COMPONENTS_I17(36)",
    "LFRC_BEL_COMPONENTS_I17(7)",
    "LFRC_BEL_COMPONENTS(15)",
    "LFRC_BEL_COMPONENTS(16)",
    "LFRC_BEL_COMPONENTS(17)",
    "LFRC_BEL_COMPONENTS(18)",
    "LFRC_BEL_COMPONENTS(19)",
    "LFRC_BEL_COMPONENTS(20)",
    "LFRC_BEL_COMPONENTS(21)",
    "LFRC_BEL_COMPONENTS(22)",
    "LFRC_BEL_COMPONENTS(23)",
    "LFRC_BEL_COMPONENTS(24)",
    "LFRC_BEL_COMPONENTS(25)",
    "LFRC_BEL_COMPONENTS(26)",
    "LFRC_BEL_COMPONENTS(27)",
    "LFRC_BEL_COMPONENTS(28)",
    "LFRC_BEL_COMPONENTS(29)",
    "LFRC_BEL_COMPONENTS(30)",
    "LFRC_BEL_COMPONENTS(51)",
    "REPORTING_DATA_DIMENSION(4)",
    "BEL_COMPONENTS(12)",
    "BEL_COMPONENTS(13)",
    "BEL_COMPONENTS(14)",
    "BEL_COMPONENTS(15)",
    "BEL_COMPONENTS(16)",
    "BEL_COMPONENTS(17)",
    "BEL_COMPONENTS(18)",
    "BEL_COMPONENTS(21)",
    "BEL_COMPONENTS(35)",
    "BEL_COMPONENTS(44)",
    "BEL_COMPONENTS(58)",
]

In [24]:
# Root folder for the generated CSV files, relative to the working directory.
out_dir = Path("./out")
# Make sure it is present; an already existing folder is not an error.
out_dir.mkdir(exist_ok=True, parents=True)

In [25]:
# Local scratch folder: each .rpt file is copied here before it is read.
# Uses the same location as the task-building cell further down, so that
# only one scratch folder is ever created.
local_temp_dir = Path("C:/Temp/VPN_Files")
local_temp_dir.mkdir(parents=True, exist_ok=True)

In [26]:
def process_rpt_file(args):
    """Convert one ``.rpt`` report into a CSV holding only the wanted columns.

    The report is copied into a private scratch folder, read with
    ``read_rpt``, and written to
    ``out_dir / "#288.<run>" / "RUN_<run_number>" / "<stem>.csv"``.
    If that CSV already exists the report is skipped.

    Parameters
    ----------
    args : tuple
        ``(rpt_file, run, run_number, out_dir, local_temp_dir, vars_to_keep)``
        packed into one tuple so the function can be used with
        ``executor.map``. ``rpt_file``, ``out_dir`` and ``local_temp_dir``
        are ``Path`` objects, and ``local_temp_dir`` must already exist.
        ``vars_to_keep`` is the ordered list of column names to export.

    Returns
    -------
    None
        Results are written to disk. Failures are printed, not raised, so
        one bad report does not stop the rest of the batch.
    """
    rpt_file, run, run_number, out_dir, local_temp_dir, vars_to_keep = args
    try:
        out_path = out_dir / f"#288.{run}" / f"RUN_{run_number}"
        out_file = out_path / f"{rpt_file.stem}.csv"

        if out_file.exists():
            print(f"{rpt_file} already exists. skipping...\n")
            return

        out_path.mkdir(parents=True, exist_ok=True)

        # The same report name occurs under several runs / run numbers, so a
        # copy keyed only on the file name could be overwritten or deleted by
        # another worker. A private scratch folder per call keeps the original
        # file name and is removed automatically, even on error.
        with tempfile.TemporaryDirectory(dir=local_temp_dir) as scratch_dir:
            local_copy = Path(scratch_dir) / rpt_file.name
            shutil.copy2(rpt_file, local_copy)

            # Only the read is timed.
            start = time.perf_counter()
            df = read_rpt(local_copy)
            end = time.perf_counter()

            # Keep the requested order, drop names the report does not have,
            # and never select the same column twice.
            output_columns = list(dict.fromkeys(col for col in vars_to_keep if col in df.columns))

            # Write under a temporary name and rename on success, so that an
            # interrupted write cannot leave a truncated CSV that later runs
            # would treat as "already exists".
            partial_file = out_file.with_name(out_file.name + ".part")
            try:
                df[output_columns].to_csv(partial_file, index=False)
                partial_file.replace(out_file)
            finally:
                partial_file.unlink(missing_ok=True)

        print(f"{datetime.now()} processed {rpt_file.name} in {end - start:.2f}s")

    except Exception as e:
        print(f"Error with {rpt_file}: {e}")

In [27]:
# Scratch folder handed to every task.
# NOTE(review): this re-binds local_temp_dir; the tuples built below capture
# this value. Confirm this is the intended scratch location.
local_temp_dir = Path("C:/Temp/VPN_Files")
local_temp_dir.mkdir(parents=True, exist_ok=True)

# One argument tuple per .rpt file found under every run / run-number folder,
# in the layout process_rpt_file unpacks.
all_tasks = []
for run in runs_of_interest:
    run_root = Path(rf"\\omrprtp05\DEVELOPMENT\PE_Results\Segments\MFC RSA\2024-12\Mass Risk\Full\#288.{run}")
    for run_number in run_numbers:
        run_number_dir = run_root / f"RUN_{run_number}"
        all_tasks.extend(
            (rpt_file, run, run_number, out_dir, local_temp_dir, vars_to_keep)
            for rpt_file in list(run_number_dir.glob("*.rpt"))
        )

In [28]:
if __name__ == "__main__":
    # Convert the reports with four worker processes.
    # NOTE(review): worker processes must be able to import the target
    # function. A function defined in a notebook cell may not be importable
    # under the "spawn" start method (the Windows default); if the pool
    # breaks, move process_rpt_file into a module. TODO confirm.
    with ProcessPoolExecutor(max_workers=4) as executor:
        # executor.map only re-raises a failure when its result is retrieved.
        # Drain the iterator so a broken pool or an unpicklable task is
        # reported here and does not pass silently.
        for _ in executor.map(process_rpt_file, all_tasks):
            pass

In [None]:
# Sequential variant of the conversion: walks the same run / run-number
# folders and converts one report at a time.
# Each file is handed to process_rpt_file, which skips existing outputs,
# makes the local copy, writes the CSV, cleans up and reports errors itself.
# That keeps one implementation for both the parallel and the sequential path.
for run in runs_of_interest:
    results_path = Path(rf"\\omrprtp05\DEVELOPMENT\PE_Results\Segments\MFC RSA\2024-12\Mass Risk\Full\#288.{run}")
    for run_number in run_numbers:
        results_dir = results_path / f"RUN_{run_number}"
        for rpt_file in results_dir.glob("*.rpt"):
            print(f"{datetime.now()} processing {rpt_file}")
            process_rpt_file((rpt_file, run, run_number, out_dir, local_temp_dir, vars_to_keep))

2025-06-13 09:16:21.900058 processing \\omrprtp05\DEVELOPMENT\PE_Results\Segments\MFC RSA\2024-12\Mass Risk\Full\#288.26\RUN_179\MRSARIBI0N.rpt
file exists. skipping..

2025-06-13 09:16:21.900058 processing \\omrprtp05\DEVELOPMENT\PE_Results\Segments\MFC RSA\2024-12\Mass Risk\Full\#288.26\RUN_179\MRSARIBL0N.rpt
file exists. skipping..

2025-06-13 09:16:21.901034 processing \\omrprtp05\DEVELOPMENT\PE_Results\Segments\MFC RSA\2024-12\Mass Risk\Full\#288.26\RUN_179\MRSARIBX0N.rpt
file exists. skipping..

2025-06-13 09:16:21.901562 processing \\omrprtp05\DEVELOPMENT\PE_Results\Segments\MFC RSA\2024-12\Mass Risk\Full\#288.26\RUN_179\MRSARICX0N.rpt
file exists. skipping..

2025-06-13 09:16:21.901562 processing \\omrprtp05\DEVELOPMENT\PE_Results\Segments\MFC RSA\2024-12\Mass Risk\Full\#288.26\RUN_179\MRSARIDWFA.rpt
file exists. skipping..

2025-06-13 09:16:21.901562 processing \\omrprtp05\DEVELOPMENT\PE_Results\Segments\MFC RSA\2024-12\Mass Risk\Full\#288.26\RUN_179\MRSARIE00N.rpt
file exists