# Evaluate time measurements

In [20]:
import os
from pathlib import Path
from typing import Iterator

import pandas as pd

from lp_nn_robustness_verification.data_acquisition.generate_nn_params import (
    construct_out_features_counts,
)

## Preparations

In [21]:
# Absolute location of the folder that holds all timing measurement files.
ABS_PATH_TO_TIMINGS = Path("./time_measurements").resolve()


def get_absolute_path_to(filename: Path) -> Path:
    """Build the absolute path of ``filename`` inside the timings folder."""
    return ABS_PATH_TO_TIMINGS / filename


def timings_listdir() -> list[str]:
    """Return the names of all entries in the timings folder in sorted order.

    ``os.listdir`` returns a list (not an iterator) in arbitrary order, so the
    names are sorted here to make every run process the files in the same
    order.

    Raises:
        FileNotFoundError: If the timings folder does not exist.
    """
    return sorted(os.listdir(ABS_PATH_TO_TIMINGS))


def read_dataframe(filepath: Path) -> pd.DataFrame:
    def find_last_full_evaluation_idx(_filepath: Path) -> tuple[int, int | None]:
        """Find last full evaluation in timings file and return line index of beginning"""
        start_idx = None
        header_length = 6
        with open(_filepath, "r") as timings_file:
            n_lines = None
            for line_idx, line_text in enumerate(timings_file):
                if "Everything has been done" in line_text:
                    start_idx = line_idx + header_length
                    n_lines = None
                if line_text[:4] == "name":
                    last_name_idx = line_idx
                if (
                    "===========================" in line_text
                    and start_idx is not None
                    and line_idx - start_idx > 1
                ):
                    if n_lines is None:
                        n_lines = line_idx - start_idx - 2
        if start_idx is None:
            return last_name_idx, None
        return start_idx, n_lines

    start_line_idx, length = find_last_full_evaluation_idx(filepath)
    dataframe = pd.read_fwf(
        filepath,
        nrows=length,
        skip_blank_lines=False,
        header=start_line_idx,
        widths=[170, 8],
    )
    if not "ttot" in dataframe.columns:
        dataframe = pd.read_fwf(
            filepath,
            nrows=length,
            skip_blank_lines=False,
            header=start_line_idx,
            widths=[180, 8],
        )
    return dataframe


def compute_all_spends(
    dataframe: pd.DataFrame,
) -> tuple[float, float, float, float, float]:
    """Accumulate the "ttot" times per portion of the implementation.

    Rows are assigned to a portion by a literal substring match on the "name"
    column. A row matching more than one substring is counted in each matching
    portion.

    Args:
        dataframe: Table with a "name" column and a numeric "ttot" column.

    Returns:
        ``(interval, linear_inclusion, robust_verifier, others, total)``:
        the summed "ttot" of rows whose name contains "/interval/",
        "LinearInclusion" and "/linear_program.py" respectively, of all
        remaining rows, and of the whole table.
    """

    def get_spend_in_idx(df: pd.DataFrame, criteria: str) -> pd.Series:
        """Return a boolean mask that is True for rows whose name contains ``criteria``"""
        # regex=False: the criteria are literal path fragments, so "." must not
        # act as a wildcard. na=False: rows without a name (e.g. blank lines
        # kept by the reader) simply do not match instead of poisoning the mask.
        return df["name"].str.contains(criteria, regex=False, na=False)

    def get_spend_in(df: pd.DataFrame, idxs: pd.Series) -> float:
        """Return the sum of "ttot" over all rows selected by the mask"""
        return df.loc[idxs, "ttot"].sum()

    total_spend = dataframe["ttot"].sum()
    in_interval = get_spend_in_idx(dataframe, "/interval/")
    in_linear_inclusion = get_spend_in_idx(dataframe, "LinearInclusion")
    in_robust_verifier = get_spend_in_idx(dataframe, "/linear_program.py")
    # Everything that matched none of the three portions above.
    in_others = ~(in_interval | in_linear_inclusion | in_robust_verifier)
    return (
        get_spend_in(dataframe, in_interval),
        get_spend_in(dataframe, in_linear_inclusion),
        get_spend_in(dataframe, in_robust_verifier),
        get_spend_in(dataframe, in_others),
        total_spend,
    )


def print_for_latex_table(
    spend_in_int: float,
    spend_in_li: float,
    spend_in_robust: float,
    spend_else: float,
    spend_in_total: float,
) -> None:
    """Print a LaTeX table row excerpt to copy-paste.

    Every value is formatted in scientific notation with one decimal place.
    The first four values are wrapped in right-aligned ``\\multicolumn`` cells;
    the total is printed as a plain cell.

    Args:
        spend_in_int: Value for the first cell.
        spend_in_li: Value for the second cell.
        spend_in_robust: Value for the third cell.
        spend_else: Value for the fourth cell.
        spend_in_total: Value for the final, unwrapped cell.
    """

    def cell(value: float) -> str:
        """Wrap one value in a right-aligned ``\\multicolumn`` cell"""
        # Raw string: "\m" is not a valid escape sequence in a normal literal.
        return rf"\multicolumn{{1}}{{r|}}{{{value:.1e}}}"

    print(
        f"{cell(spend_in_int)} & {cell(spend_in_li)} &\n"
        f"{cell(spend_in_robust)}\n"
        f"& {cell(spend_else)}\n"
        f"& {spend_in_total:.1e}"
    )


def run_pipeline_for_timings_file(filepath: Path) -> None:
    """Read one timings file and print its accumulated timings.

    Nothing is printed when the parsed table has no "ttot" column.
    """
    timings = read_dataframe(get_absolute_path_to(filepath))
    if "ttot" not in timings.columns:
        return
    spends = compute_all_spends(timings)
    print_for_latex_table(*spends)


def print_last_timings_for_all():
    """Run the pipeline on every timings file, printing a banner before each one"""
    separator = (
        "==========================================================="
        "==========================="
    )
    for file_name in timings_listdir():
        # Skip folder entries that are not timings files.
        if "_timings.txt" not in file_name:
            continue
        print(f"\n{separator}\nTimings for {file_name}\n{separator}")
        run_pipeline_for_timings_file(file_name)

## Extract accumulated times, formatted for direct insertion into a LaTeX table

In [22]:
# Print a banner and a LaTeX row excerpt for every file in the timings folder
# whose name contains "_timings.txt".
print_last_timings_for_all()


Timings for 11000_inputs_and_3_layers_with_sample_0_and_seed_0_timings.txt
\multicolumn{1}{r|}{1.3e+05} & \multicolumn{1}{r|}{4.0e+04} &
\multicolumn{1}{r|}{7.2e+03}
& \multicolumn{1}{r|}{2.5e+00}
& 1.8e+05

Timings for 22000_inputs_and_1_layers_with_sample_0_and_seed_0_timings.txt
\multicolumn{1}{r|}{2.7e+03} & \multicolumn{1}{r|}{8.1e+02} &
\multicolumn{1}{r|}{1.3e+02}
& \multicolumn{1}{r|}{6.4e-02}
& 3.7e+03

Timings for 11000_inputs_and_5_layers_with_sample_0_and_seed_0_timings.txt
\multicolumn{1}{r|}{2.4e+05} & \multicolumn{1}{r|}{7.2e+04} &
\multicolumn{1}{r|}{1.1e+04}
& \multicolumn{1}{r|}{4.2e+00}
& 3.3e+05

Timings for 110_inputs_and_5_layers_with_sample_0_and_seed_0_timings.txt
\multicolumn{1}{r|}{7.6e+01} & \multicolumn{1}{r|}{2.3e+01} &
\multicolumn{1}{r|}{2.4e+00}
& \multicolumn{1}{r|}{2.6e-02}
& 1.0e+02

Timings for 11000_inputs_and_1_layers_with_sample_0_and_seed_0_timings.txt
\multicolumn{1}{r|}{1.3e+03} & \multicolumn{1}{r|}{4.0e+02} &
\multicolumn{1}{r|}{5.9e+01}
& \

## Network architecture overview

In [23]:
size_scalers: list[int] = [1, 10, 100, 1000, 2000]
depths: list[int] = [1, 3, 5, 8]
# Print the total neuron count for every combination of input size and depth.
for size_scaler in size_scalers:
    for depth in depths:
        input_size = size_scaler * 11
        # The output width is capped by how far the input size exceeds the
        # depth: that difference itself below 10, otherwise 10, and 100 once
        # the difference reaches 100.
        if input_size - depth < 10:
            out_features = input_size - depth
        elif input_size - depth < 100:
            out_features = 10
        else:
            out_features = 100
        # NOTE(review): presumably one neuron count per layer -- confirm
        # against construct_out_features_counts.
        layer_counts = construct_out_features_counts(
            in_features=input_size, depth=depth, out_features=out_features
        )
        n_neurons = sum(layer_counts)
        print(
            f"total number of neurons for input size {input_size} "
            f"and depth {depth}: {n_neurons}"
        )

total number of neurons for input size 11 and depth 1: 10
total number of neurons for input size 11 and depth 3: 27
total number of neurons for input size 11 and depth 5: 40
total number of neurons for input size 11 and depth 8: 52
total number of neurons for input size 110 and depth 1: 100
total number of neurons for input size 110 and depth 3: 311
total number of neurons for input size 110 and depth 5: 520
total number of neurons for input size 110 and depth 8: 841
total number of neurons for input size 1100 and depth 1: 100
total number of neurons for input size 1100 and depth 3: 1301
total number of neurons for input size 1100 and depth 5: 2500
total number of neurons for input size 1100 and depth 8: 4300
total number of neurons for input size 11000 and depth 1: 100
total number of neurons for input size 11000 and depth 3: 11201
total number of neurons for input size 11000 and depth 5: 22300
total number of neurons for input size 11000 and depth 8: 38958
total number of neurons for