In [13]:
import numpy as np
from array import array
from typing import Sequence
import pandas as pd


def calculate_mean(time_points: pd.Series, values: pd.Series) -> float:
    """Return the time-weighted mean of a piecewise-constant signal.

    ``values[i]`` is weighted by the interval
    ``time_points[i + 1] - time_points[i]``. Pairing is positional (Series
    indexes are ignored), so any value without a following time point --
    normally just the last one -- carries no weight.

    Args:
        time_points: Numeric sample times.
        values: Samples taken at ``time_points``.

    Returns:
        ``sum(value * interval) / sum(interval)`` as a Python float.

    Raises:
        ZeroDivisionError: If the intervals sum to zero, which includes the
            case of fewer than two time points.
    """
    # Interval i spans time_points[i] .. time_points[i + 1].
    intervals = np.diff(time_points.to_numpy(dtype=float))
    # Pair positionally and trim both sides to the shorter length.
    held_values = values.to_numpy(dtype=float)[:intervals.size]
    intervals = intervals[:held_values.size]
    # np.average raises ZeroDivisionError when the weights sum to zero.
    return float(np.average(held_values, weights=intervals))

def calculate_std_dev(time_points: pd.Series, values: pd.Series, mean: float) -> float:
    """Return the time-weighted standard deviation of a piecewise-constant signal.

    The squared deviation ``(values[i] - mean) ** 2`` is weighted by the
    interval ``time_points[i + 1] - time_points[i]``. Pairing is positional
    (Series indexes are ignored), so any value without a following time
    point -- normally just the last one -- carries no weight.

    Args:
        time_points: Numeric sample times.
        values: Samples taken at ``time_points``.
        mean: The value deviations are measured from; pass the time-weighted
            mean of the same ``values`` to get its standard deviation.

    Returns:
        ``sqrt(sum((value - mean) ** 2 * interval) / sum(interval))`` as a
        NumPy float scalar.

    Raises:
        ZeroDivisionError: If the intervals sum to zero, which includes the
            case of fewer than two time points.
    """
    # Interval i spans time_points[i] .. time_points[i + 1].
    intervals = np.diff(time_points.to_numpy(dtype=float))
    # Pair positionally and trim both sides to the shorter length.
    held_values = values.to_numpy(dtype=float)[:intervals.size]
    intervals = intervals[:held_values.size]
    squared_deviations = (held_values - mean) ** 2
    # np.average raises ZeroDivisionError when the weights sum to zero.
    return np.sqrt(np.average(squared_deviations, weights=intervals))

In [19]:
import os
from pathlib import Path
import pandas as pd
import attr
from typing import Optional

# Root directory (relative to the working directory) that is walked for
# per-parameter-set result folders.
verification_data_dir = Path('./verification_data')

@attr.frozen
class ParamsData:
    """Raw CSV tables loaded for one parameter combination.

    Each field holds the DataFrame read from the file in a parameter
    directory whose name starts with the matching prefix.
    """
    # Table read from the file starting with 'commonProps'.
    common_props: pd.DataFrame
    # Table read from the file starting with 'timeWaitAllocate'.
    time_wait_allocate: pd.DataFrame
    # Table read from the file starting with 'timeInSystem'.
    time_in_system: pd.DataFrame

# Maps the integer tuple parsed from a directory name ('3_10_2' -> (3, 10, 2))
# to the three tables found in that directory.
datas: dict[tuple[int, ...], ParamsData] = {}

# File-name prefix -> ParamsData field that receives the parsed table.
_PREFIX_TO_FIELD = {
    'commonProps': 'common_props',
    'timeWaitAllocate': 'time_wait_allocate',
    'timeInSystem': 'time_in_system',
}

for dirpath, _, filenames in os.walk(verification_data_dir):
    dir_path = Path(dirpath)
    # The root itself holds no data; compare whole paths so that only the
    # root is skipped, not any directory that happens to share its name.
    if dir_path == verification_data_dir:
        continue
    # A directory name that is not underscore-separated integers raises
    # ValueError here rather than being silently ignored.
    params = tuple(int(n) for n in dir_path.name.split('_'))

    tables: dict[str, pd.DataFrame] = {}
    for file_name in filenames:
        # Parse only files that are actually used; unrelated files in the
        # directory are neither read nor required to be valid CSV.
        for prefix, field_name in _PREFIX_TO_FIELD.items():
            if file_name.startswith(prefix):
                # If several files share a prefix, the last one listed wins.
                tables[field_name] = pd.read_csv(dir_path / file_name)
                break

    # Keep the parameter set only when all three tables are present.
    if len(tables) == len(_PREFIX_TO_FIELD):
        datas[params] = ParamsData(**tables)
    

In [24]:
from array import array

@attr.frozen
class CommonPropsStats:
    """Accumulators of per-run means and standard deviations for the common-props columns.

    Every field is excluded from ``__init__`` (``init=False``) and starts as
    an empty ``array('d')`` produced by its factory. The class is frozen, so
    fields cannot be rebound, but the arrays themselves are mutable and are
    filled with ``.append()``. Field names become DataFrame column names
    when the instance is converted with ``attr.asdict``.
    """
    # One <column>_means / <column>_std_devs pair per common-props column.
    diskLoad_means: array[float] = attr.field(init=False, factory=lambda: array('d'))
    diskLoad_std_devs: array[float] = attr.field(init=False, factory=lambda: array('d'))
    ioChannelLoad_means: array[float] = attr.field(init=False, factory=lambda: array('d'))
    ioChannelLoad_std_devs: array[float] = attr.field(init=False, factory=lambda: array('d'))
    processorsLoad_means: array[float] = attr.field(init=False, factory=lambda: array('d'))
    processorsLoad_std_devs: array[float] = attr.field(init=False, factory=lambda: array('d'))
    totalWaitAllocate_means: array[float] = attr.field(init=False, factory=lambda: array('d'))
    totalWaitAllocate_std_devs: array[float] = attr.field(init=False, factory=lambda: array('d'))
    useOfPage_means: array[float] = attr.field(init=False, factory=lambda: array('d'))
    useOfPage_std_devs: array[float] = attr.field(init=False, factory=lambda: array('d'))

@attr.frozen
class TimeInSystemStats:
    """Accumulators of per-run mean and standard deviation of ``timeInSystem``.

    Both fields are excluded from ``__init__`` and start as empty
    ``array('d')`` instances that are filled with ``.append()``.
    """
    timeInSystem_means: array[float] = attr.field(init=False, factory=lambda: array('d'))
    timeInSystem_std_devs: array[float] = attr.field(init=False, factory=lambda: array('d'))

@attr.frozen
class TimeWaitAllocateStats:
    """Accumulators of per-run mean and standard deviation of ``timeWaitAllocate``.

    Both fields are excluded from ``__init__`` and start as empty
    ``array('d')`` instances that are filled with ``.append()``.
    """
    timeWaitAllocate_means: array[float] = attr.field(init=False, factory=lambda: array('d'))
    timeWaitAllocate_std_devs: array[float] = attr.field(init=False, factory=lambda: array('d'))

@attr.frozen
class ComplexStats:
    """Per-run statistics tables for one parameter combination.

    NOTE: the generated ``__init__`` takes the fields in declaration order --
    ``time_wait_allocate`` comes BEFORE ``time_in_system`` -- so prefer
    keyword arguments when constructing an instance.
    """
    # Frame built from the common-props accumulators.
    common_props: pd.DataFrame
    # Frame built from the timeWaitAllocate accumulators.
    time_wait_allocate: pd.DataFrame
    # Frame built from the timeInSystem accumulators.
    time_in_system: pd.DataFrame

# Columns of the common-props table that get a mean / std-dev per run.
COMMON_PROPS_COLUMNS = (
    'diskLoad',
    'ioChannelLoad',
    'processorsLoad',
    'totalWaitAllocate',
    'useOfPage',
)


def _collect_run_stats(frame: pd.DataFrame, columns, stats) -> None:
    """Append each run's time-weighted mean and std-dev of ``columns`` to ``stats``.

    ``frame`` is grouped by 'runNumber'. For every run and every column the
    results are appended to ``stats.<column>_means`` and
    ``stats.<column>_std_devs``.
    """
    for _, run in frame.groupby('runNumber'):
        time_points = run['timePoint']
        for column in columns:
            mean = calculate_mean(time_points, run[column])
            # The deviation must be measured from this column's own mean,
            # so the value computed on the line above is passed straight in.
            std_dev = calculate_std_dev(time_points, run[column], mean)
            getattr(stats, f'{column}_means').append(mean)
            getattr(stats, f'{column}_std_devs').append(std_dev)


# Per-run statistics tables, keyed by the same parameter tuple as `datas`.
output_stats_dict: dict[tuple[int, ...], ComplexStats] = {}

for params, params_data in datas.items():
    common_props_stats = CommonPropsStats()
    time_in_system_stats = TimeInSystemStats()
    time_wait_allocate_stats = TimeWaitAllocateStats()

    _collect_run_stats(params_data.common_props, COMMON_PROPS_COLUMNS, common_props_stats)
    _collect_run_stats(params_data.time_in_system, ('timeInSystem',), time_in_system_stats)
    _collect_run_stats(params_data.time_wait_allocate, ('timeWaitAllocate',), time_wait_allocate_stats)

    # Keyword arguments on purpose: ComplexStats declares time_wait_allocate
    # before time_in_system, so positional passing is easy to get backwards.
    output_stats_dict[params] = ComplexStats(
        common_props=pd.DataFrame(attr.asdict(common_props_stats)),
        time_wait_allocate=pd.DataFrame(attr.asdict(time_wait_allocate_stats)),
        time_in_system=pd.DataFrame(attr.asdict(time_in_system_stats)),
    )

In [27]:
# Peek at the first parameter set only: show how far each run's mean disk
# load lies from the average over all of that set's runs.
first_stats = next(iter(output_stats_dict.values()), None)
if first_stats is not None:
    run_means = first_stats.common_props['diskLoad_means']
    print(run_means - run_means.mean())

0.0026888810644318415
0   -0.000002
1    0.000037
2    0.000002
3   -0.000015
4   -0.000023
Name: diskLoad_means, dtype: float64
