In [15]:
import numpy as np
import pandas as pd


def calculate_mean(time_points: pd.Series, values: pd.Series) -> float:
    """Return the time-weighted mean of ``values``.

    Each value is weighted by the gap between two consecutive time points:
    ``values[i]`` is paired with ``time_points[i + 1] - time_points[i]``.
    Pairing stops at the shorter sequence, so when both series have the same
    length the last value receives no weight.

    Args:
        time_points: Observation times, in observation order.
        values: Observed values, positionally aligned with ``time_points``.

    Returns:
        ``sum(value * delay) / sum(delay)``, or ``0.0`` when the summed delay
        is zero (empty input, a single time point, or no elapsed time).
    """
    if len(time_points) == 0:
        return 0.0

    prev_time_point = time_points.iloc[0]
    delay_sum = 0.0
    value_sum = 0.0

    for time_point, value in zip(time_points.iloc[1:], values):
        delay = time_point - prev_time_point
        prev_time_point = time_point
        delay_sum += delay
        value_sum += value * delay

    # The sums become NumPy scalars after the first iteration, and NumPy
    # division by zero yields nan/inf with a warning instead of raising
    # ZeroDivisionError, so the zero-weight case is tested explicitly.
    if delay_sum == 0:
        return 0.0
    return float(value_sum / delay_sum)

def calculate_std_dev(time_points: pd.Series, values: pd.Series, mean: float) -> float:
    """Return the time-weighted standard deviation of ``values`` around ``mean``.

    Each squared deviation ``(values[i] - mean) ** 2`` is weighted by
    ``time_points[i + 1] - time_points[i]``. Pairing stops at the shorter
    sequence, so when both series have the same length the last value
    receives no weight.

    Args:
        time_points: Observation times, in observation order.
        values: Observed values, positionally aligned with ``time_points``.
        mean: Centre to measure deviations from.

    Returns:
        ``sqrt(sum((value - mean) ** 2 * delay) / sum(delay))``, or ``0.0``
        when the summed delay is zero (empty input, a single time point, or
        no elapsed time).
    """
    if len(time_points) == 0:
        return 0.0

    prev_time_point = time_points.iloc[0]
    delay_sum = 0.0
    value_sum = 0.0

    for time_point, value in zip(time_points.iloc[1:], values):
        delay = time_point - prev_time_point
        prev_time_point = time_point
        delay_sum += delay
        value_sum += ((value - mean) ** 2) * delay

    # The sums become NumPy scalars after the first iteration, and NumPy
    # division by zero yields nan/inf with a warning instead of raising
    # ZeroDivisionError, so the zero-weight case is tested explicitly.
    if delay_sum == 0:
        return 0.0
    return float(np.sqrt(value_sum / delay_sum))

In [16]:
import os
from pathlib import Path
import pandas as pd
import attr
from typing import Optional

# Root directory that is walked with os.walk; every sub-directory name is parsed
# as underscore-separated integers identifying one parameter set.
verification_data_dir = Path('./verification_data')

@attr.frozen
class ParamsData:
    """Immutable bundle of the three CSV tables loaded for one parameter set."""
    # Table read from the file whose name starts with 'commonProps'.
    common_props: pd.DataFrame
    # Table read from the file whose name starts with 'timeWaitAllocate'.
    time_wait_allocate: pd.DataFrame
    # Table read from the file whose name starts with 'timeInSystem'.
    time_in_system: pd.DataFrame

# One (params, ParamsData) entry per parameter-set directory found under
# verification_data_dir. Rebuilt from scratch so re-running the cell is idempotent.
datas: list[tuple[tuple[int, ...], ParamsData]] = []

# NOTE: os.walk yields directories in an OS-dependent order, so the position of
# a parameter set in `datas` may differ between machines.
for dirpath, _dirnames, filenames in os.walk(verification_data_dir):
    dir_path = Path(dirpath)
    # Skip the root itself; only its sub-directories describe parameter sets.
    if dir_path.name == verification_data_dir.name:
        continue
    # Directory name encodes the parameters as underscore-separated integers.
    params = tuple(int(n) for n in dir_path.name.split('_'))

    common_props: Optional[pd.DataFrame] = None
    time_wait_allocate: Optional[pd.DataFrame] = None
    time_in_system: Optional[pd.DataFrame] = None
    for file_name in filenames:
        file_path = dir_path / file_name
        # Only files with a recognised prefix are parsed; anything else in the
        # directory is ignored instead of being fed to read_csv.
        if file_name.startswith('commonProps'):
            common_props = pd.read_csv(file_path)
        elif file_name.startswith('timeWaitAllocate'):
            time_wait_allocate = pd.read_csv(file_path)
        elif file_name.startswith('timeInSystem'):
            time_in_system = pd.read_csv(file_path)

    # All three tables are required for a parameter set to be usable.
    if common_props is None or time_wait_allocate is None or time_in_system is None:
        raise ValueError('empty data')
    datas.append((params, ParamsData(common_props, time_wait_allocate, time_in_system)))
    

In [17]:
from array import array

def _double_array_field():
    """Build an attrs field left out of ``__init__`` that defaults to a fresh, empty ``array('d')``."""
    return attr.field(init=False, factory=lambda: array('d'))


@attr.frozen
class MeanStddevStats:
    """Per-run accumulators: one ``<metric>_mean`` / ``<metric>_std_dev`` array pair per metric.

    The attributes cannot be rebound (frozen), but the arrays themselves are
    filled in place with ``append``.
    """
    diskLoad_mean: array[float] = _double_array_field()
    diskLoad_std_dev: array[float] = _double_array_field()
    ioChannelLoad_mean: array[float] = _double_array_field()
    ioChannelLoad_std_dev: array[float] = _double_array_field()
    processorsLoad_mean: array[float] = _double_array_field()
    processorsLoad_std_dev: array[float] = _double_array_field()
    totalWaitAllocate_mean: array[float] = _double_array_field()
    totalWaitAllocate_std_dev: array[float] = _double_array_field()
    useOfPage_mean: array[float] = _double_array_field()
    useOfPage_std_dev: array[float] = _double_array_field()
    timeInSystem_mean: array[float] = _double_array_field()
    timeInSystem_std_dev: array[float] = _double_array_field()
    timeWaitAllocate_mean: array[float] = _double_array_field()
    timeWaitAllocate_std_dev: array[float] = _double_array_field()

# One (params, DataFrame) entry per parameter set; each DataFrame row is one run
# and each column is a `<metric>_mean` or `<metric>_std_dev` statistic.
mean_stddev_stats_list: list[tuple[tuple[int, ...], pd.DataFrame]] = []

for params, params_data in datas:
    mean_stddev_stats = MeanStddevStats()

    # Each source table paired with the metric columns summarised from it.
    # Every metric `m` feeds the `m_mean` and `m_std_dev` accumulators.
    metric_sources = (
        (
            params_data.common_props,
            ('diskLoad', 'ioChannelLoad', 'processorsLoad', 'totalWaitAllocate', 'useOfPage'),
        ),
        (params_data.time_in_system, ('timeInSystem',)),
        (params_data.time_wait_allocate, ('timeWaitAllocate',)),
    )

    for table, metrics in metric_sources:
        # One group per simulation run; the run number itself is not needed.
        for _run_num, group in table.groupby('runNumber'):
            time_points = group['timePoint']
            for metric in metrics:
                # Time-weighted mean first, because the std-dev is measured around it.
                metric_mean = calculate_mean(time_points, group[metric])
                metric_std_dev = calculate_std_dev(time_points, group[metric], metric_mean)
                getattr(mean_stddev_stats, f'{metric}_mean').append(metric_mean)
                getattr(mean_stddev_stats, f'{metric}_std_dev').append(metric_std_dev)

    mean_stddev_stats_list.append((params, pd.DataFrame(attr.asdict(mean_stddev_stats))))

In [18]:
# One Series of named parameters per parameter set.
params_mat: list[pd.Series] = []
# One Series per parameter set: the column-wise mean over runs of every statistic.
global_mean_stddev_list: list[pd.Series] = []
# One DataFrame per parameter set: per-run deviation from that mean, in percent.
mean_stddev_stats_relative_mean_list: list[pd.DataFrame] = []

for i, (params, mean_stddev_stats) in enumerate(mean_stddev_stats_list):
    # Keys are the Ukrainian column headers of the exported parameter table:
    # page count, processor count, disk count, pages start, pages end,
    # mean job inter-arrival time.
    # NOTE(review): 'Середій' looks like a typo for 'Середній'; left unchanged
    # because the key becomes a column header of the table built from params_mat.
    params_mat.append(pd.Series({
        'Кількість сторінок': params[0],
        'Кількість процесорів': params[1],
        'Кількість дисків': params[2],
        'Початок сторінок': params[3],
        'Кінець сторінок': params[4],
        'Середій інтервал надходження завдань': params[5]
    }))

    means = mean_stddev_stats.mean()
    global_mean_stddev_list.append(means)

    # |value - mean| as a percentage of the mean. NaN results (e.g. 0/0 when a
    # statistic is zero in every run) are reported as 0.
    mean_stddev_stats_relative_mean = (
        (((mean_stddev_stats - means).abs() * 100) / means)
        .fillna(0)
        .assign(params_index=i)
    )
    mean_stddev_stats_relative_mean_list.append(mean_stddev_stats_relative_mean)

In [19]:
# Place the per-parameter-set Series side by side, then transpose so that
# each row describes one parameter set.
params_data_frame = pd.concat(params_mat, axis=1).T
params_data_frame

Unnamed: 0,Кількість сторінок,Кількість процесорів,Кількість дисків,Початок сторінок,Кінець сторінок,Середій інтервал надходження завдань
0,200,4,5,30,40,8
1,400,5,11,70,100,8
2,700,12,12,30,70,8
3,1000,30,30,70,80,15
4,1000,40,8,60,100,9
5,131,2,4,20,60,7
6,131,2,4,20,60,5


In [20]:
# Write the parameter table to 'params.csv'; the row index is kept and stored
# in a column labelled 'Індекс'.
params_data_frame.to_csv('params.csv', index=True, index_label='Індекс')

In [21]:
# Place the per-parameter-set mean Series side by side, then transpose so that
# each row is one parameter set and each column one statistic.
global_mean_stddev_data_frame = pd.concat(global_mean_stddev_list, axis=1).T

In [22]:
# Display names (Ukrainian) for each metric, keyed by the metric's short name.
rename_dict = {
    'diskLoad': 'Завантаження дисків',
    'ioChannelLoad': 'Завантаження каналу введення-виведення',
    'processorsLoad': 'Завантаження процесорів',
    'totalWaitAllocate': "Кількість завдань в очікуванні пам'яті",
    'useOfPage': 'Кількість зайнятих сторінок',
    'timeInSystem': 'Час завдання в системі',
    'timeWaitAllocate': "Час виділення пам'яті",
}

# Route every column to the "mean" or the "std-dev" table, keyed by the metric's
# short name (the part of the column name before the first underscore).
mean_columns = {}
std_dev_columns = {}
for column_name in global_mean_stddev_data_frame.columns:
    destination = mean_columns if column_name.endswith('mean') else std_dev_columns
    destination[column_name.split('_')[0]] = global_mean_stddev_data_frame[column_name]

global_mean_data_frame = pd.DataFrame(mean_columns).rename(columns=rename_dict)
global_std_dev_data_frame = pd.DataFrame(std_dev_columns).rename(columns=rename_dict)

In [23]:
# Display, per parameter set (row), the average over runs of each metric's per-run mean.
global_mean_data_frame

Unnamed: 0,Завантаження дисків,Завантаження каналу введення-виведення,Завантаження процесорів,Кількість завдань в очікуванні пам'яті,Кількість зайнятих сторінок,Час завдання в системі,Час виділення пам'яті
0,0.00468,0.001872,1.249056,0.0,43.184582,9.471798,0.0
1,0.004692,0.001877,1.252504,5e-06,105.808855,9.507947,1.86588
2,0.004685,0.001874,1.25103,0.0,61.9339,9.503207,0.0
3,0.002497,0.001,0.667321,0.0,49.730564,9.502587,0.0
4,0.004162,0.001664,1.109895,0.0,88.34111,9.470952,0.0
5,0.005357,0.002142,1.429417,0.031638,58.964293,10.172988,2.255853
6,0.006252,0.002498,1.66645,13460.980315,112.973999,2494.007541,3474.695934


In [24]:
# Display, per parameter set (row), the average over runs of each metric's per-run standard deviation.
global_std_dev_data_frame

Unnamed: 0,Завантаження дисків,Завантаження каналу введення-виведення,Завантаження процесорів,Кількість завдань в очікуванні пам'яті,Кількість зайнятих сторінок,Час завдання в системі,Час виділення пам'яті
0,3.2e-05,1.3e-05,0.006727,0.0,23.015777,2.890892,0.0
1,3.4e-05,1.3e-05,0.006743,0.00216,56.637641,2.903898,0.32331
2,3.1e-05,1.1e-05,0.005596,0.0,35.095668,2.905723,0.0
3,2e-05,8e-06,0.004333,0.0,38.850164,2.938753,0.0
4,2.7e-05,1.1e-05,0.005691,0.0,50.800702,2.913949,0.0
5,3.8e-05,1.5e-05,0.007255,0.186064,30.31943,3.416209,2.341983
6,5.1e-05,1.9e-05,0.009299,7776.827862,12.429553,28798.537085,34034.839777


In [25]:
# Stack the per-parameter-set deviation tables into one frame with a fresh 0..n-1 index.
mean_stddev_stats_relative_mean_data_frame = pd.concat(mean_stddev_stats_relative_mean_list, ignore_index=True)

# Route every statistic column to the "mean" or the "std-dev" table, keyed by the
# metric's short name (the part of the column name before the first underscore).
relative_mean_columns = {}
relative_std_dev_columns = {}
for column_name in global_mean_stddev_data_frame.columns:
    destination = relative_mean_columns if column_name.endswith('mean') else relative_std_dev_columns
    destination[column_name.split('_')[0]] = mean_stddev_stats_relative_mean_data_frame[column_name]

# Both result tables carry the index of the parameter set each row belongs to.
params_index_column = {
    'Індекс набору параметрів': mean_stddev_stats_relative_mean_data_frame['params_index'],
}
mean_stats_relative_mean_data_frame = (
    pd.DataFrame(relative_mean_columns)
    .assign(**params_index_column)
    .rename(columns=rename_dict)
)
stddev_stats_relative_mean_data_frame = (
    pd.DataFrame(relative_std_dev_columns)
    .assign(**params_index_column)
    .rename(columns=rename_dict)
)

In [26]:
# Display, per run (row), how far each metric's per-run mean lies from the
# average over runs of its parameter set, in percent.
mean_stats_relative_mean_data_frame

Unnamed: 0,Завантаження дисків,Завантаження каналу введення-виведення,Завантаження процесорів,Кількість завдань в очікуванні пам'яті,Кількість зайнятих сторінок,Час завдання в системі,Час виділення пам'яті,Індекс набору параметрів
0,0.052621,0.299458,0.076144,0.0,0.007055,0.090441,0.0,0
1,0.22069,0.016886,0.044134,0.0,0.073928,0.030802,0.0,0
2,0.093726,0.201916,0.12231,0.0,0.111908,0.018753,0.0,0
3,0.138953,0.240141,0.048018,0.0,0.199867,0.013829,0.0,0
4,0.040632,0.159486,0.042281,0.0,0.021086,0.054715,0.0,0
5,0.079,0.252505,0.408152,56.006882,0.269543,0.087676,44.359202,1
6,0.174527,0.127141,0.020182,30.791464,0.183802,0.004662,75.375511,1
7,0.289745,0.439853,0.087048,49.177707,0.034611,0.052508,21.115569,1
8,0.299312,0.053746,0.22407,70.587905,0.161363,0.183052,215.399011,1
9,0.08596,0.006462,0.076851,32.967266,0.041012,0.038206,74.548729,1


In [27]:
# Display, per run (row), how far each metric's per-run standard deviation lies
# from the average over runs of its parameter set, in percent.
stddev_stats_relative_mean_data_frame

Unnamed: 0,Завантаження дисків,Завантаження каналу введення-виведення,Завантаження процесорів,Кількість завдань в очікуванні пам'яті,Кількість зайнятих сторінок,Час завдання в системі,Час виділення пам'яті,Індекс набору параметрів
0,18.327719,17.935564,26.361181,0.0,0.208623,0.103056,0.0,0
1,5.192451,12.501233,3.726713,0.0,0.124723,0.13022,0.0,0
2,31.66328,16.525306,34.97041,0.0,0.348635,0.086716,0.0,0
3,7.367269,6.117575,22.086721,0.0,0.03119,0.010196,0.0,0
4,15.510379,17.20855,34.422663,0.0,0.046479,0.103684,0.0,0
5,4.023246,17.814928,10.103743,31.499768,0.083844,0.060355,100.0,1
6,2.425861,3.007845,1.756416,14.082975,0.07227,0.265637,135.289917,1
7,22.327476,27.277969,21.157963,26.139323,0.074905,0.237314,113.297735,1
8,11.022406,4.172473,34.266779,34.887721,0.009102,0.142448,100.0,1
9,4.855962,8.298413,1.248658,15.444301,0.095581,0.17448,48.587651,1
