In [21]:
import numpy as np
import pandas as pd

def calculate_mean(time_points: pd.Series, values: pd.Series) -> float:
    """Return the time-weighted mean of ``values``.

    The i-th value is weighted by the length of the interval between the
    i-th and the (i + 1)-th time point, so a value that has no following
    time point does not contribute to the result.

    Args:
        time_points: Numeric time stamps of the observations.
        values: Observed values, positionally aligned with ``time_points``.

    Returns:
        The weighted mean as a Python ``float``. ``0.0`` is returned when
        there is no interval to weight by (fewer than two time points, no
        values) or when the weighted intervals sum to zero.
    """
    times = np.asarray(time_points, dtype=float)
    samples = np.asarray(values, dtype=float)

    # Pair value i with interval (t[i], t[i + 1]) and stop at the shorter of
    # the two sequences.
    interval_count = min(max(times.size - 1, 0), samples.size)
    if interval_count == 0:
        return 0.0

    delays = np.diff(times)[:interval_count]
    delay_sum = float(delays.sum())
    # Explicit check: NumPy division by zero yields nan/inf instead of raising.
    if delay_sum == 0.0:
        return 0.0

    return float(np.dot(samples[:interval_count], delays) / delay_sum)

def calculate_std_dev(time_points: pd.Series, values: pd.Series, mean: float) -> float:
    """Return the time-weighted standard deviation of ``values`` around ``mean``.

    The squared deviation of the i-th value is weighted by the length of the
    interval between the i-th and the (i + 1)-th time point, so a value that
    has no following time point does not contribute to the result.

    Args:
        time_points: Numeric time stamps of the observations.
        values: Observed values, positionally aligned with ``time_points``.
        mean: Centre the deviations are measured from.

    Returns:
        The weighted standard deviation as a Python ``float``. ``0.0`` is
        returned when there is no interval to weight by (fewer than two time
        points, no values) or when the weighted intervals sum to zero.
    """
    times = np.asarray(time_points, dtype=float)
    samples = np.asarray(values, dtype=float)

    # Pair value i with interval (t[i], t[i + 1]) and stop at the shorter of
    # the two sequences.
    interval_count = min(max(times.size - 1, 0), samples.size)
    if interval_count == 0:
        return 0.0

    delays = np.diff(times)[:interval_count]
    delay_sum = float(delays.sum())
    # Explicit check: NumPy division by zero yields nan/inf instead of raising.
    if delay_sum == 0.0:
        return 0.0

    squared_deviations = (samples[:interval_count] - mean) ** 2
    variance = np.dot(squared_deviations, delays) / delay_sum
    return float(np.sqrt(variance))

In [22]:
import os
from pathlib import Path
import pandas as pd
import attr
from typing import Optional

# Root directory walked by the loading loop in this cell; every directory below
# it is parsed as one parameter set (its name is split on '_' into integers).
verification_data_dir = Path('./verification_data')

@attr.frozen
class ParamsData:
    """Immutable bundle of the three CSV tables loaded for one parameter set."""

    # Table read from the file whose name starts with 'commonProps'.
    common_props: pd.DataFrame
    # Table read from the file whose name starts with 'timeWaitAllocate'.
    time_wait_allocate: pd.DataFrame
    # Table read from the file whose name starts with 'timeInSystem'.
    time_in_system: pd.DataFrame

# Loaded tables and the matching parameter values; position i in both lists
# describes the same parameter-set directory.
datas: list[ParamsData] = []
params_mat: list[pd.Series] = []

# NOTE(review): os.walk yields directories in file-system order, so the
# parameter-set indices used downstream are not guaranteed to be stable
# across machines. Sorting would renumber existing results, so it is not
# done here — confirm whether a fixed order is wanted.
for dirpath, _dirnames, filenames in os.walk(verification_data_dir):
    dir_path = Path(dirpath)
    # The walk starts at the root itself, which holds no parameter set.
    if dir_path.name == verification_data_dir.name:
        continue

    # The directory name encodes the parameters as '_'-separated integers.
    params = tuple(int(n) for n in dir_path.name.split('_'))

    params_mat.append(pd.Series({
        'Кількість сторінок': params[0],
        'Кількість процесорів': params[1],
        'Кількість дисків': params[2],
        'Початок сторінок': params[3],
        'Кінець сторінок': params[4],
        'Середій інтервал надходження завдань': params[5]
    }))

    common_props: Optional[pd.DataFrame] = None
    time_wait_allocate: Optional[pd.DataFrame] = None
    time_in_system: Optional[pd.DataFrame] = None
    for file_name in filenames:
        # Only parse files that are actually needed; unrelated files in the
        # directory are skipped instead of being fed to read_csv.
        if file_name.startswith('commonProps'):
            common_props = pd.read_csv(dir_path / file_name)
        elif file_name.startswith('timeWaitAllocate'):
            time_wait_allocate = pd.read_csv(dir_path / file_name)
        elif file_name.startswith('timeInSystem'):
            time_in_system = pd.read_csv(dir_path / file_name)

    missing_prefixes = [
        prefix
        for prefix, table in (
            ('commonProps', common_props),
            ('timeWaitAllocate', time_wait_allocate),
            ('timeInSystem', time_in_system),
        )
        if table is None
    ]
    if missing_prefixes:
        missing_text = ', '.join(missing_prefixes)
        # FileNotFoundError is still an Exception, so existing handlers keep working.
        raise FileNotFoundError(f'empty data: {dir_path} has no file starting with {missing_text}')

    datas.append(ParamsData(common_props, time_wait_allocate, time_in_system))

In [23]:
# Each Series in params_mat becomes one row: concatenate them as columns,
# then transpose so that a row describes one parameter set.
params_data_frame = pd.concat(params_mat, axis=1).T
params_data_frame.to_csv(
    verification_res_dir_path / 'params.csv',
    index=True,
    index_label='Індекс',
)
params_data_frame

Unnamed: 0,Кількість сторінок,Кількість процесорів,Кількість дисків,Початок сторінок,Кінець сторінок,Середій інтервал надходження завдань
0,200,4,5,30,40,8
1,400,5,11,70,100,8
2,700,12,12,30,70,8
3,1000,30,30,70,80,15
4,1000,40,8,60,100,9
5,131,2,4,20,60,7
6,131,2,4,20,60,5


In [24]:
from array import array

def _float_array_field():
    """Build an attrs field that is not an ``__init__`` argument and starts as an empty ``array('d')``."""
    return attr.field(init=False, factory=lambda: array('d'))


@attr.frozen
class MeanStddevStats:
    """Accumulator of per-run statistics for one parameter set.

    Every attribute is an initially empty array of doubles. The statistics
    cell appends one mean / standard-deviation value per run to each array.
    """

    diskLoad_mean: array[float] = _float_array_field()
    diskLoad_std_dev: array[float] = _float_array_field()
    ioChannelLoad_mean: array[float] = _float_array_field()
    ioChannelLoad_std_dev: array[float] = _float_array_field()
    processorsLoad_mean: array[float] = _float_array_field()
    processorsLoad_std_dev: array[float] = _float_array_field()
    totalWaitAllocate_mean: array[float] = _float_array_field()
    totalWaitAllocate_std_dev: array[float] = _float_array_field()
    useOfPage_mean: array[float] = _float_array_field()
    useOfPage_std_dev: array[float] = _float_array_field()
    timeInSystem_mean: array[float] = _float_array_field()
    timeInSystem_std_dev: array[float] = _float_array_field()
    timeWaitAllocate_mean: array[float] = _float_array_field()
    timeWaitAllocate_std_dev: array[float] = _float_array_field()

mean_stddev_stats_list: list[pd.DataFrame] = []

for index, params_data in enumerate(datas):
    mean_stddev_stats = MeanStddevStats()

    # Each source table is paired with the metric columns that are summarised
    # from it; the order matches the field order of MeanStddevStats.
    metric_sources = (
        (
            params_data.common_props,
            ('diskLoad', 'ioChannelLoad', 'processorsLoad', 'totalWaitAllocate', 'useOfPage'),
        ),
        (params_data.time_in_system, ('timeInSystem',)),
        (params_data.time_wait_allocate, ('timeWaitAllocate',)),
    )

    for table, metric_names in metric_sources:
        # One group per simulation run; every run adds one value per statistic.
        for _run_number, run_rows in table.groupby('runNumber'):
            run_time_points = run_rows['timePoint']
            for metric_name in metric_names:
                metric_mean = calculate_mean(run_time_points, run_rows[metric_name])
                metric_std_dev = calculate_std_dev(run_time_points, run_rows[metric_name], metric_mean)
                getattr(mean_stddev_stats, f'{metric_name}_mean').append(metric_mean)
                getattr(mean_stddev_stats, f'{metric_name}_std_dev').append(metric_std_dev)

    # One row per run, tagged with the position of the parameter set in datas.
    dt = pd.DataFrame(attr.asdict(mean_stddev_stats))
    dt['params_index'] = index
    mean_stddev_stats_list.append(dt)

In [48]:
# Maps the internal metric names (the column-name part before '_mean' /
# '_std_dev') to the Ukrainian column headers used in the exported tables.
rename_dict = {
    'diskLoad': 'Завантаження дисків',
    'ioChannelLoad': 'Завантаження каналу введення-виведення',
    'processorsLoad': 'Завантаження процесорів',
    'totalWaitAllocate': "Кількість завдань в очікуванні пам'яті",
    'useOfPage': 'Кількість зайнятих сторінок',
    'timeInSystem': 'Час завдання в системі',
    'timeWaitAllocate': "Час виділення пам'яті",
}


def split_into_means_and_stddevs(data: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Split a combined statistics table into a means table and a std-dev table.

    Columns named ``<metric>_mean`` go to the first table and columns named
    ``<metric>_std_dev`` to the second. Other columns are ignored, apart from
    ``params_index``, which becomes the first column of both tables. Metric
    names are translated through ``rename_dict`` when an entry exists and are
    kept unchanged otherwise.

    Args:
        data: Table with ``params_index`` and the ``*_mean`` / ``*_std_dev``
            statistic columns.

    Returns:
        ``(means, stddevs)``. Both share the same column order: the
        parameter-set index first, then the metrics in the order in which
        their ``*_mean`` columns appear in ``data``.

    Raises:
        KeyError: If ``params_index`` is missing, or a metric has a
            ``*_mean`` column but no ``*_std_dev`` column.
    """
    index_label = 'Індекс набору параметрів'

    def select(suffix: str) -> pd.DataFrame:
        # Strip the whole suffix (not just the text before the first '_') so
        # that metric names containing underscores survive intact.
        display_names = {}
        for name in data.columns:
            if name.endswith(suffix):
                metric = name.removesuffix(suffix)
                display_names[name] = rename_dict.get(metric, metric)
        table = data[list(display_names)].rename(columns=display_names)
        table.insert(0, index_label, data['params_index'])
        return table

    means = select('_mean')
    stddevs = select('_std_dev')
    # Align the std-dev columns with the means table.
    stddevs = stddevs[means.columns.tolist()]

    return means, stddevs

In [49]:
# Stack the per-parameter-set statistics frames into one table (fresh 0..n-1
# index), then split it into a means table and a standard-deviations table.
mean_stddev_stats_data_frame = pd.concat(mean_stddev_stats_list, ignore_index=True)
mean_stats_data_frame, stddev_stats_data_frame = split_into_means_and_stddevs(mean_stddev_stats_data_frame)

In [50]:
# Export the per-run means without the row index, then display the table.
# NOTE(review): verification_res_dir_path is not defined in the cells shown
# here — confirm it is set earlier in the notebook.
mean_stats_data_frame.to_csv(verification_res_dir_path / 'mean_stats_data_frame.csv', index=False)
mean_stats_data_frame

Unnamed: 0,Індекс набору параметрів,Завантаження дисків,Завантаження каналу введення-виведення,Завантаження процесорів,Кількість завдань в очікуванні пам'яті,Кількість зайнятих сторінок,Час завдання в системі,Час виділення пам'яті
0,0,0.000936,0.001873,0.31255,0.0,43.287726,9.495022,0.0
1,0,0.000937,0.001874,0.312712,0.0,43.317718,9.491281,0.0
2,0,0.000942,0.00188,0.312961,0.0,43.273784,9.478247,0.0
3,0,0.000934,0.001872,0.312302,0.0,43.2821,9.488552,0.0
4,0,0.000938,0.001874,0.311647,0.0,43.220634,9.464627,0.0
5,1,0.000425,0.00187,0.249554,5e-06,105.600353,9.506471,0.853253
6,1,0.000426,0.001874,0.249672,5e-06,105.573525,9.486391,0.541999
7,1,0.000426,0.001876,0.250683,8e-06,105.660301,9.506772,3.232504
8,1,0.000426,0.001875,0.250636,2e-06,106.032987,9.511519,0.454897
9,1,0.000425,0.001871,0.24935,4e-06,105.412756,9.474933,1.600204


In [51]:
# Export the per-run standard deviations without the row index, then display
# the table.
# NOTE(review): verification_res_dir_path is not defined in the cells shown
# here — confirm it is set earlier in the notebook.
stddev_stats_data_frame.to_csv(verification_res_dir_path / 'stddev_stats_data_frame.csv', index=False)
stddev_stats_data_frame

Unnamed: 0,Індекс набору параметрів,Завантаження дисків,Завантаження каналу введення-виведення,Завантаження процесорів,Кількість завдань в очікуванні пам'яті,Кількість зайнятих сторінок,Час завдання в системі,Час виділення пам'яті
0,0,6.063318e-06,1.2e-05,0.0015,0.0,22.99193,2.891015,0.0
1,0,6.702141e-06,1.3e-05,0.00145,0.0,23.069494,2.897542,0.0
2,0,6.176518e-06,1.3e-05,0.001425,0.0,23.059355,2.892877,0.0
3,0,6.935435e-06,1.2e-05,0.001981,0.0,23.05114,2.899923,0.0
4,0,6.283968e-06,1.6e-05,0.002055,0.0,23.085492,2.898505,0.0
5,1,2.437793e-06,1.1e-05,0.001458,0.002142,56.4485,2.894157,0.024508
6,1,2.564599e-06,1.4e-05,0.001559,0.002235,56.554546,2.912955,0.563946
7,1,3.658403e-06,1.4e-05,0.001914,0.002796,56.546243,2.900689,0.0
8,1,2.41118e-06,1.1e-05,0.001075,0.001349,56.62893,2.89192,0.0
9,1,3.185139e-06,1.7e-05,0.001227,0.002017,56.477288,2.90348,0.0


In [52]:

# Per parameter set: the average of every per-run statistic, and the absolute
# deviation of each run from that average expressed in percent of the average.
global_mean_stddev_list: list[pd.Series] = []
mean_stddev_stats_relative_mean_list: list[pd.DataFrame] = []

for params_index, run_stats in mean_stddev_stats_data_frame.groupby('params_index'):
    means = run_stats.mean()
    global_mean_stddev_list.append(means)

    # A zero average with zero deviation gives 0/0 = NaN; report it as 0 %.
    relative_deviation = (((run_stats - means).abs() * 100) / means).fillna(0)
    # The grouping column went through the arithmetic too; restore its value.
    relative_deviation['params_index'] = params_index
    mean_stddev_stats_relative_mean_list.append(relative_deviation)

# One row per parameter set. Built here so that the following cell, which reads
# global_mean_stddev_data_frame, works after Restart & Run All. The grouping
# column is dropped because it is not a statistic.
global_mean_stddev_data_frame = pd.DataFrame(global_mean_stddev_list).drop(
    columns='params_index', errors='ignore'
)

In [53]:
# Split the global statistics into a means table and a std-dev table. Only
# columns carrying the exact '_mean' / '_std_dev' suffix are used, so helper
# columns such as 'params_index' cannot leak into the std-dev table, and
# metric names containing underscores are not truncated.
stat_frames_by_suffix = {}
for suffix in ('_mean', '_std_dev'):
    display_names = {}
    for name in global_mean_stddev_data_frame.columns:
        if name.endswith(suffix):
            short_name = name.removesuffix(suffix)
            # Fall back to the internal metric name when no translation exists.
            display_names[name] = rename_dict.get(short_name, short_name)
    stat_frames_by_suffix[suffix] = (
        global_mean_stddev_data_frame[list(display_names)].rename(columns=display_names)
    )

global_mean_data_frame = stat_frames_by_suffix['_mean']
global_std_dev_data_frame = stat_frames_by_suffix['_std_dev']

In [54]:
# Display the global means table built in the previous cell.
global_mean_data_frame

Unnamed: 0,Завантаження дисків,Завантаження каналу введення-виведення,Завантаження процесорів,Кількість завдань в очікуванні пам'яті,Кількість зайнятих сторінок,Час завдання в системі,Час виділення пам'яті
0,0.000937,0.001875,0.312434,0.0,43.276393,9.483546,0.0
1,0.000426,0.001873,0.249979,5e-06,105.655984,9.497217,1.336571
2,0.00039,0.001874,0.104206,0.0,61.890033,9.501163,0.0
3,8.3e-05,0.000999,0.022266,0.0,49.767624,9.503462,0.0
4,0.000521,0.001666,0.027824,0.0,88.46388,9.478184,0.0
5,0.001338,0.00214,0.714311,0.031514,58.973092,10.1725,2.257716
6,0.001556,0.002492,0.829801,13468.958747,113.036283,3032.264559,4231.885746


In [55]:
# Export the global means; the row index is written as the parameter-set index column.
global_mean_data_frame.to_csv(verification_res_dir_path / 'global_mean_data_frame.csv', index=True, index_label='Індекс набору параметрів')

In [56]:
# Display the global standard-deviations table built alongside the global means.
global_std_dev_data_frame

Unnamed: 0,Завантаження дисків,Завантаження каналу введення-виведення,Завантаження процесорів,Кількість завдань в очікуванні пам'яті,Кількість зайнятих сторінок,Час завдання в системі,Час виділення пам'яті
0,6.432276e-06,1.3e-05,0.001682,0.0,23.051482,2.895972,0.0
1,2.851423e-06,1.3e-05,0.001447,0.002108,56.531101,2.90064,0.117691
2,2.92111e-06,1.4e-05,0.000508,0.0,35.078087,2.905734,0.0
3,7.313431e-07,7e-06,0.000146,0.0,38.841223,2.940398,0.0
4,3.530176e-06,1.3e-05,0.000151,0.0,50.794511,2.911897,0.0
5,9.802968e-06,1.5e-05,0.00347,0.184801,30.310663,3.394522,2.343808
6,1.295621e-05,1.9e-05,0.005538,7749.55316,12.44302,32046.78703,38020.62426


In [57]:
# Export the global standard deviations; the row index is written as the parameter-set index column.
global_std_dev_data_frame.to_csv(verification_res_dir_path / 'global_std_dev_data_frame.csv', index=True, index_label='Індекс набору параметрів')

In [58]:
# Stack the per-parameter-set relative deviations (percent of the group
# average) into one table, then split it into means and std-dev tables.
mean_stddev_stats_relative_mean_data_frame = pd.concat(mean_stddev_stats_relative_mean_list, ignore_index=True)
mean_stats_relative_mean_data_frame, stddev_stats_relative_mean_data_frame = split_into_means_and_stddevs(mean_stddev_stats_relative_mean_data_frame)

In [59]:
# Export the relative deviations of the per-run means without the row index,
# then display the table.
mean_stats_relative_mean_data_frame.to_csv(verification_res_dir_path / 'mean_stats_relative_mean_data_frame.csv', index=False)
mean_stats_relative_mean_data_frame

Unnamed: 0,Індекс набору параметрів,Завантаження дисків,Завантаження каналу введення-виведення,Завантаження процесорів,Кількість завдань в очікуванні пам'яті,Кількість зайнятих сторінок,Час завдання в системі,Час виділення пам'яті
0,0,0.139269,0.071658,0.037015,0.0,0.026188,0.121013,0.0
1,0,0.058043,0.049845,0.088946,0.0,0.095493,0.081567,0.0
2,0,0.501863,0.26817,0.168492,0.0,0.006027,0.055877,0.0
3,0,0.355473,0.132973,0.0424,0.0,0.01319,0.052792,0.0
4,0,0.050923,0.013694,0.252053,0.0,0.128843,0.199495,0.0
5,1,0.058577,0.1634,0.169834,1.509288,0.052653,0.097436,36.161037
6,1,0.058648,0.012372,0.122648,7.227027,0.078046,0.113994,59.448523
7,1,0.05899,0.168131,0.281451,67.883568,0.004085,0.100608,141.850432
8,1,0.019547,0.11136,0.262765,60.936277,0.356821,0.150588,65.965389
9,1,0.078608,0.128462,0.251734,12.665029,0.230208,0.234638,19.724518


In [60]:
# Export the relative deviations of the per-run standard deviations without
# the row index, then display the table.
stddev_stats_relative_mean_data_frame.to_csv(verification_res_dir_path / 'stddev_stats_relative_mean_data_frame.csv', index=False)
stddev_stats_relative_mean_data_frame

Unnamed: 0,Індекс набору параметрів,Завантаження дисків,Завантаження каналу введення-виведення,Завантаження процесорів,Кількість завдань в очікуванні пам'яті,Кількість зайнятих сторінок,Час завдання в системі,Час виділення пам'яті
0,0,5.736039,7.416954,10.826112,0.0,0.258347,0.171196,0.0
1,0,4.195475,0.582468,13.7733,0.0,0.078138,0.054187,0.0
2,0,3.976164,4.517335,15.296788,0.0,0.034152,0.106873,0.0
3,0,7.822408,9.277472,17.744158,0.0,0.001483,0.136422,0.0
4,0,2.305679,20.629293,22.152042,0.0,0.147539,0.087461,0.0
5,1,14.506092,17.514559,0.784131,1.616887,0.146117,0.223503,79.175726
6,1,10.05899,4.634427,7.772852,6.027932,0.041471,0.424551,379.175726
7,1,28.300976,5.336185,32.313013,32.669668,0.026785,0.001681,100.0
8,1,15.439399,18.466853,25.670237,36.003583,0.173052,0.30063,100.0
9,1,11.703504,26.0108,15.199759,4.310904,0.095192,0.097901,100.0
