Czytanie plików fio i dd

In [None]:
import os
import re
from collections import defaultdict
import glob
import numpy as np 
import pandas as pd
import json
import matplotlib.pyplot as plt
from matplotlib.colors import to_rgb
from matplotlib.patches import Patch
from matplotlib.lines import Line2D


In [None]:
def print_dict_tree(d, indent=0, max_depth=None):
    """Recursively print the key structure of a nested dictionary.

    Each key is printed on its own line, indented by two spaces per nesting
    level. Values that are not dictionaries are not printed.

    Args:
        d: The (possibly nested) dictionary to walk; anything else is ignored.
        indent: Current nesting level; callers normally leave this at 0.
        max_depth: Number of levels to print, or None for no limit.
    """
    if not isinstance(d, dict):
        return
    if max_depth is not None and indent >= max_depth:
        return

    prefix = '  ' * indent
    for name, subtree in d.items():
        print(prefix + str(name))
        print_dict_tree(subtree, indent + 1, max_depth)

# Example usage:
# print_dict_tree(fio_resultsdict, max_depth=3)


def print_latex_image(image_path, image_name, caption=None, label=None, width=None):
    """Print a LaTeX ``figure`` environment that includes one image.

    Args:
        image_path: Sub-directory below ``images/`` that holds the image.
        image_name: Image file name; a trailing ``.png`` is stripped.
        caption: Caption text; a ``TODO caption`` placeholder is used if falsy.
        label: Label text; defaults to ``fig:<image name without .png>``.
        width: Value for the ``width=`` option; defaults to ``\\textwidth``.
    """
    stem = image_name[:-4] if image_name.endswith('.png') else image_name

    if width:
        options = f"[width={width}]"
    else:
        options = "[width=\\textwidth]"

    if caption:
        caption_cmd = f"\\caption{{{caption}}}"
    else:
        caption_cmd = "\\caption{TODO caption}"

    if label:
        label_cmd = f"\\label{{{label}}}"
    else:
        label_cmd = f"\\label{{fig:{stem}}}"

    figure_lines = [
        "\\begin{figure}[H]",
        "    \\centering",
        f"    \\includegraphics{options}{{images/{image_path}/{stem}}}",
        f"    {caption_cmd}",
        f"    {label_cmd}",
        "\\end{figure}",
    ]
    # The snippet itself ends with a newline; print() then adds a blank line.
    print("\n".join(figure_lines) + "\n")
    
def generate_caption(
    workload=None,
    storage=None,
    block_size=None,
    compression=None,
    snapshot=None,
    combine_storage_types=False,
    combine_compression_types=False,
    combine_snapshots=False,
    storage_list=None,
    compression_list=None,
    snapshot_list=None,
    workload_translations=None,
    storage_type_translations=None,
    compression_translations=None
):
    """Build a natural-sounding (Polish) figure caption for a comparison plot.

    The caption is assembled from optional clauses, in this order: workload,
    storage, block size, compression, snapshots. When a ``combine_*`` flag is
    set, the matching ``*_list`` is spelled out in full instead of the single
    value.

    Args:
        workload: Workload key; translated through ``workload_translations``.
        storage: Single storage name (used when ``combine_storage_types`` is
            False); looked up upper-cased in ``storage_type_translations``.
        block_size: Block size; the value ``"default"`` gets its own wording.
        compression: Single compression name; ``"none"`` adds no clause.
        snapshot: Accepted for interface symmetry; not used by this function.
        combine_storage_types: List every entry of ``storage_list``.
        combine_compression_types: List every entry of ``compression_list``.
        combine_snapshots: List every entry of ``snapshot_list``.
        storage_list: Storage names used when combining storage types.
        compression_list: Compression names used when combining compression.
        snapshot_list: Snapshot counts used when combining snapshots; entries
            are converted with ``str`` and ``"0"`` means "no snapshot".
        workload_translations: Mapping of workload key to display name.
        storage_type_translations: Mapping of upper-cased storage to name.
        compression_translations: Mapping of compression key to display name.

    Returns:
        The caption sentence, terminated with a period.
    """
    # The translation tables default to None; treat that as "no translations"
    # so raw names are used instead of crashing on ``None.get``.
    workload_translations = workload_translations or {}
    storage_type_translations = storage_type_translations or {}
    compression_translations = compression_translations or {}

    def natural_join(items):
        """Join items as "A, B i C" (Polish "and" before the last item)."""
        if not items:
            return ""
        if len(items) == 1:
            return items[0]
        return ", ".join(items[:-1]) + " i " + items[-1]

    def storage_label(name):
        """Translate a storage name, falling back to a generic "dysku <name>"."""
        return storage_type_translations.get(name.upper(), f"dysku {name}")

    sentence = "Por√≥wnanie wydajno≈õci"

    # 1. Workload
    if workload:
        sentence += f" dla obciƒÖ≈ºenia typu {workload_translations.get(workload, workload)}"

    # 2. Storage type
    if combine_storage_types:
        translated = [storage_label(s) for s in (storage_list or [])]
        # Skip the clause entirely rather than emitting a dangling " na ".
        if translated:
            sentence += f" na {natural_join(translated)}"
    elif storage:
        sentence += f" z u≈ºyciem {storage_label(storage)}"

    # 3. Block size
    if block_size:
        sentence += (
            " przy domy≈õlnym rozmiarze bloku"
            if block_size == "default"
            else f" przy rozmiarze bloku {block_size}"
        )

    # 4. Compression
    if combine_compression_types:
        comp_values = compression_list or []
        has_none = "none" in comp_values
        named = [compression_translations.get(c, c) for c in comp_values if c != "none"]

        # Nothing is written when only "none" is present (``named`` is empty).
        if has_none and len(named) == 1:
            sentence += f", bez kompresji oraz z kompresjƒÖ {named[0]}"
        elif has_none and named:
            sentence += f", bez kompresji oraz z kompresjƒÖ: {natural_join(named)}"
        elif named:
            sentence += f", z kompresjƒÖ: {natural_join(named)}"
    elif compression and compression != "none":
        sentence += f", z kompresjƒÖ: {compression_translations.get(compression, compression)}"

    # 5. Snapshots
    if combine_snapshots:
        snap_values = [str(s) for s in snapshot_list or []]
        has_zero = "0" in snap_values
        non_zero = [s for s in snap_values if s != "0"]

        # Nothing is written when only "0" is present (``non_zero`` is empty).
        if has_zero and len(non_zero) == 1:
            sentence += f", bez snapshotu oraz z snapshotem {non_zero[0]}"
        elif has_zero and non_zero:
            sentence += f", bez snapshotu oraz z snapshotami: {natural_join(non_zero)}"
        elif non_zero:
            sentence += f", z snapshotami: {natural_join(non_zero)}"

    sentence += "."
    return sentence


In [None]:
# Display names for file-system keys.
fs_translations = {
    "btrfs": "Btrfs",
    "ext4": "Ext4",
    "xfs": "XFS",
    "f2fs": "F2FS",
    "exfat": "exFAT",
    "zfs": "ZFS",
    "zfs_nocache": "ZFS bez cache",
    "zfs_limit": "ZFS Limit",
    "zfs_l2arc": "ZFS L2ARC",
    "zfs_l2arc_limit": "ZFS L2ARC Limit",
}

# Display names (Polish) for the metric keys produced by the result parsers.
metric_translations = {
    "Bandwidth (MiB/s)": "Przepustowo≈õƒá (MiB/s)",
    "Bandwidth READ (MiB/s)": "Odczyt (MiB/s)",
    "Bandwidth WRITE (MiB/s)": "Zapis (MiB/s)",
    "IOPS": "IOPS",
    "IOPS READ": "IOPS - odczyt",
    "IOPS WRITE": "IOPS - zapis",
    "Latency (ms)": "Op√≥≈∫nienie (ms)",
    "Latency READ (ms)": "Op√≥≈∫nienie - odczyt (ms)",
    "Latency WRITE (ms)": "Op√≥≈∫nienie - zapis (ms)"
}

# Display names (Polish) for workload keys.
workload_translations = {
    "database": "baza danych",
    "multimedia": "multimedia",
    "webserver": "serwer WWW",
    "archive": "archiwum"
}

# Display names for storage keys; keys are upper-case.
storage_type_translations = {
    "NVME": "NVMe",
    "RAID02HDD": "RAID0 2xHDD",
    "RAID04HDD": "RAID0 4xHDD",
    "STRIPE4HDD": "Stripe 4xHDD",
    "STRIPE2HDD": "Stripe 2xHDD",
    "MIRROR": "Mirror",
}

# Display names for compression keys. Keys use underscores, matching the
# "_"-joined tokens that extract_values derives from folder names.
compression_translations = {
    "none": "brak",
    "lz4": "LZ4",
    "zlib_1": "zlib-1",
    "zlib_3": "zlib-3",
    "zlib_9": "zlib-9",
    # Hyphenated spelling kept for backward compatibility with existing lookups.
    "zlib-9": "zlib-9",
    "zstd_1": "zstd-1",
    "zstd_3": "zstd-3",
    "zstd_9": "zstd-9",
    "zstd_15": "zstd-15",
}




In [None]:
def parse_fio_results(file_path):
    """Parse one fio result file (JSON or human-readable text) into metrics.

    A file whose first character is ``{`` is tried as fio JSON output; if it
    does not decode as JSON, or for any other file, the text parser is used.

    Args:
        file_path: Path of the fio output file.

    Returns:
        A dict that may contain, for ``READ`` and/or ``WRITE``:
        ``'IOPS <DIR>'``, ``'Bandwidth <DIR> (MiB/s)'`` and
        ``'Latency <DIR> (ms)'``. Metrics not found in the file are absent.
    """
    # Abbreviation suffixes on printed numbers (e.g. ``IOPS=12.5k``).
    # NOTE(review): a lower-case "m" is treated as mega, like "M" - confirm
    # against the fio version that produced the files.
    suffix_factor = {
        '': 1.0,
        'k': 1e3, 'K': 1e3,
        'm': 1e6, 'M': 1e6,
        'g': 1e9, 'G': 1e9,
    }

    def parse_text_fio(file):
        """Extract metrics from fio's human-readable output, line by line."""
        # Summary lines: "READ: bw=48.8MiB/s ..." / "WRITE: bw=3338KiB/s ..."
        bandwidth_regex = re.compile(r'(READ|WRITE): bw=(\d+(?:\.\d+)?)((?:[KMG]i)?B/s)')
        # Per-job lines: "read: IOPS=12.5k, ..." / "write: IOPS=834, ..."
        iops_regex = re.compile(r'(read|write): IOPS=(\d+(?:\.\d+)?)([kKmMgG]?)')
        # Latency lines: "lat (usec): min=51, max=17683, avg=97.10, stdev=69.50".
        # Optional whitespace after "=" tolerates space-padded numbers.
        latency_regex = re.compile(
            r'lat \(([nmu])sec\): min=\s*\d+\.?\d*[kKmMgG]?, max=\s*\d+\.?\d*[kKmMgG]?, '
            r'avg=\s*(\d+(?:\.\d+)?)([kKmMgG]?), stdev=\s*\d+\.?\d*'
        )

        to_mib_per_s = {
            'B/s': 1.0 / (1024 * 1024),
            'KiB/s': 1.0 / 1024,
            'MiB/s': 1.0,
            'GiB/s': 1024.0,
        }
        per_ms = {'n': 1e6, 'u': 1e3, 'm': 1.0}  # nsec / usec / msec per millisecond

        results = {}
        # Latency lines do not name their direction, so remember the most
        # recent line that mentioned "write" or "read" (write wins on a tie).
        last = 'read'
        for line in file:
            if 'write' in line:
                last = 'write'
            elif 'read' in line:
                last = 'read'

            for bw in bandwidth_regex.finditer(line):
                direction, value, unit = bw.groups()
                results[f'Bandwidth {direction} (MiB/s)'] = float(value) * to_mib_per_s[unit]

            for iops in iops_regex.finditer(line):
                direction, value, suffix = iops.groups()
                results[f'IOPS {direction.upper()}'] = float(value) * suffix_factor[suffix]

            if (lat := latency_regex.search(line)):
                unit, value, suffix = lat.groups()
                # The pattern also matches "slat"/"clat" lines; the last
                # matching line for a direction is the one that is kept.
                results[f'Latency {last.upper()} (ms)'] = float(value) * suffix_factor[suffix] / per_ms[unit]

        return results

    def parse_json_fio(data):
        """Extract metrics from decoded fio JSON output."""
        results = {}
        for job in data.get('jobs', []):
            for rw_type in ['read', 'write']:
                # Directions that performed no I/O are skipped.
                if job.get(rw_type, {}).get('iops', 0) > 0:
                    stats = job[rw_type]
                    tag = rw_type.upper()
                    latency_ns = stats.get('lat_ns', {}).get('mean', 0)

                    results[f'IOPS {tag}'] = round(stats['iops'], 2)
                    # 'bw' is divided by 1024 to store it under a MiB/s key.
                    results[f'Bandwidth {tag} (MiB/s)'] = round(stats['bw'] / 1024, 2)
                    results[f'Latency {tag} (ms)'] = round(latency_ns / 1_000_000, 3)

        return results

    with open(file_path, 'r') as f:
        looks_like_json = f.read(1) == '{'
        f.seek(0)
        if looks_like_json:
            try:
                data = json.load(f)
            except json.JSONDecodeError:
                # Not valid JSON after all; re-read it as text output.
                f.seek(0)
            else:
                return parse_json_fio(data)
        return parse_text_fio(f)


def parse_dd_results(file_path):
    """Parse the summary line of a ``dd`` run into bandwidth and elapsed time.

    The transfer direction is taken from the file *name*: a name containing
    ``write`` is stored as a write result, anything else as a read result.

    Args:
        file_path: Path of the captured dd output.

    Returns:
        A dict with ``'Bandwidth WRITE (MiB/s)'`` or
        ``'Bandwidth READ (MiB/s)'`` and ``'Time (s)'``, for whichever of them
        were found. If several lines match, the last one wins.
    """
    # Rate as "<number> <unit>B/s"; GNU dd spells kilo with a lower-case "k".
    bandwidth_regex = re.compile(r'(\d+(?:\.\d+)?) ([kKMGT]?B/s)')
    time_regex = re.compile(r'(\d+(?:\.\d+)?) s')

    # dd reports its rate in decimal (power-of-1000) units, while the result
    # key promises MiB/s, so convert through bytes per second.
    bytes_per_unit = {
        'B/s': 1.0,
        'kB/s': 1e3,
        'KB/s': 1e3,
        'MB/s': 1e6,
        'GB/s': 1e9,
        'TB/s': 1e12,
    }
    bytes_per_mib = 1024 * 1024

    def convert_bandwidth(value, unit):
        """Convert a dd rate in the given unit to MiB/s."""
        return float(value) * bytes_per_unit[unit] / bytes_per_mib

    # Only the file name decides the direction, so a directory that happens to
    # contain "write" cannot relabel read results.
    is_write = 'write' in os.path.basename(file_path)
    bandwidth_key = 'Bandwidth WRITE (MiB/s)' if is_write else 'Bandwidth READ (MiB/s)'

    results = {}

    with open(file_path, 'r') as file:
        for line in file:
            bw_match = bandwidth_regex.search(line)
            if bw_match:
                value, unit = bw_match.groups()
                results[bandwidth_key] = convert_bandwidth(value, unit)

            time_match = time_regex.search(line)
            if time_match:
                results['Time (s)'] = float(time_match.group(1))

    return results


from collections import defaultdict
import os
import glob
import re

def extract_values(resultsfolder, file_names, parser, program_type):
    resultsdict = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(dict))))))

    def extract_key(file_name):
        match = re.match(r"^[^_]+_(.*?)_test", file_name)
        return match.group(1) if match else os.path.splitext(file_name)[0]

    prepaths = [folder for folder in glob.glob(os.path.join(resultsfolder, '*/')) if program_type in folder]

    for prepath in prepaths:
        folder_parts = os.path.basename(os.path.normpath(prepath)).split('_')
        filesystem = '_'.join(folder_parts[2:-1])
        storage = folder_parts[-1]

        if program_type in ['fio_results', 'dd_results']:
            block_size_folders = glob.glob(os.path.join(prepath, '*/'))
            for block_size_folder in block_size_folders:
                folder_name_parts = os.path.basename(os.path.normpath(block_size_folder)).split('_')

                block_size = 'unknown'
                direct = '1'  # domy≈õlna warto≈õƒá
                compression = 'none'

                if len(folder_name_parts) >= 5 and folder_name_parts[0] == "block" and folder_name_parts[1] == "size":
                    block_size = folder_name_parts[2]
                    try:
                        direct_idx = folder_name_parts.index("direct")
                        direct = folder_name_parts[direct_idx + 1]
                    except ValueError:
                        direct = '1'  # fallback

                    try:
                        compression_idx = folder_name_parts.index("compression")
                        compression = '_'.join(folder_name_parts[compression_idx + 1:]) or 'none'
                    except ValueError:
                        compression = 'none'
                else:
                    print(f"Pomijanie folderu o nieoczekiwanej strukturze: {block_size_folder}")
                    continue

                folders = glob.glob(os.path.join(block_size_folder, '*/'))
                cumulative_data = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

                for folder in folders:
                    # Automatyczne wykrycie plik√≥w wynikowych (fio_*_test_output*.json lub .txt)
                    file_paths = glob.glob(os.path.join(folder, 'fio_*_test_output*.*'))

                    for file_path in file_paths:
                        if os.path.exists(file_path):
                            try:
                                file_name = os.path.basename(file_path)
                                results = parser(file_path)
                                if results:
                                    test_key = extract_key(file_name)

                                    # WyciƒÖgnij snapshot z nazwy pliku, np. _snapshots_120
                                    snapshot_match = re.search(r'_snapshots_(\d+)', file_name)
                                    snapshot = snapshot_match.group(1) if snapshot_match else '0'

                                    for key, value in results.items():
                                        cumulative_data[test_key][snapshot][key].append(value)
                            except Exception as e:
                                print(f"B≈ÇƒÖd podczas parsowania {file_path}: {e}")
                        else:
                            print(f"Plik nie znaleziony: {file_path}")

                for test_key, snapshots_data in cumulative_data.items():
                    for snapshot, metrics in snapshots_data.items():
                        if metrics:
                            ranges_snapshot = {
                                key: {
                                    'median': round(np.median(values), 3),
                                    'std': round(np.std(values), 3),
                                    'min': round(min(values), 3),
                                    'max': round(max(values), 3),
                                    'avg': round(sum(values) / len(values), 2)
                                } if values else '-' for key, values in metrics.items()
                            }
                            resultsdict[filesystem][direct][block_size][compression][storage][snapshot][test_key] = ranges_snapshot

        else:
            # Programy bez block size i snapshot√≥w (np. hdparm)
            folders = glob.glob(os.path.join(prepath, '*/'))
            cumulative_data = defaultdict(lambda: defaultdict(list))

            for folder in folders:
                file_paths = glob.glob(os.path.join(folder, '*'))
                for file_path in file_paths:
                    if os.path.exists(file_path):
                        try:
                            file_name = os.path.basename(file_path)
                            results = parser(file_path)
                            if results:
                                test_key = extract_key(file_name)
                                for key, value in results.items():
                                    cumulative_data[test_key][key].append(value)
                        except Exception as e:
                            print(f"B≈ÇƒÖd podczas parsowania {file_path}: {e}")
                    else:
                        print(f"Plik nie znaleziony: {file_path}")

            for test_key, metrics in cumulative_data.items():
                if metrics:
                    ranges = {
                        key: {
                            'median': round(np.median(values), 3),
                            'std': round(np.std(values), 3),
                            'min': round(min(values), 3),
                            'max': round(max(values), 3),
                            'avg': round(sum(values) / len(values), 2)
                        } if values else '-' for key, values in metrics.items()
                    }
                    resultsdict[filesystem]['1']['no_block_size']['none'][storage]['0'][test_key] = ranges

    return resultsdict


def parse_hdparm_results(file_path):
    """Parse the O_DIRECT read-timing line of an ``hdparm`` output file.

    Args:
        file_path: Path of the captured hdparm output.

    Returns:
        A dict with ``'Total Data Read (MB)'`` and ``'Bandwidth (MiB/s)'``
        taken from the last matching line, or an empty dict if none matches.
    """
    timing_pattern = re.compile(r'Timing O_DIRECT disk reads: (\d+(?:\.\d+)?) MB in .* seconds = (\d+(?:\.\d+)) MB/sec')

    parsed = {}
    with open(file_path, 'r') as handle:
        # Later matches overwrite earlier ones, so the last timing line wins.
        for hit in filter(None, map(timing_pattern.search, handle)):
            parsed['Total Data Read (MB)'] = float(hit.group(1))
            parsed['Bandwidth (MiB/s)'] = float(hit.group(2))
    return parsed
    
def extract_hdparm_values_by_device(resultsfolder, file_names, parser, group_by_computer=False):
    """Collect hdparm results per device type and aggregate them.

    Scans ``<resultsfolder>/<hdparm*>/<config>/<computer>/<file_name>``.
    The device type is the lower-cased last ``_``-separated token of the
    ``hdparm*`` folder name (e.g. ``hdparm_results_btrfs_hdd`` -> ``hdd``).

    Args:
        resultsfolder: Root directory of the results tree.
        file_names: File names looked up inside every computer folder.
        parser: Callable taking a file path and returning ``{metric: value}``.
        group_by_computer: If True, keep results separate per computer folder.

    Returns:
        ``{device_type: {metric: stats}}`` or, when grouping by computer,
        ``{device_type: {computer: {metric: stats}}}``, where ``stats`` holds
        ``'median'``, ``'std'``, ``'min'``, ``'max'`` and ``'avg'``.
    """
    def summarize(metrics):
        """Reduce ``{metric: [values]}`` to per-metric summary statistics."""
        return {
            key: {
                'median': round(np.median(values), 3),
                'std': round(np.std(values), 3),
                'min': round(min(values), 3),
                'max': round(max(values), 3),
                'avg': round(sum(values) / len(values), 2)
            } if values else '-'
            for key, values in metrics.items()
        }

    # One extra nesting level (computer name) when grouping by computer.
    resultsdict = (
        defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
        if group_by_computer
        else defaultdict(lambda: defaultdict(list))
    )

    for prepath in glob.glob(os.path.join(resultsfolder, '*/')):
        folder_name = os.path.basename(os.path.normpath(prepath))
        if not folder_name.startswith('hdparm'):
            continue  # skip non-hdparm folders

        # Use the folder name rather than the full path, so underscores in
        # parent directories cannot leak into the device type.
        device_type = folder_name.split('_')[-1].lower()

        # Configuration folders, e.g. block_size_4096_compression_none/
        for config_folder in glob.glob(os.path.join(prepath, '*/')):
            # Computer folders, one per machine
            for computer_folder in glob.glob(os.path.join(config_folder, '*/')):
                computer_name = os.path.basename(os.path.normpath(computer_folder))

                for file_name in file_names:
                    file_path = os.path.join(computer_folder, file_name)
                    if not os.path.exists(file_path):
                        print(f"File not found: {file_path}")
                        continue
                    try:
                        results = parser(file_path)
                        for key, value in results.items():
                            # Buckets are fetched lazily so that device types
                            # without any value never appear in the output.
                            bucket = resultsdict[device_type]
                            if group_by_computer:
                                bucket = bucket[computer_name]
                            bucket[key].append(value)
                    except Exception as e:
                        # Best-effort: report the file and keep scanning.
                        print(f"Error parsing {file_path}: {e}")

    if group_by_computer:
        return {
            device_type: {
                computer: summarize(metrics)
                for computer, metrics in computers.items()
            }
            for device_type, computers in resultsdict.items()
        }
    return {
        device_type: summarize(metrics)
        for device_type, metrics in resultsdict.items()
    }

In [None]:
# Which measurement campaign to load: "article" or "zwykle".
tests = "zwykle"

# Results root and fio text-output file names per campaign; an unknown
# campaign falls back to an empty folder and an empty file list.
resultsfolder, fio_file_names = {
    "article": (
        '../wyniki_50G_article/',
        [
            'fio_database_article_test_output.txt',
            'fio_seq_read_article_test_output.txt',
            'fio_seq_write_article_test_output.txt',
        ],
    ),
    "zwykle": (
        '../wyniki_zwykle/',
        [
            'fio_database_test_output.txt',
            'fio_multimedia_test_output.txt',
            'fio_webserver_test_output.txt',
            'fio_archive_test_output.txt',
        ],
    ),
}.get(tests, ('', []))

dd_file_names = ['dd_read_test_output.txt', 'dd_write_test_output.txt']
hdparm_file_names = ['hdparm_test_output.txt']

# fio: load and show the key hierarchy of the nested result dictionary.
fio_resultsdict = extract_values(
    resultsfolder,
    fio_file_names,
    parse_fio_results,
    program_type='fio_results',
)
print_dict_tree(fio_resultsdict, max_depth=6)

# dd: same extraction routine with the dd parser.
dd_resultsdict = extract_values(
    resultsfolder,
    dd_file_names,
    parse_dd_results,
    program_type='dd_results',
)
print(dd_resultsdict)

# hdparm: aggregated per device type, all computers pooled together.
hdparm_resultsdict = extract_hdparm_values_by_device(
    resultsfolder,
    hdparm_file_names,
    parse_hdparm_results,
    group_by_computer=False,
)

print(hdparm_resultsdict)

In [None]:
def deep_merge_results(dict1, dict2):
    """Recursively merge ``dict2`` into ``dict1`` in place.

    Where both sides hold a dictionary under the same key, the merge descends
    into it, at any depth. In every other case the value from ``dict2`` is
    stored in ``dict1``, replacing what was there. Sub-trees that exist only
    in ``dict2`` are inserted by reference, not copied.

    Args:
        dict1: Destination dictionary; modified in place.
        dict2: Source dictionary; left unchanged.

    Returns:
        None.
    """
    for key, incoming in dict2.items():
        # ``key in dict1`` is checked first so a defaultdict destination does
        # not create an empty entry as a side effect of the lookup.
        if key in dict1 and isinstance(dict1[key], dict) and isinstance(incoming, dict):
            deep_merge_results(dict1[key], incoming)
        else:
            dict1[key] = incoming


# Load the JSON result sets of the selected campaign from the additional
# result folders and merge them into fio_resultsdict (modified in place).
if tests == "article":
    fio_file_names = [
    'fio_database_article_test_output.json',
    'fio_seq_read_article_test_output.json',
    'fio_seq_write_article_test_output.json',
    ]

    fio_resultsdict_raid= extract_values('../wyniki_raid_article_50G/', fio_file_names, parse_fio_results, program_type='fio_results')
    fio_resultsdict_raid_compression= extract_values('../wyniki_raid_article_50G_kompresja/', fio_file_names, parse_fio_results, program_type='fio_results')
    deep_merge_results(fio_resultsdict, fio_resultsdict_raid)
    deep_merge_results(fio_resultsdict, fio_resultsdict_raid_compression)
    print_dict_tree(fio_resultsdict, max_depth=3)

elif tests == "zwykle":
    fio_file_names = [
        'fio_database_test_output.json',
        'fio_multimedia_test_output.json',
        'fio_webserver_test_output.json',
        'fio_archive_test_output.json',
    ]
    # Merge the different result folders

    # Drop the zfs, zfs_nocache and zfs_primary entries from the base results
    fio_resultsdict.pop('zfs', None)
    fio_resultsdict.pop('zfs_nocache', None)
    fio_resultsdict.pop('zfs_primary', None)

    # Load the data from the additional folders; some top-level file-system
    # keys are dropped right after loading so they are not merged later
    fio_resultsdict_nowe_zfs = extract_values('../wyniki_zwykle_nowe_zfs/', fio_file_names, parse_fio_results, program_type='fio_results')
    fio_resultsdict_nowe_zfs.pop('zfs', None)
    fio_resultsdict_nowe_zfs.pop('zfs_primary', None)
    fio_resultsdict_zfs_stats = extract_values('../wyniki_zwykle_zfs_stats/', fio_file_names, parse_fio_results, program_type='fio_results')
    fio_resultsdict_zfs_stats_limit = extract_values('../wyniki_zwykle_zfs_stats_limit/', fio_file_names, parse_fio_results, program_type='fio_results')
    fio_resultsdict_kompresja = extract_values('../wyniki_zwykle_kompresja/', fio_file_names, parse_fio_results, program_type='fio_results')
    fio_resultsdict_kompresja.pop('zfs', None)
    fio_resultsdict_kompresja.pop('zfs_l2arc', None)
    fio_resultsdict_kompresja_arcstats = extract_values('../wyniki_zwykle_kompresja_arcstats/', fio_file_names, parse_fio_results, program_type='fio_results')
    fio_resultsdict_raid = extract_values('../wyniki_raid/', fio_file_names, parse_fio_results, program_type='fio_results')
    # NOTE: loaded here, but its merge further down is commented out
    fio_resultsdict_snapshot = extract_values('../wyniki_zwykle_snapshots_nowe/', fio_file_names, parse_fio_results, program_type='fio_results')
    fio_resultsdict_uring = extract_values('../wyniki_zwykle_nowy_silnik/', fio_file_names, parse_fio_results, program_type='fio_results')

    # Rename 'zfs' -> 'zfs_nowa_wersja' so this set does not share a key with other zfs results
    fio_resultsdict_nowa_wersja = extract_values('../wyniki_zwykle_directtest_nowe2.3/', fio_file_names, parse_fio_results, program_type='fio_results')
    if 'zfs' in fio_resultsdict_nowa_wersja:
        fio_resultsdict_nowa_wersja['zfs_nowa_wersja'] = fio_resultsdict_nowa_wersja.pop('zfs')
    # Same idea for 'zfs_nocache'; NOTE: the merge of this set is commented out below
    fio_resultsdict_snapshots_nowa_wersja = extract_values('../wyniki_zwykle_snapshots_nowe2.3/', fio_file_names, parse_fio_results, program_type='fio_results')
    if 'zfs_nocache' in fio_resultsdict_snapshots_nowa_wersja:
        fio_resultsdict_snapshots_nowa_wersja['zfs_nocache_nowa_wersja'] = fio_resultsdict_snapshots_nowa_wersja.pop('zfs_nocache')



    # Append "_uring" to every top-level key of fio_resultsdict_uring
    fio_resultsdict_uring_renamed = {}
    for fs, fs_data in fio_resultsdict_uring.items():
        fio_resultsdict_uring_renamed[f"{fs}_uring"] = fs_data
    deep_merge_results(fio_resultsdict, fio_resultsdict_uring_renamed)

    # Add the data from the other folders to the main result set
    #fio_resultsdict.update(fio_resultsdict_new)
    # Rename the keys 'zfs' and 'zfs_l2arc' to 'zfs_limit' and 'zfs_l2arc_limit' in fio_resultsdict_zfs_stats_limit
    if 'zfs' in fio_resultsdict_zfs_stats_limit:
        fio_resultsdict_zfs_stats_limit['zfs_limit'] = fio_resultsdict_zfs_stats_limit.pop('zfs')
    if 'zfs_l2arc' in fio_resultsdict_zfs_stats_limit:
        fio_resultsdict_zfs_stats_limit['zfs_l2arc_limit'] = fio_resultsdict_zfs_stats_limit.pop('zfs_l2arc')
    # Merge order matters: for keys present in several sets, later merges
    # write over values stored by earlier ones
    deep_merge_results(fio_resultsdict, fio_resultsdict_nowa_wersja)
    #deep_merge_results(fio_resultsdict, fio_resultsdict_snapshots_nowa_wersja)
    deep_merge_results(fio_resultsdict, fio_resultsdict_nowe_zfs)
    #deep_merge_results(fio_resultsdict, fio_resultsdict_snapshot)
    deep_merge_results(fio_resultsdict, fio_resultsdict_zfs_stats_limit)
    deep_merge_results(fio_resultsdict, fio_resultsdict_zfs_stats)
    deep_merge_results(fio_resultsdict, fio_resultsdict_kompresja)
    deep_merge_results(fio_resultsdict, fio_resultsdict_kompresja_arcstats)
    deep_merge_results(fio_resultsdict, fio_resultsdict_raid)
    print_dict_tree(fio_resultsdict, max_depth=6)


Funkcja do generowania wykresów

In [None]:
def darken_color(color, amount=0.7):
    """Return ``color`` as an RGB tuple with every channel scaled by ``amount``.

    The color is converted with ``to_rgb``, each channel is multiplied by
    ``amount`` and the result is clipped to the range [0, 1].
    """
    rgb = np.array(to_rgb(color))
    scaled = rgb * amount
    clipped = np.clip(scaled, 0, 1)
    return tuple(clipped)

def _collect_plot_series(data, workload, file_systems, direct_options, block_options,
                         storage_options, compression_options, snapshot_options,
                         combine_direct_values, combine_block_sizes, combine_storage_types,
                         combine_compression_types, combine_snapshots, debug):
    """Gather the workload statistics for every series drawn on one figure.

    Looks up ``data[fs][direct][block_size][compression][storage][snapshot][workload]``
    for each requested combination and returns ``{series label: workload stats}``
    in iteration order. Combinations without data are skipped and, when *debug*
    is true, reported on stdout.
    """
    prepared_data = {}

    for fs in file_systems:
        fs_data = data.get(fs, {})
        if not fs_data:
            if debug:
                print(f"‚ö†Ô∏è Pomijanie {fs} - brak danych")
            continue

        for drct in direct_options:
            drct_data = fs_data.get(drct, {})
            if not drct_data:
                if debug:
                    print(f"‚ö†Ô∏è Pomijanie {fs} - brak danych dla direct='{drct}'")
                continue

            for bs in block_options:
                if combine_block_sizes:
                    # When block sizes are compared side by side, never fall
                    # back to the 'default' entry.
                    block_data = drct_data.get(bs, {})
                else:
                    block_data = drct_data.get(bs, {}) or drct_data.get('default', {})

                if not block_data:
                    if debug:
                        if combine_block_sizes:
                            print(f"‚ö†Ô∏è Pomijanie {fs} - brak danych dla block_size '{bs}'")
                        else:
                            print(f"‚ö†Ô∏è Pomijanie {fs} - brak danych dla block_size '{bs}' lub 'default'")
                    continue

                for st in storage_options:
                    for comp in compression_options:
                        storage_data = block_data.get(comp, {}).get(st.lower())
                        if not storage_data:
                            if debug:
                                print(f"‚ö†Ô∏è Pomijanie {fs} - brak danych dla storage '{st}' w block_size '{bs}' lub 'default'")
                            continue

                        for snap in snapshot_options:
                            snapshot_data = storage_data.get(snap)
                            if not isinstance(snapshot_data, dict):
                                if debug:
                                    print(f"‚ö†Ô∏è Pomijanie {fs} - storage '{st}': brak danych dla snapshot '{snap}'")
                                continue

                            workload_data = snapshot_data.get(workload)
                            if not isinstance(workload_data, dict):
                                if debug:
                                    print(f"‚ö†Ô∏è Pomijanie {fs} - storage '{st}': brak danych dla workload '{workload}'")
                                continue

                            # The label only mentions the dimensions that vary
                            # within a single figure.
                            label_parts = [fs_translations.get(fs, fs)]
                            if combine_direct_values:
                                label_parts.append("bez direct" if drct == "0" else "z direct")
                            if combine_block_sizes:
                                label_parts.append("domy≈õlny rozmiar bloku" if bs == "default" else f"rozmiar bloku: {bs}")
                            if combine_storage_types:
                                label_parts.append(storage_type_translations.get(st.upper(), st.upper()))
                            if combine_compression_types:
                                label_parts.append(f"({compression_translations.get(comp, comp)})")
                            if combine_snapshots:
                                # Snapshot keys are strings such as "0", so
                                # compare the textual form.
                                if str(snap) == "0":
                                    label_parts.append("Snapshot: brak")
                                else:
                                    label_parts.append(f"Snapshot: {snap}")

                            prepared_data[" ".join(label_parts)] = workload_data

    return prepared_data


def _select_plots(metrics, prepared_data):
    """Decide which subplots to draw, based on the first collected series.

    Returns a list of ``("grouped", name, read_metric, write_metric)`` and
    ``("single", metric_name)`` tuples. *prepared_data* must be non-empty.
    """
    sample = next(iter(prepared_data.values()))
    plots = []
    for metric in metrics:
        if isinstance(metric, dict):
            read_metric = metric.get("read")
            write_metric = metric.get("write")
            if read_metric and write_metric and read_metric in sample and write_metric in sample:
                plots.append(("grouped", metric["name"], read_metric, write_metric))
            elif read_metric and read_metric in sample:
                plots.append(("single", read_metric))
            elif write_metric and write_metric in sample:
                plots.append(("single", write_metric))
            else:
                plots.append(("single", metric["name"]))
        elif metric in sample:
            plots.append(("single", metric))
    return plots


def _wrap_title_parts(parts, max_line_length=80):
    """Join non-empty *parts* with " | ", breaking lines at *max_line_length*."""
    lines = []
    current_line = ""
    for part in parts:
        if not part:
            continue
        separator_length = 3 if current_line else 0  # accounts for " | "
        if len(current_line) + len(part) + separator_length <= max_line_length:
            current_line = f"{current_line} | {part}" if current_line else part
        else:
            if current_line:  # avoid a blank first line for an over-long part
                lines.append(current_line)
            current_line = part
    if current_line:
        lines.append(current_line)
    return lines


def _append_error(errors, stats, avg, include_min_max, include_std):
    """Append the lower/upper error-bar extents of one bar to *errors*."""
    if include_min_max:
        errors[0].append(avg - stats.get('min', avg))
        errors[1].append(stats.get('max', avg) - avg)
    elif include_std:
        std_val = stats.get('std', 0)
        errors[0].append(std_val)
        errors[1].append(std_val)


def _annotate_bars(ax, bars, upper_errors, suffix=""):
    """Write each bar's value (plus *suffix*) just above the bar or its error bar."""
    for j, bar in enumerate(bars):
        height = bar.get_height()
        err = upper_errors[j] if upper_errors is not None else 0
        ax.text(bar.get_x() + bar.get_width() / 2, height + err + 0.02 * height,
                f'{height:.2f}{suffix}', ha='center', va='bottom', fontsize=8)


def _label_x_axis(ax, positions, labels):
    """Place the series labels on the x axis, rotating them when there are many."""
    ax.set_xticks(positions)
    if len(labels) >= 6:
        ax.set_xticklabels(labels, rotation=45, ha='center', fontsize=8)
    else:
        ax.set_xticklabels(labels, fontsize=10)


def _style_axis(ax, translated_label):
    """Apply the shared axis title, y label, grid and thin black frame."""
    ax.set_ylabel(translated_label)
    ax.set_title(translated_label)
    ax.grid(axis='y', linestyle='--', alpha=0.5, zorder=0)
    for spine in ax.spines.values():
        spine.set_edgecolor('black')
        spine.set_linewidth(0.5)


def _draw_hdparm_reference(ax, hdparm_data, hdparm_stat, storage, metric_name, debug):
    """Add the hdparm bandwidth of *storage* as a reference line and legend entry."""
    if storage and storage.lower() in hdparm_data:
        device_data = hdparm_data[storage.lower()]
        hdparm_val = device_data.get("Bandwidth (MiB/s)", {}).get(hdparm_stat)
        if hdparm_val:
            ylim = ax.get_ylim()
            label_text = f'hdparm {storage} ({hdparm_val:.1f} MiB/s)'
            if hdparm_val <= ylim[1]:
                ax.axhline(hdparm_val, color='#8B0000', linestyle='--', linewidth=1.5, alpha=0.5, label=label_text)
                ax.legend(fontsize=8, loc='upper left', bbox_to_anchor=(0.78, 1.25))
            else:
                # The line would fall outside the axis: show only a legend proxy.
                legend_line = Line2D([0], [0], color='#8B0000', linestyle='--', linewidth=1.5, alpha=0.5)
                ax.legend([legend_line], [label_text], fontsize=8, loc='upper left', bbox_to_anchor=(0.78, 1.25))
    elif debug:
        print(f"‚ö†Ô∏è Brak danych hdparm dla {storage} w {metric_name}")


def _draw_grouped_bars(ax, prepared_data, read_metric, write_metric, colors,
                       include_min_max, include_std):
    """Draw paired read/write bars for every series that has both averages."""
    show_errors = include_min_max or include_std
    labels, bar_colors = [], []
    read_vals, write_vals = [], []
    read_err, write_err = ([], []), ([], [])

    for idx, (label, workload_data) in enumerate(prepared_data.items()):
        read_stats = workload_data.get(read_metric, {})
        write_stats = workload_data.get(write_metric, {})
        read_avg = read_stats.get('avg')
        write_avg = write_stats.get('avg')
        if read_avg is None or write_avg is None:
            continue
        labels.append(label)
        bar_colors.append(colors[idx % len(colors)])
        read_vals.append(read_avg)
        write_vals.append(write_avg)
        _append_error(read_err, read_stats, read_avg, include_min_max, include_std)
        _append_error(write_err, write_stats, write_avg, include_min_max, include_std)

    x = np.arange(len(labels))
    bar_width = 0.35
    # Labels starting with "_" are ignored by matplotlib's automatic legend.
    read_bars = ax.bar(x - bar_width / 2, read_vals, bar_width,
                       label='_Odczyt', color=bar_colors,
                       yerr=read_err if show_errors else None, capsize=5)
    write_bars = ax.bar(x + bar_width / 2, write_vals, bar_width,
                        label='_Zapis', color=[darken_color(c) for c in bar_colors],
                        yerr=write_err if show_errors else None, capsize=5)

    _annotate_bars(ax, read_bars, read_err[1] if show_errors else None, '\nOdczyt')
    _annotate_bars(ax, write_bars, write_err[1] if show_errors else None, '\nZapis')

    if show_errors:
        tops = ([v + e for v, e in zip(read_vals, read_err[1])] +
                [v + e for v, e in zip(write_vals, write_err[1])])
    else:
        tops = read_vals + write_vals
    max_height = max(tops, default=0)
    if max_height > 0:
        ax.set_ylim(0, max_height * 1.4)  # headroom for the value annotations

    _label_x_axis(ax, x, labels)


def _draw_single_bars(ax, prepared_data, metric_name, colors, include_min_max, include_std):
    """Draw one bar per series that reports an average for *metric_name*."""
    show_errors = include_min_max or include_std
    labels, bar_colors, avg_values = [], [], []
    y_errs = ([], [])

    for idx, (label, workload_data) in enumerate(prepared_data.items()):
        metric_data = workload_data.get(metric_name, {})
        avg = metric_data.get("avg")
        if avg is None:
            continue
        labels.append(label)
        bar_colors.append(colors[idx % len(colors)])
        avg_values.append(avg)
        _append_error(y_errs, metric_data, avg, include_min_max, include_std)

    bars = ax.bar(labels, avg_values, color=bar_colors,
                  yerr=y_errs if show_errors else None, capsize=5)
    _label_x_axis(ax, range(len(labels)), labels)
    _annotate_bars(ax, bars, y_errs[1] if show_errors else None)

    tops = [v + e for v, e in zip(avg_values, y_errs[1])] if show_errors else avg_values
    max_height = max(tops, default=0)
    if max_height > 0:
        ax.set_ylim(0, max_height * 1.4)  # headroom for the value annotations


def plot_performance_metrics(data, metrics, storage_types, block_sizes, direct_values,
                              include_min_max=False, include_std=False,
                              workloads=None,
                              file_systems=None, colors=None,
                              hdparm_data=None, hdparm_stat='avg',
                              preserve_ylim=False, debug=False,
                              combine_storage_types=False,
                              combine_compression_types=False, compression_types=None,
                              save_dir=None, save=False,
                              caption_autogen=False,
                              snapshots=None, combine_snapshots=False, combine_block_sizes=False,
                              combine_direct_values=False):
    """Draw bar charts of benchmark metrics, one figure per parameter combination.

    A figure is produced for every combination of direct value, block size,
    compression, storage type, snapshot and workload. A dimension whose
    ``combine_*`` flag is set is not split into separate figures; instead all
    of its values appear as separate bars (series) inside each figure.

    Args:
        data: Nested results, indexed as
            ``data[fs][direct][block_size][compression][storage][snapshot][workload][metric]``,
            where the innermost dict holds ``'avg'`` and optionally ``'min'``,
            ``'max'``, ``'std'``.
        metrics: Metric names (``str``) and/or dicts with ``"name"``, ``"read"``
            and ``"write"`` keys; a dict whose read and write metrics are both
            present is drawn as grouped read/write bars.
        storage_types, block_sizes, direct_values: Values to iterate over.
        include_min_max: Draw error bars from ``min``/``max``.
        include_std: Draw error bars from ``std`` (ignored if *include_min_max*).
        workloads: Workload names to plot; ``None`` plots nothing.
        file_systems: File systems to include; defaults to all keys of *data*.
        colors: Colour cycle for the series.
        hdparm_data: Optional ``{storage: {"Bandwidth (MiB/s)": {stat: value}}}``
            used to draw a reference line on bandwidth plots.
        hdparm_stat: Which statistic of *hdparm_data* to draw.
        preserve_ylim: Accepted for compatibility; currently unused.
        debug: Print a message for every skipped combination.
        compression_types: Compression names; defaults to ``["none"]``.
        snapshots: Snapshot keys; defaults to ``["0"]``.
        save_dir, save: When both are set, the figure is written below
            ``wykresy/<save_dir>`` and a LaTeX figure snippet is printed;
            otherwise the figure is shown.
        caption_autogen: Generate the LaTeX caption with ``generate_caption``.

    Returns:
        None.
    """
    from itertools import product

    if colors is None:
        colors = [
            'b', 'g', 'r', 'c', 'm', 'y', 'orange', '#FFD700', '#A0522D',
            '#8A2BE2', '#00CED1', '#DC143C', '#228B22', '#FF69B4', '#1E90FF',
            '#FFDAB9', '#7FFF00', '#D2691E', '#483D8B', '#00FA9A', '#FF6347'
        ]
    if file_systems is None:
        file_systems = list(data.keys())
    if workloads is None:
        workloads = []

    # Normalise the optional dimensions once so the combined and non-combined
    # paths use the same defaults.
    compression_options = compression_types if compression_types else ["none"]
    snapshot_options = snapshots if snapshots else ["0"]

    # A combined dimension contributes a single placeholder to the figure loop.
    figure_combinations = product(
        ["none"] if combine_direct_values else direct_values,
        ["none"] if combine_block_sizes else block_sizes,
        [None] if combine_compression_types else compression_options,
        [None] if combine_storage_types else storage_types,
        [None] if combine_snapshots else snapshot_options,
        workloads,
    )

    for direct, block_size, compression, storage, snapshot, workload in figure_combinations:
        prepared_data = _collect_plot_series(
            data, workload, file_systems,
            direct_values if combine_direct_values else [direct],
            block_sizes if combine_block_sizes else [block_size],
            storage_types if combine_storage_types else [storage],
            compression_options if combine_compression_types else [compression],
            snapshot_options if combine_snapshots else [snapshot],
            combine_direct_values, combine_block_sizes, combine_storage_types,
            combine_compression_types, combine_snapshots, debug,
        )

        if not prepared_data:
            if debug:
                print(f"‚ö†Ô∏è Brak danych po przetworzeniu dla compression={compression}, block_size={block_size}, workload={workload}")
            continue

        plots = _select_plots(metrics, prepared_data)
        if not plots:
            # plt.subplots() rejects zero rows, so there is nothing to draw.
            if debug:
                print(f"Pomijanie wykresu - brak pasujacych metryk dla workload={workload}")
            continue

        fig, axs = plt.subplots(len(plots), 1, figsize=(10, 3 * len(plots)))
        if len(plots) == 1:
            axs = [axs]

        # Leave room for the title while keeping control over the plot area.
        fig.subplots_adjust(top=0.9, bottom=0.15, left=0.15, right=0.95)

        # Give every subplot the same share of the figure (figure coordinates).
        fixed_ax_height = 0.6 / len(plots)
        for i, ax in enumerate(axs):
            ax.set_position([0.15, 0.9 - (i + 1) * fixed_ax_height, 0.8, fixed_ax_height * 0.9])

        translated_workload = workload_translations.get(workload, workload)

        if combine_block_sizes:
            translated_block_size = ", ".join(
                ["domy≈õlny" if b == "default" else f"{b}" for b in block_sizes]
            )
        else:
            translated_block_size = "domy≈õlny" if block_size == "default" else block_size

        combined_label = ", ".join(
            [storage_type_translations.get(s.upper(), s.upper()) for s in storage_types]
        ) if combine_storage_types else storage_type_translations.get(storage.upper(), storage.upper())

        if combine_compression_types:
            compression_label = ", ".join([compression_translations.get(c, c) for c in compression_options])
        else:
            compression_label = compression_translations.get(compression, compression)

        if combine_snapshots:
            snapshot_label = ", ".join(["brak" if str(s) == "0" else f"{s}" for s in snapshot_options])
        else:
            snapshot_label = "brak" if str(snapshot) == "0" else f"{snapshot}"

        if combine_direct_values:
            direct_label = ", ".join([f"{d}" for d in direct_values])
        else:
            direct_label = direct

        title_groups = [
            translated_workload.capitalize() if workload else "",
            f'Direct: {direct_label}',
            f"Typ dysku: {combined_label}",
            f"Rozmiar bloku: {translated_block_size}",
            f"Snapshot: {snapshot_label}",
            f"Kompresja: {compression_label}",
        ]
        fig.suptitle("\n".join(_wrap_title_parts(title_groups, max_line_length=80)))

        for ax, plot in zip(axs, plots):
            if plot[0] == "grouped":
                _, base_name, read_metric, write_metric = plot
                _draw_grouped_bars(ax, prepared_data, read_metric, write_metric, colors,
                                   include_min_max, include_std)
                _style_axis(ax, metric_translations.get(base_name, base_name))
                # Use this subplot's own metric name, not a leftover loop variable.
                if hdparm_data and hdparm_stat and base_name == "Bandwidth (MiB/s)" and not combine_storage_types:
                    _draw_hdparm_reference(ax, hdparm_data, hdparm_stat, storage, base_name, debug)

            elif plot[0] == "single":
                _, metric_name = plot
                _draw_single_bars(ax, prepared_data, metric_name, colors,
                                  include_min_max, include_std)
                _style_axis(ax, metric_translations.get(metric_name, metric_name))
                if hdparm_data and hdparm_stat and "bandwidth" in metric_name.lower() and not combine_storage_types:
                    _draw_hdparm_reference(ax, hdparm_data, hdparm_stat, storage, metric_name, debug)

        plt.tight_layout(rect=[0, 0, 1, 0.96])
        if save and save_dir:
            output_dir = "wykresy/" + save_dir
            os.makedirs(output_dir, exist_ok=True)
            safe_combined_label = combined_label.replace(" ", "").replace("/", "")
            safe_compression_label = "combined" if combine_compression_types else compression.lower()
            safe_file_systems = "_".join([fs.replace("_", "") for fs in file_systems])
            safe_snapshots = "_".join([str(sp).replace("_", "") for sp in snapshot_options]) if combine_snapshots else "default"
            # NOTE(review): the file name contains neither the direct value nor
            # (when snapshots are not combined) the snapshot, so figures that
            # differ only in those dimensions overwrite each other. Left as is
            # to keep existing file names stable - confirm whether intended.
            filename = f"syntetyk_{safe_file_systems}_{safe_combined_label.lower()}_{block_size.lower()}_{safe_compression_label}_{safe_snapshots}_{workload.lower()}.png"
            filepath = os.path.join(output_dir, filename)
            fig.savefig(filepath, bbox_inches='tight')
            if caption_autogen:
                caption = generate_caption(
                    workload=workload,
                    storage=storage,
                    block_size=block_size,
                    compression=compression,
                    snapshot=snapshot,
                    combine_storage_types=combine_storage_types,
                    combine_compression_types=combine_compression_types,
                    combine_snapshots=combine_snapshots,
                    storage_list=storage_types,
                    compression_list=compression_types,
                    snapshot_list=snapshots,
                    workload_translations=workload_translations,
                    storage_type_translations=storage_type_translations,
                    compression_translations=compression_translations,
                )
            else:
                caption = None
            print_latex_image(save_dir, filename, caption=caption)
            if debug:
                print(f"üìÅ Zapisano wykres do pliku: {filepath}")
        else:
            plt.show()



Funkcja do generowania tabelki fio

In [None]:
def extract_row_data(data, workload, columns, file_systems=None, storage_types=None, block_sizes=None,
                     compressions=None, snapshots=None):
    """Build table rows for *workload* from the nested benchmark results.

    The function walks ``data[fs][block_size][compression][storage][snapshot]``
    and emits one row for every entry that contains *workload*.
    NOTE(review): unlike the plotting code there is no ``direct`` level in this
    walk - presumably callers pass an already selected slice; confirm.

    Args:
        data: Nested results as described above; the workload entry maps
            metric names to dicts of lower-case statistics (``"min"``, ...).
        workload: Workload name whose metrics are extracted.
        columns: Column names; the first five are descriptive columns and the
            rest follow the ``"<storage> <metric> <STAT>"`` pattern.
        file_systems, storage_types, compressions, snapshots: Optional filters;
            entries not listed are skipped. Storage types are compared
            case-insensitively.
        block_sizes: Block sizes to emit; a missing size falls back to the
            ``"default"`` entry. Defaults to every key present for the
            file system.

    Returns:
        A list of rows, each with exactly ``len(columns)`` cells when
        *columns* has at least five entries. Cells without a value (other
        storage type, missing metric, unparsable column name) hold ``"N/A"``.
    """
    # Parse the metric columns once instead of once per row.
    parsed_columns = []
    for col in columns[5:]:  # skip the five descriptive columns
        try:
            storage_col, metric_stat = col.split(" ", 1)
            metric, stat = metric_stat.rsplit(" ", 1)
        except ValueError:
            print(f"Nie uda≈Ço siƒô sparsowaƒá kolumny: {col}")
            # Keep a placeholder so every row stays aligned with `columns`.
            parsed_columns.append(None)
        else:
            parsed_columns.append((storage_col.lower(), metric, stat.lower()))

    allowed_storage = {s.upper() for s in storage_types} if storage_types else None

    rows = []
    for fs, block_data in data.items():
        if file_systems and fs not in file_systems:
            continue
        for block_size in block_sizes if block_sizes else block_data.keys():
            block_entry = block_data.get(block_size) or block_data.get("default")
            if not block_entry:
                continue
            for compression, storage_data in block_entry.items():
                if compressions and compression not in compressions:
                    continue
                for storage, snapshot_data in storage_data.items():
                    if allowed_storage is not None and storage.upper() not in allowed_storage:
                        continue
                    for snapshot, workloads in snapshot_data.items():
                        if snapshots and snapshot not in snapshots:
                            continue
                        if workload not in workloads:
                            continue

                        row = [
                            fs,
                            "domy≈õlny" if block_size == "default" else block_size,
                            compression_translations.get(compression, compression),
                            storage_type_translations.get(storage.upper(), storage),
                            snapshot,
                        ]

                        workload_metrics = workloads[workload]
                        storage_key = storage.lower()
                        for parsed in parsed_columns:
                            value = "N/A"
                            # Only columns of this row's storage type get a value.
                            if parsed is not None and parsed[0] == storage_key:
                                _, metric, stat = parsed
                                value = workload_metrics.get(metric, {}).get(stat, "N/A")
                            row.append(value)
                        rows.append(row)
    return rows



def generate_columns(metrics, stats=("MIN", "AVG", "MAX"), storage_types=("HDD", "SSD", "NVME")):
    """Return the table header: five descriptive columns plus metric columns.

    One ``"<storage> <metric> <stat>"`` column is generated for every
    combination, ordered by storage type, then metric, then statistic.
    Metric names containing "Bandwidth" get a ``(MiB/s)`` suffix and names
    containing "Latency" get ``(ms)``, unless the name already carries that
    unit.

    Args:
        metrics: Iterable of metric names.
        stats: Statistic names used as column suffixes.
        storage_types: Storage type names used as column prefixes.

    Returns:
        A new list of column names.
    """
    columns = ["System plik√≥w", "Rozmiar bloku", "Kompresja", "Typ no≈õnika", "Migawka"]

    # Resolve the unit suffix once per metric rather than once per storage type.
    metric_names = []
    for metric in metrics:
        if "Bandwidth" in metric:
            unit = "(MiB/s)"
        elif "Latency" in metric:
            unit = "(ms)"
        else:
            unit = None
        if unit is None or unit in metric:
            metric_names.append(metric)
        else:
            metric_names.append(f"{metric} {unit}")

    columns.extend(
        f"{storage} {metric_name} {stat}"
        for storage in storage_types
        for metric_name in metric_names
        for stat in stats
    )
    return columns



def display_performance_metrics(data, workloads, metrics, stats=["MIN", "AVG", "MAX"],
                                storage_types=["HDD", "SSD", "NVME"], file_systems=None,
                                block_sizes=None, compressions=None, snapshots=None):
    """Render one styled results table per workload in the notebook output.

    For every workload the header is produced by ``generate_columns`` and the
    rows by ``extract_row_data``; the resulting DataFrame is shown through
    ``display`` with a caption and numbers formatted to three decimals.

    Args:
        data: Nested results dictionary handed to ``extract_row_data``.
        workloads: Workload names; one table is displayed for each.
        metrics: Metric names forwarded to ``generate_columns``.
        stats: Statistic labels forwarded to ``generate_columns``.
        storage_types: Storage labels used for the columns and forwarded to
            ``extract_row_data``.
        file_systems, block_sizes, compressions, snapshots: Forwarded
            unchanged to ``extract_row_data``.

    Returns:
        None. The tables are displayed as a side effect.
    """
    for workload in workloads:
        header = generate_columns(metrics, stats, storage_types)
        table = pd.DataFrame(
            extract_row_data(data, workload, header, file_systems, storage_types,
                             block_sizes, compressions, snapshots),
            columns=header,
        )
        styled = table.style.set_caption(f"Performance Metrics: {workload.capitalize()}")
        display(styled.format(precision=3))


# Example usage: one results table per workload of the selected test suite.
# `tests` and `fio_resultsdict` must already be defined by earlier cells.
if tests == "article":
    workloads = ["database_article", "seq_read_article", "seq_write_article"]
elif tests == "zwykle":
    workloads = ["database", "multimedia", "webserver", "archive"]

# Metrics shown for each workload; "default" is the fallback for unlisted workloads.
metrics = {
    'database_article': ["Bandwidth READ", "Bandwidth WRITE", "IOPS READ", "IOPS WRITE", "Latency READ", "Latency WRITE"],
    'seq_write_article': ["Bandwidth WRITE", "IOPS WRITE", "Latency WRITE"],
    'seq_read_article': ["Bandwidth READ", "IOPS READ", "Latency READ"],
    'database': ["Bandwidth READ", "Bandwidth WRITE", "IOPS READ", "IOPS WRITE", "Latency READ", "Latency WRITE"],
    'archive': ["Bandwidth WRITE", "IOPS WRITE", "Latency WRITE"],
    'multimedia': ["Bandwidth READ", "IOPS READ", "Latency READ"],
    'webserver': ["Bandwidth READ", "IOPS READ", "Latency READ"],
    "default": ["Bandwidth READ", "Bandwidth WRITE", "IOPS READ", "IOPS WRITE", "Latency READ", "Latency WRITE"],
}
block_sizes = ["default"]  # Specify block sizes to display
#storage_types=["hdd", "stripe2hdd", "stripe4hdd", "mirror", "raidz1", "raidz2", "raid10"]
storage_types=["hdd"]
file_systems = ["ext4", "xfs", "btrfs", "zfs"]
compression_types = ["none","lz4"]
snapshots = ["0"]  # Specify snapshots to display

# NOTE: display_performance_metrics() has no `direct_values` parameter, so passing
# it raised "TypeError: unexpected keyword argument". The variable is kept because
# other cells read it, but it is no longer forwarded to the table helper.
direct_values = ["0", "1"]

# Generate and display tables for each workload
for workload in workloads:
    workload_metrics = metrics.get(workload, metrics["default"])
    display_performance_metrics(
        fio_resultsdict, [workload], workload_metrics,
        stats=["AVG"],
        storage_types=storage_types,
        file_systems=file_systems,
        block_sizes=block_sizes,
        compressions=compression_types,
        snapshots=snapshots,
    )

In [None]:

# Example usage: plot fio bandwidth for the selected test suite.
# Relies on `tests`, `fio_resultsdict`, `hdparm_resultsdict` and
# `plot_performance_metrics` being defined by earlier cells.
# Each entry names a plot and the read/write metric keys it draws;
# commented-out entries are alternative plots that are currently disabled.
metrics = [
    {"name": "Bandwidth (MiB/s)", "read": "Bandwidth READ (MiB/s)", "write": "Bandwidth WRITE (MiB/s)"},
    #{"name": "IOPS", "read": "IOPS READ", "write": "IOPS WRITE"}, 
    #{"name": "Latency (ms)", "read": "Latency READ (ms)", "write": "Latency WRITE (ms)"},
]

# NOTE(review): if `tests` has any other value, `workloads` keeps whatever an
# earlier cell assigned (or is undefined).
if tests == "article":
    workloads = ["database_article", "seq_read_article", "seq_write_article"]
elif tests == "zwykle":
    workloads = ["database", "multimedia", "webserver", "archive"]

#storage_types = ['HDD','RAID02HDD', 'RAID04HDD', 'RAID1', 'RAID5', 'RAID6', 'RAID10']
#storage_types = ["hdd", "stripe2hdd", "stripe4hdd", "mirror", "raidz1", "raidz2", "raid10"]
storage_types = ["HDD"]  # Storage types to analyse

block_sizes = ['default']  

file_systems = ["zfs","zfs_nowa_wersja","zfs_nocache"]  #['exfat','ext4', 'xfs', 'btrfs', "f2fs", "zfs", "zfs_nocache"]

#workloads = ["database"]

#compression_types = ["none","zlib_1", "zlib_3", "zlib-9", "zstd_1", "zstd_1", "zstd_3", "zstd_9", "zstd_15"]  # btrfs compression levels
#compression_types = ["none", "gzip-1", "gzip-3", "gzip-9", "lz4"]  # zfs compression levels
compression_types = ["none"]
#snapshots= ["0","120", "240","360","480","600","720","840","960","1080","1200"]  # Snapshots to analyse, when available
snapshots = ["0"] 
direct_values = ["0","1"]
#plot_performance_metrics(fio_resultsdict, metrics, storage_types, block_sizes, include_min_max=True, workload=workload, hdparm_data=hdparm_resultsdict, hdparm_stat='avg' )
# save=False: figures are only shown, nothing is written to save_dir.
plot_performance_metrics(fio_resultsdict, metrics, storage_types, block_sizes, direct_values=direct_values, compression_types=compression_types, 
                            include_min_max=False, include_std=True,
                            workloads=workloads, combine_storage_types=False,
                            combine_compression_types=False, file_systems=file_systems,
                            hdparm_data=hdparm_resultsdict, hdparm_stat='max', 
                            snapshots=snapshots, combine_snapshots=False,
                            preserve_ylim=True, debug=True,
                            save_dir="wykresy_kompresja", save=False, caption_autogen=False,
                            combine_direct_values=True)



In [None]:
# Function to generate all possible columns
def generate_columns(metrics, stats=["MIN", "MAX", "AVG"], storage_types=["HDD", "SSD", "NVME"], file_systems=None):
    """Build the header row of the dd results table.

    Args:
        metrics: Metric names, used verbatim in the column names.
        stats: Statistic labels appended to every metric column.
        storage_types: Storage labels prefixed to every metric column.
        file_systems: Accepted but not used by this function.

    Returns:
        list[str]: "File System" and "Block Size" followed by one
        "<storage> <metric> <stat>" entry per storage/metric/stat
        combination, in that nesting order.
    """
    measured = [
        f"{storage} {metric} {stat}"
        for storage in storage_types
        for metric in metrics
        for stat in stats
    ]
    return ["File System", "Block Size"] + measured

def extract_row_data(data, columns, file_systems=None, block_sizes=None):
    """Flatten nested dd results into table rows that line up with *columns*.

    *data* is walked as file system -> block size -> device type ->
    operation -> metric -> statistic. One row is produced per
    (file system, block size) pair that passes the optional filters.

    Args:
        data: Nested results dictionary.
        columns: Header as produced by ``generate_columns``; the first two
            entries are skipped, the rest are parsed as
            "<storage> <metric> <stat>" where the metric may be two words.
        file_systems: If truthy, only these file systems are included.
        block_sizes: If truthy, only these block sizes are included.

    Returns:
        list[list]: Rows of ``[fs, block_size, value, ...]``; a value that is
        not present in *data* is reported as the string "N/A".
    """
    table = []
    for fs_name, per_block in data.items():
        if file_systems and fs_name not in file_systems:
            continue
        for bs, per_device in per_block.items():
            if block_sizes and bs not in block_sizes:
                continue

            record = [fs_name, bs]
            for header in columns[2:]:
                words = header.split()
                if len(words) > 3:
                    # Two-word metric: "<storage> <word1> <word2> <stat>".
                    storage, metric, stat = words[0], words[1] + ' ' + words[2], words[3]
                else:
                    storage, metric, stat = header.split(" ", 2)

                # Bandwidth metrics are stored under a key that carries the unit.
                metric_key = metric if "Bandwidth" not in metric else f"{metric} (MiB/s)"
                wanted_storage = storage.lower()
                wanted_stat = stat.lower()

                cell = "N/A"
                for device_name, per_operation in per_device.items():
                    if device_name.lower() != wanted_storage:
                        continue
                    # The first operation that reports this metric supplies the value.
                    hit = next((m for m in per_operation.values() if metric_key in m), None)
                    if hit is not None:
                        cell = hit[metric_key].get(wanted_stat, "N/A")
                record.append(cell)
            table.append(record)
    return table

# Build and display the dd results table.
# Uses the dd variants of generate_columns / extract_row_data defined in this
# cell and `dd_resultsdict` from an earlier cell.
columns = generate_columns(["Bandwidth READ", "Bandwidth WRITE"], stats=["MIN", "AVG", "MAX"], storage_types=["HDD", "SSD", "NVME"])
rows = extract_row_data(dd_resultsdict, columns, block_sizes=["4096"])  # Only the 4096 block size is shown
df = pd.DataFrame(rows, columns=columns)
display(df.style.set_caption("Performance Metrics: DD Results").format(precision=3))

In [None]:

# Plot performance metrics for dd data.
# NOTE(review): `metrics` is a list of plain strings here, while the fio plotting
# cell passes a list of {"name", "read", "write"} dicts -- confirm that
# plot_performance_metrics accepts both shapes.
# `storage_types` is whatever an earlier cell last assigned.
plot_performance_metrics(dd_resultsdict, metrics=["Bandwidth READ (MiB/s)", "Bandwidth WRITE (MiB/s)"], storage_types=storage_types, block_sizes=['4096']
, include_min_max=True, hdparm_data=hdparm_resultsdict, hdparm_stat='max' )

# TODO: add a universal function for building the tables

In [None]:
import os
import re
import numpy as np
from collections import defaultdict

def parse_arcstats_before_after(filepath):
    """Read an arcstats log and summarise ARC / L2ARC activity during a test.

    The file must contain a "===== ARC STATS BEFORE TEST =====" section
    followed by a "===== ARC STATS AFTER TEST =====" section. Within each
    section, lines of the form "<name> <int> <int>" are read as counters,
    taking the last integer as the value.

    Args:
        filepath: Path of the log file.

    Returns:
        dict | None: ``None`` when either section marker is missing, otherwise
        ``{'ARC': {...}, 'L2ARC': {...}}``. Hits and misses (and the L2ARC
        size) are "after minus before" differences; ARC ``size`` and
        ``limit`` (counter ``c``) are the values from the "after" section.
        Counters absent from a section count as 0.
    """
    with open(filepath, 'r') as handle:
        text = handle.read()

    before_section = re.search(
        r'===== ARC STATS BEFORE TEST =====\n(.*?)\n===== ARC STATS AFTER TEST =====', text, re.S)
    after_section = re.search(r'===== ARC STATS AFTER TEST =====\n(.*)', text, re.S)
    if before_section is None or after_section is None:
        return None

    def read_counters(section_text):
        matches = (re.match(r'(\S+)\s+\d+\s+(\d+)', row) for row in section_text.splitlines())
        return {m.group(1): int(m.group(2)) for m in matches if m}

    before = read_counters(before_section.group(1))
    after = read_counters(after_section.group(1))

    def delta(counter):
        return after.get(counter, 0) - before.get(counter, 0)

    arc = {
        'hits': delta('hits'),
        'misses': delta('misses'),
        'size': after.get('size', 0),
        'limit': after.get('c', 0),
    }
    l2arc = {
        'hits': delta('l2_hits'),
        'misses': delta('l2_misses'),
        'size': delta('l2_size'),
    }
    return {'ARC': arc, 'L2ARC': l2arc}


def collect_arcstats_raw(root_folder, program_type='fio_results'):
    """Collect the arcstats summaries of every ``arcstats_*.log`` under *root_folder*.

    The expected layout below the root is::

        <program_type>_<filesystem>_<storage>/
            block_size_<blocksize>_compression_<compression>/
                <lab>/arcstats_<workload>_test.log

    Directory components are taken relative to *root_folder*, so the result
    does not depend on how the root path is spelled ("../x", absolute, ...)
    or on the platform's path separator. Directories whose second component
    does not follow the ``block_size_..._compression_...`` pattern are
    skipped instead of aborting the whole scan.

    Args:
        root_folder: Directory to scan recursively.
        program_type: Prefix stripped (together with the following "_") from
            the first directory component before it is split into file
            system and storage.

    Returns:
        Nested defaultdict
        ``results[filesystem][blocksize][compression][storage][workload]``
        holding a list of ``{'lab', 'ARC', 'L2ARC'}`` dicts, one per log file
        that ``parse_arcstats_before_after`` could parse.
    """
    results = defaultdict(
        lambda: defaultdict(
            lambda: defaultdict(
                lambda: defaultdict(
                    lambda: defaultdict(list)
                )
            )
        )
    )

    for dirpath, _, filenames in os.walk(root_folder):
        log_names = [name for name in filenames
                     if name.startswith('arcstats_') and name.endswith('.log')]
        if not log_names:
            continue

        # Components below the root: [fs+storage dir, block/compression dir, lab, ...].
        parts = os.path.relpath(dirpath, root_folder).split(os.sep)
        if len(parts) < 3:
            continue
        fs_dir, layout_dir, lab = parts[0], parts[1], parts[2]

        # The last "_"-separated token is the storage, everything before it the file system.
        filesystem_parts = fs_dir.replace(f'{program_type}_', '').split('_')
        if len(filesystem_parts) >= 2:
            filesystem = '_'.join(filesystem_parts[:-1])
            storage = filesystem_parts[-1]
        else:
            filesystem = filesystem_parts[0]
            storage = 'unknown'

        blocksize_match = re.search(r'block_size_(.*?)_compression', layout_dir)
        compression_match = re.search(r'compression_(.*)', layout_dir)
        if blocksize_match is None or compression_match is None:
            # Not a results directory; nothing here can be attributed to a test.
            continue
        blocksize = blocksize_match.group(1)
        compression = compression_match.group(1)

        for filename in log_names:
            workload_match = re.match(r'arcstats_(.*?)_test\.log', filename)
            workload = workload_match.group(1) if workload_match else 'unknown'

            stats = parse_arcstats_before_after(os.path.join(dirpath, filename))
            if stats:
                results[filesystem][blocksize][compression][storage][workload].append({
                    'lab': lab,
                    'ARC': stats['ARC'],
                    'L2ARC': stats['L2ARC']
                })

    return results


# Run the collection for both result sets (default ARC and limited ARC).
arcstats_raw_results = collect_arcstats_raw('../wyniki_zwykle_zfs_stats')
arcstats_raw_results_limit = collect_arcstats_raw('../wyniki_zwykle_zfs_stats_limit')

# Rename the keys zfs -> zfs_limit and zfs_l2arc -> zfs_l2arc_limit in
# arcstats_raw_results_limit so they do not collide with the first result set
# when the two are merged.
if 'zfs' in arcstats_raw_results_limit:
    arcstats_raw_results_limit['zfs_limit'] = arcstats_raw_results_limit.pop('zfs')
if 'zfs_l2arc' in arcstats_raw_results_limit:
    arcstats_raw_results_limit['zfs_l2arc_limit'] = arcstats_raw_results_limit.pop('zfs_l2arc')

# deep_merge_results is defined in another cell; presumably it merges the second
# dict into the first in place (its return value is not used) -- TODO confirm.
deep_merge_results(arcstats_raw_results, arcstats_raw_results_limit)
# Example dump of everything that was collected
for fs, fs_data in arcstats_raw_results.items():
    for block, block_data in fs_data.items():
        for comp, comp_data in block_data.items():
            for storage, storage_data in comp_data.items():
                for workload, data in storage_data.items():
                    print(f"{fs}/{block}/{comp}/{storage}/{workload}: {data}")


In [None]:
import matplotlib.pyplot as plt
import os

def plot_arcstats_pie_from_results(arcstats_results, save_dir=None,
                                   workloads=None, file_systems=None,
                                   compressions=None, block_sizes=None,
                                   storage_types=None,
                                   lab=None,
                                   size_unit='MB',
                                   debug=True):
    """Draw ARC and L2ARC hit/miss pie charts, one figure per result series.

    *arcstats_results* is walked as file system -> block size -> compression
    -> storage -> workload -> list of per-lab entries. For every series that
    passes the filters one entry is chosen and a two-panel figure (ARC on the
    left, L2ARC on the right) is drawn.

    Args:
        arcstats_results: Nested results as returned by ``collect_arcstats_raw``.
        save_dir: If truthy, figures are written there as PNG files (the
            directory is created when needed); otherwise they are shown.
        workloads, file_systems, compressions, block_sizes, storage_types:
            Optional filters; a falsy value disables the filter.
        lab: ``'best'`` / ``'worst'`` pick the entry with the highest / lowest
            ARC hit ratio, ``'median'`` or ``None`` the middle one of the
            ranking; any other value selects the entry whose ``'lab'`` equals
            it (series without such an entry are reported and skipped).
        size_unit: ``'MB'`` (case-insensitive) reports sizes in MiB, anything
            else in GiB.
        debug: Print a line for every figure that is drawn.

    Returns:
        None.
    """
    plt.style.use('ggplot')  # set through pyplot, so it also affects later figures

    in_mib = size_unit.upper() == 'MB'
    size_divisor = 1024**2 if in_mib else 1024**3
    size_unit_label = 'MiB' if in_mib else 'GiB'

    def wanted(allowed, value):
        # A falsy filter lets everything through.
        return not allowed or value in allowed

    def arc_hit_ratio(entry):
        hits = entry['ARC']['hits']
        total = hits + entry['ARC']['misses']
        return hits / total if total > 0 else 0

    def matching_series():
        # Yield every series that passes all filters, in dictionary order.
        for fs, fs_data in arcstats_results.items():
            if not wanted(file_systems, fs):
                continue
            for blocksize, block_data in fs_data.items():
                if not wanted(block_sizes, blocksize):
                    continue
                for compression, comp_data in block_data.items():
                    if not wanted(compressions, compression):
                        continue
                    for storage, storage_data in comp_data.items():
                        if not wanted(storage_types, storage):
                            continue
                        for workload, entries in storage_data.items():
                            if wanted(workloads, workload):
                                yield fs, blocksize, compression, storage, workload, entries

    for fs, blocksize, compression, storage, workload, entries in matching_series():
        # Choose which lab's measurement represents this series.
        if lab in [None, 'median', 'best', 'worst']:
            ranked = sorted(entries, key=arc_hit_ratio)
            if lab == 'best':
                selected = ranked[-1]
            elif lab == 'worst':
                selected = ranked[0]
            else:  # 'median' or None
                selected = ranked[len(ranked) // 2]
        else:
            selected = next((e for e in entries if e['lab'] == lab), None)
            if not selected:
                print(f"‚ö†Ô∏è Brak wynik√≥w dla lab {lab} w {workload}")
                continue

        if debug:
            print(f"üü© Rysujƒô: {fs}/{blocksize}/{compression}/{storage}/{workload} | lab={selected['lab']}")

        fig, axs = plt.subplots(1, 2, figsize=(12, 6))

        for ax, cache_type in zip(axs, ['ARC', 'L2ARC']):
            cache = selected[cache_type]
            hits = cache['hits']
            misses = cache['misses']
            size = cache['size']
            # Only the ARC entry carries a size limit.
            limit = cache.get('limit', 0) if cache_type == 'ARC' else None

            if hits == 0 and misses == 0:
                # Nothing to draw: replace the panel with a placeholder text.
                ax.axis('off')
                ax.text(0.5, 0.5, f'Brak danych dla {cache_type}', ha='center', va='center', fontsize=14)
                continue

            wedges, texts, autotexts = ax.pie(
                [hits, misses],
                labels=['Hits', 'Misses'],
                autopct='%1.1f%%',
                startangle=90,
                colors=['#4CAF50', '#F44336'],
                wedgeprops={'edgecolor': 'white', 'linewidth': 1.5}
            )
            for label in texts + autotexts:
                label.set_fontsize(12)

            size_val = round(size / size_divisor, 2)
            if limit and limit > 0:
                limit_val = round(limit / size_divisor, 2)
                usage_pct = round(100 * size / limit, 1)
                size_line = f"Size: {size_val}/{limit_val} {size_unit_label} ({usage_pct}%)"
            else:
                size_line = f"Size: {size_val} {size_unit_label}"
            ax.set_title(f"{cache_type}\nHits: {hits:,} | Misses: {misses:,}\n" + size_line, fontsize=13)

        fig.suptitle(
            f"{workload.capitalize()} | {fs.upper()} | Blocksize: {blocksize} | Compression: {compression} | Storage: {storage} | Lab: {selected['lab']}",
            fontsize=14,
            y=1.02
        )
        plt.subplots_adjust(top=0.85)
        plt.tight_layout()

        if not save_dir:
            plt.show()
            continue

        os.makedirs(save_dir, exist_ok=True)
        safe_fs = fs.replace("/", "_")
        safe_file = f"{safe_fs}_{blocksize}_{compression}_{storage}_{workload}_{selected['lab']}.png"
        path = os.path.join(save_dir, safe_file)
        plt.savefig(path, bbox_inches='tight', dpi=150)
        print(f"üìÅ Zapisano wykres: {path}")


In [None]:
# Pie charts for the limited-ARC ZFS runs, using for each series the lab with
# the lowest ARC hit ratio. No save_dir is given, so the figures are only shown.
plot_arcstats_pie_from_results(
    arcstats_raw_results,
    workloads=['database', 'webserver', 'multimedia', 'archive'],
    file_systems=['zfs_limit'],
    lab='worst',
    compressions=['none'],
    block_sizes=['default'],
    storage_types=['hdd', 'nvme'],
    size_unit='GB',  # or 'MB'
    debug=False
)


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os

def _arcstats_rank_by_hit_ratio(entries):
    """Return the entries that saw any ARC access, sorted by ascending ARC hit ratio."""
    scored = []
    for entry in entries:
        hits = entry['ARC']['hits']
        total = hits + entry['ARC']['misses']
        if total > 0:
            scored.append((hits / total, entry))
    scored.sort(key=lambda pair: pair[0])
    return [entry for _, entry in scored]


def _arcstats_pick_ranked(ranked, lab):
    """Pick the 'best', 'worst' or 'median' entry of a non-empty ranking (None for other modes)."""
    if lab == 'best':
        return ranked[-1]
    if lab == 'worst':
        return ranked[0]
    if lab == 'median':
        return ranked[len(ranked) // 2]
    return None


def plot_arcstats_bar_from_results_final(arcstats_results, save_dir=None, save=False,
                                         workloads=None, file_systems=None,
                                         compressions=None, block_sizes=None,
                                         storage_types=None,
                                         lab='best', same_lab_for_all=False,
                                         show_lab=False,
                                         size_unit='MB', debug=True,
                                         fill_to_100=False):
    """Draw ARC / L2ARC hit-ratio bar charts comparing file systems.

    One figure is produced for every (storage, compression, block size,
    workload) combination for which at least one file system has data. Each
    file system contributes an ARC bar and, when any file system in the
    figure has L2ARC accesses, an L2ARC bar next to it.

    Labels are translated through the module-level ``fs_translations``,
    ``workload_translations``, ``compression_translations`` and
    ``storage_type_translations`` dictionaries. The storage label is looked
    up by the given key first and by its upper-case form second.

    Args:
        arcstats_results: Nested results as returned by ``collect_arcstats_raw``.
        save_dir: Sub-directory of ``wykresy/`` for saved figures.
        save: Figures are written only when both *save_dir* and *save* are
            truthy; a LaTeX figure snippet is printed for each saved file.
        workloads, file_systems, compressions, block_sizes, storage_types:
            Values to iterate over; ``None`` means "nothing".
        lab: ``'best'`` / ``'worst'`` / ``'median'`` choose by ARC hit ratio
            among entries with at least one ARC access; any other value is
            treated as a lab name.
        same_lab_for_all: If true, the lab is chosen once across all file
            systems of a figure and then used for each of them; otherwise it
            is chosen per file system.
        show_lab: Append the chosen lab to the tick labels.
        size_unit: ``'MB'`` (case-insensitive) reports sizes in MiB, anything
            else in GiB.
        debug: Print the path of every saved figure.
        fill_to_100: Shade the miss share of each bar and annotate it.

    Returns:
        None. Figures are shown with ``plt.show()``.
    """
    size_divisor = 1024**2 if size_unit.upper() == 'MB' else 1024**3
    size_unit_label = 'MiB' if size_unit.upper() == 'MB' else 'GiB'
    rank_modes = ('best', 'worst', 'median')

    # Colours
    arc_color = '#6BA368'
    arc_bg_color = '#E6E6E6'
    l2arc_color = '#4C72B0'
    l2arc_bg_color = '#D0D0D0'
    fill_arc_color = '#FF9999'    # lighter red
    fill_l2arc_color = '#FF4D4D'  # darker red

    for storage in storage_types or []:
        for compression in compressions or []:
            for blocksize in block_sizes or []:
                for workload in workloads or []:
                    # Look every file system up once; .get() never creates keys
                    # in the nested defaultdicts.
                    per_fs_entries = [
                        (fs, arcstats_results.get(fs, {}).get(blocksize, {}).get(compression, {})
                                             .get(storage, {}).get(workload, []))
                        for fs in file_systems or []
                    ]

                    # Choose one common lab when requested.
                    selected_lab = None
                    if same_lab_for_all:
                        all_entries = [e for _, entries in per_fs_entries for e in entries]
                        if not all_entries:
                            continue
                        if lab in rank_modes:
                            ranked = _arcstats_rank_by_hit_ratio(all_entries)
                            if not ranked:
                                # No entry saw any ARC access, so there is nothing
                                # to rank (this used to raise IndexError).
                                continue
                            selected_lab = _arcstats_pick_ranked(ranked, lab)['lab']
                        else:
                            selected_lab = lab

                    bar_labels = []
                    arc_ratios = []
                    l2arc_ratios = []
                    size_info = []
                    arc_hits_list = []
                    l2arc_hits_list = []
                    arc_miss_list = []
                    l2arc_miss_list = []

                    # Gather the data of every file system.
                    for fs, entries in per_fs_entries:
                        if not entries:
                            continue
                        if same_lab_for_all:
                            selected = next((e for e in entries if e['lab'] == selected_lab), None)
                        else:
                            ranked = _arcstats_rank_by_hit_ratio(entries)
                            if not ranked:
                                continue
                            if lab in rank_modes:
                                selected = _arcstats_pick_ranked(ranked, lab)
                            else:
                                selected = next((e for e in entries if e['lab'] == lab), None)
                        if not selected:
                            continue

                        arc = selected['ARC']
                        l2arc = selected['L2ARC']

                        total = arc['hits'] + arc['misses']
                        arc_ratio = 100 * arc['hits'] / total if total > 0 else 0

                        l2_total = l2arc['hits'] + l2arc['misses']
                        # None marks "no L2ARC traffic", as opposed to a 0 % hit ratio.
                        l2_ratio = 100 * l2arc['hits'] / l2_total if l2_total > 0 else None

                        fs_label = fs_translations.get(fs, fs)
                        bar_labels.append(f"{fs_label} ({selected['lab']})" if show_lab else fs_label)
                        arc_ratios.append(arc_ratio)
                        l2arc_ratios.append(l2_ratio)
                        size_val = round(arc['size'] / size_divisor, 2)
                        limit_val = round(arc.get('limit', 0) / size_divisor, 2)
                        size_info.append(f"{size_val}/{limit_val} {size_unit_label}")
                        arc_hits_list.append(arc['hits'])
                        l2arc_hits_list.append(l2arc['hits'] if l2arc['hits'] else None)
                        arc_miss_list.append(arc['misses'])
                        l2arc_miss_list.append(l2arc['misses'] if l2arc['misses'] else None)

                    if not bar_labels:
                        continue

                    l2arc_exists = any(r is not None for r in l2arc_ratios)

                    x = np.arange(len(bar_labels))
                    width = 0.35 if l2arc_exists else 0.5
                    full_height = [100] * len(bar_labels)

                    fig, ax = plt.subplots(figsize=(10, 6))

                    # ARC bars: hits in front, grey background and optional miss shading behind.
                    bars_arc = ax.bar(x, arc_ratios, width, color=arc_color, label='ARC - trafienia', edgecolor='black', zorder=3)
                    ax.bar(x, full_height, width, color=arc_bg_color, alpha=0.3, zorder=0)
                    if fill_to_100:
                        fill_arc_heights = [100 if m and m > 0 else 0 for m in arc_miss_list]
                        ax.bar(x, fill_arc_heights, width, color=fill_arc_color, alpha=0.3, zorder=1, label='ARC - chybienia')

                    bar_data = [(bars_arc, arc_ratios, arc_hits_list, arc_miss_list)]

                    # L2ARC bars, drawn next to the ARC bars.
                    if l2arc_exists:
                        bars_l2arc = ax.bar(x + width, [r if r is not None else 0 for r in l2arc_ratios],
                                            width, color=l2arc_color, label='L2ARC - trafienia', edgecolor='black', zorder=3)
                        ax.bar(x + width, full_height, width, color=l2arc_bg_color, alpha=0.3, zorder=0)
                        if fill_to_100:
                            fill_l2arc_heights = [100 if m and m > 0 else 0 for m in l2arc_miss_list]
                            ax.bar(x + width, fill_l2arc_heights, width, color=fill_l2arc_color, alpha=0.3, zorder=1, label='L2ARC - chybienia')
                        bar_data.append((bars_l2arc, l2arc_ratios, l2arc_hits_list, l2arc_miss_list))

                    # Percentages and absolute counts inside the bars.
                    font_size = 9 if len(bar_data) < 2 else 7
                    for bars, ratios, hits_list, miss_list in bar_data:
                        for bar, ratio, hits, misses in zip(bars, ratios, hits_list, miss_list):
                            if ratio is None:
                                continue
                            bar_x_center = bar.get_x() + bar.get_width() / 2

                            # Segments of 5 % or less are too small to hold a label.
                            if ratio > 5:
                                ax.text(bar_x_center, ratio / 2,
                                        f"{ratio:.1f}%\n({hits:,})",
                                        ha='center', va='center',
                                        color='white', fontsize=font_size, fontweight='bold')

                            miss_ratio = 100 - ratio
                            if fill_to_100 and miss_ratio > 5:
                                ax.text(bar_x_center, ratio + miss_ratio / 2,
                                        f"{miss_ratio:.1f}%\n({misses:,})",
                                        ha='center', va='center',
                                        color='black', fontsize=font_size, fontweight='bold')

                    # X axis: centre the tick between the two bars when L2ARC is drawn.
                    tick_offset = width / 2 if l2arc_exists else 0
                    ax.set_xticks(x + tick_offset)
                    ax.set_xticklabels(bar_labels, rotation=45, ha='right', fontsize=10)

                    # Axis labels and title
                    ax.set_ylim(0, 110)
                    ax.set_ylabel('Wska≈∫nik trafie≈Ñ (%)', fontsize=12)

                    workload_label = workload_translations.get(workload, workload)
                    compression_label = compression_translations.get(compression, compression)
                    # Other table code in this file looks storage up by upper-case key;
                    # try the key as given first, then its upper-case form.
                    storage_label = storage_type_translations.get(
                        storage, storage_type_translations.get(storage.upper(), storage))
                    blocksize_label = "domy≈õlny" if blocksize == "default" else blocksize

                    ax.set_title(
                        f"{workload_label.upper()} | kompresja: {compression_label} | blok: {blocksize_label} | no≈õnik: {storage_label}",
                        fontsize=13, pad=15
                    )

                    # ARC size / limit above the bars
                    for i, txt in enumerate(size_info):
                        ax.text(x[i] + tick_offset, 105, f"Rozmiar: {txt}",
                                ha='center', va='bottom', fontsize=9, color='black')

                    ax.legend(loc='lower right', fontsize=9)
                    ax.grid(axis='y', linestyle='--', alpha=0.5, zorder=0)

                    plt.tight_layout()
                    if save_dir and save:
                        safe_file_systems = "_".join([fs.replace("_", "") for fs in file_systems])
                        filename = f"arcstats_{safe_file_systems}_{storage}_{blocksize.lower()}_{compression}_{workload.lower()}.png"
                        print_latex_image(save_dir, filename)
                        os.makedirs("wykresy/"+save_dir, exist_ok=True)
                        path = os.path.join("wykresy/"+save_dir, filename)
                        plt.savefig(path, bbox_inches='tight', dpi=150)
                        if debug:
                            print(f"üìÅ Zapisano wykres: {path}")
                    plt.show()


In [None]:
# Bar charts comparing ARC / L2ARC hit ratios of the ZFS variants.
# Fixed the workload name 'mulitmedia' -> 'multimedia'; with the typo no data
# matched and that workload was silently left out.
plot_arcstats_bar_from_results_final(
    arcstats_results=arcstats_raw_results,
    workloads=['database', 'multimedia', 'archive', 'webserver'],  # workloads to plot
    file_systems=['zfs', 'zfs_limit', 'zfs_l2arc', 'zfs_l2arc_limit'],  # file systems to compare
    compressions=['none'],
    block_sizes=['default'],  # block sizes to plot
    storage_types=['nvme', 'hdd'],  # a separate chart is drawn for each storage type
    lab='median',    # which machine (lab) to show: 'best', 'worst', 'median' or a lab name
    show_lab=False,  # show the chosen lab in the tick labels (for debugging)
    size_unit='GB',
    same_lab_for_all=False,  # True: one lab for all file systems; False: chosen per file system
    debug=False,
    fill_to_100=True,
    save=False,
    save_dir="arcstats"  # sub-directory of "wykresy/" used when save=True
)
