Czytanie plików fio i dd

In [None]:
import os
import re
from collections import defaultdict
import glob
import numpy as np 
import pandas as pd
import json
import matplotlib.pyplot as plt
from matplotlib.colors import to_rgb
from matplotlib.patches import Patch
from matplotlib.lines import Line2D


In [None]:
def print_dict_tree(d, indent=0, max_depth=None):
    """Print the keys of a nested dict as an indented tree.

    Every key is printed on its own line, prefixed with two spaces per
    nesting level. Values that are not dicts are not descended into.

    Args:
        d: Object to walk; anything that is not a dict prints nothing.
        indent: Current nesting level (0 for the outermost dict).
        max_depth: Number of levels to print; ``None`` means no limit.
    """
    if not isinstance(d, dict):
        return
    if max_depth is not None and indent >= max_depth:
        return

    prefix = '  ' * indent
    for name, child in d.items():
        print(prefix + str(name))
        print_dict_tree(child, indent + 1, max_depth)

# Przykład użycia:
# print_dict_tree(fio_resultsdict, max_depth=3)


def print_latex_image(image_path, image_name, caption=None, label=None, width=None):
    """Print a LaTeX ``figure`` environment that includes a single image.

    Args:
        image_path: Sub-directory below ``images/`` that holds the image.
        image_name: Image file name; a trailing ``.png`` is dropped.
        caption: Caption text; a ``TODO caption`` placeholder is used when
            this is empty or ``None``.
        label: Label name; defaults to ``fig:<image name without .png>``.
        width: Value for the ``width=`` option; defaults to ``\\textwidth``.
    """
    stem = image_name[:-4] if image_name.endswith('.png') else image_name

    include_options = f"[width={width}]" if width else "[width=\\textwidth]"
    caption_line = f"\\caption{{{caption}}}" if caption else "\\caption{TODO caption}"
    label_line = f"\\label{{{label}}}" if label else f"\\label{{fig:{stem}}}"

    figure_lines = [
        "\\begin{figure}[H]",
        "    \\centering",
        f"    \\includegraphics{include_options}{{images/{image_path}/{stem}}}",
        f"    {caption_line}",
        f"    {label_line}",
        "\\end{figure}",
    ]
    # The snippet itself ends with a newline; print() then adds one more.
    print("\n".join(figure_lines) + "\n")


In [None]:
def parse_fio_results(file_path):
    """Parse one fio output file (JSON or human-readable text) into metrics.

    A file whose first character is ``{`` is tried as JSON first; if JSON
    decoding fails, or the file starts with anything else, it is scanned
    line by line as text output.

    Args:
        file_path: Path of the fio output file.

    Returns:
        dict with any of the keys ``'Bandwidth READ (MiB/s)'``,
        ``'Bandwidth WRITE (MiB/s)'``, ``'IOPS READ'``, ``'IOPS WRITE'``,
        ``'Latency READ (ms)'`` and ``'Latency WRITE (ms)'`` that could be
        found. Empty when nothing matched.
    """
    # Multipliers for the magnitude suffix fio appends to abbreviated numbers
    # (e.g. "IOPS=15.3k"). NOTE(review): a lower-case "m" is treated as mega,
    # matching the suffixes the original patterns accepted - confirm.
    suffix_scale = {'': 1, 'k': 1e3, 'm': 1e6, 'g': 1e9}
    # Divisors that convert a latency unit to milliseconds.
    latency_divisor = {'msec': 1, 'usec': 1_000, 'nsec': 1_000_000}

    def scaled(number, suffix):
        """Convert a captured number plus optional magnitude suffix to float."""
        return float(number) * suffix_scale[suffix.lower()]

    def parse_text_fio(file):
        """Scan fio's human-readable output line by line."""
        bandwidth_regex = re.compile(r'WRITE: bw=(\d+(?:\.\d+)?)([GMK]iB/s)')
        bandwidth_read_regex = re.compile(r'READ: bw=(\d+(?:\.\d+)?)([GMK]iB/s)')
        # Capture the fraction and suffix too, so "15.3k" is not cut to 15.
        iops_regex = re.compile(r'write: IOPS=(\d+(?:\.\d+)?)([kKmMgG]?)')
        iops_read_regex = re.compile(r'read: IOPS=(\d+(?:\.\d+)?)([kKmMgG]?)')
        # fio pads small avg/stdev values with spaces ("avg= 6.91") and may
        # report latencies in nsec, so both are accepted here.
        latency_regex = re.compile(
            r'lat \(([mun]sec)\): min=\s*[\d.]+[kKmMgG]?, max=\s*[\d.]+[kKmMgG]?, '
            r'avg=\s*(\d+\.\d+)([kKmMgG]?), stdev=\s*\d+\.?\d*'
        )

        def convert_bandwidth(value, unit):
            """Convert a KiB/s, MiB/s or GiB/s figure to MiB/s."""
            value = float(value)
            if unit == "KiB/s":
                return value / 1024
            if unit == "GiB/s":
                return value * 1024
            return value

        results = {}
        # Direction most recently mentioned; latency lines do not name the
        # direction themselves, so they are attributed to this one.
        last = 'read'
        for line in file:
            if 'write' in line:
                last = 'write'
            elif 'read' in line:
                last = 'read'

            if (bw := bandwidth_regex.search(line)):
                value, unit = bw.groups()
                results['Bandwidth WRITE (MiB/s)'] = convert_bandwidth(value, unit)
            if (bw := bandwidth_read_regex.search(line)):
                value, unit = bw.groups()
                results['Bandwidth READ (MiB/s)'] = convert_bandwidth(value, unit)
            if (iops := iops_regex.search(line)):
                results['IOPS WRITE'] = scaled(*iops.groups())
            if (iops := iops_read_regex.search(line)):
                results['IOPS READ'] = scaled(*iops.groups())
            if (lat := latency_regex.search(line)):
                unit, number, suffix = lat.groups()
                # The pattern also matches "slat"/"clat" lines; the last
                # matching line for a direction wins.
                results[f'Latency {last.upper()} (ms)'] = (
                    scaled(number, suffix) / latency_divisor[unit]
                )

        return results

    def parse_json_fio(data):
        """Extract metrics from fio's JSON output (already decoded)."""
        results = {}
        for job in data.get('jobs', []):
            for rw_type in ['read', 'write']:
                # Directions with zero IOPS are left out of the result.
                if job.get(rw_type, {}).get('iops', 0) > 0:
                    iops = job[rw_type]['iops']
                    bw_kib = job[rw_type]['bw']
                    latency_ns = job[rw_type].get('lat_ns', {}).get('mean', 0)

                    results[f'IOPS {rw_type.upper()}'] = round(iops, 2)
                    results[f'Bandwidth {rw_type.upper()} (MiB/s)'] = round(bw_kib / 1024, 2)
                    results[f'Latency {rw_type.upper()} (ms)'] = round(latency_ns / 1_000_000, 3)

        return results

    with open(file_path, 'r') as f:
        first_char = f.read(1)
        f.seek(0)
        if first_char == '{':
            try:
                data = json.load(f)
                return parse_json_fio(data)
            except json.JSONDecodeError:
                # Not valid JSON after all: rewind and parse as text.
                f.seek(0)
                return parse_text_fio(f)
        else:
            return parse_text_fio(f)


def parse_dd_results(file_path):
    """Parse a dd output file into bandwidth and elapsed-time metrics.

    Every line is searched for a throughput figure (``<number> KB/s``,
    ``MB/s`` or ``GB/s``) and for an elapsed time (``<number> s``); the last
    line that matches each pattern wins.

    Args:
        file_path: Path of the dd output file. If the path contains the text
            ``write`` the bandwidth is stored under the WRITE key, otherwise
            under the READ key.

    Returns:
        dict that may contain ``'Bandwidth WRITE (MiB/s)'`` or
        ``'Bandwidth READ (MiB/s)'`` and ``'Time (s)'``.
    """
    rate_pattern = re.compile(r'(\d+(?:\.\d+)?) ([GMK]B/s)')
    duration_pattern = re.compile(r'(\d+(?:\.\d+)?) s')

    # Scale factors relative to the MB/s figure. Note that MB/s values are
    # stored under a "MiB/s" key unchanged, i.e. no decimal/binary conversion
    # is applied.
    unit_scale = {"KB/s": 1 / 1024, "MB/s": 1, "GB/s": 1024}

    def to_mib_per_s(amount, unit):
        return float(amount) * unit_scale.get(unit, 1)

    parsed = {}
    with open(file_path, 'r') as handle:
        for row in handle:
            rate = rate_pattern.search(row)
            if rate is not None:
                if 'write' in file_path:
                    target_key = 'Bandwidth WRITE (MiB/s)'
                else:
                    target_key = 'Bandwidth READ (MiB/s)'
                parsed[target_key] = to_mib_per_s(*rate.groups())

            elapsed = duration_pattern.search(row)
            if elapsed is not None:
                parsed['Time (s)'] = float(elapsed.group(1))

    return parsed


from collections import defaultdict
import os
import glob
import re

def _folder_name(path):
    """Return the last directory component of ``path``, ignoring a trailing separator."""
    return os.path.basename(os.path.normpath(path))


def _summarize_samples(values):
    """Return median/std/min/max/avg of ``values``, or ``'-'`` when it is empty."""
    if not values:
        return '-'
    return {
        'median': round(np.median(values), 3),
        'std': round(np.std(values), 3),
        'min': round(min(values), 3),
        'max': round(max(values), 3),
        'avg': round(sum(values) / len(values), 2),
    }


def _collect_run_statistics(run_folders, file_names, parser, key_for):
    """Parse every expected file in every run folder and summarize per test and metric."""
    samples = {}
    for folder in run_folders:
        for file_name in file_names:
            file_path = os.path.join(folder, file_name)
            if not os.path.exists(file_path):
                print(f"Plik nie znaleziony: {file_path}")
                continue
            # Best effort: a file that cannot be parsed is reported and
            # skipped so that one bad run does not abort the whole scan.
            try:
                results = parser(file_path)
                if results:
                    test_key = key_for(file_name)
                    if test_key not in samples:
                        samples[test_key] = defaultdict(list)
                    for key, value in results.items():
                        samples[test_key][key].append(value)
            except Exception as e:
                print(f"Błąd podczas parsowania {file_path}: {e}")

    return {
        test_key: {key: _summarize_samples(values) for key, values in metrics.items()}
        for test_key, metrics in samples.items()
        if metrics
    }


def extract_values(resultsfolder, file_names, parser, program_type):
    """Collect and aggregate benchmark results from a tree of result folders.

    Only direct sub-folders of ``resultsfolder`` whose path contains
    ``program_type`` are scanned. The sub-folder name is split on ``_``: the
    last part is the storage name and the parts from the third up to (not
    including) the last form the filesystem name.

    For ``'fio_results'`` and ``'dd_results'`` each of those folders is
    expected to hold ``block_size_<size>[_compression_<name>]`` folders, which
    in turn hold one folder per run. For any other program type the run
    folders sit directly below the program folder and the result is stored
    under block size ``'no_block_size'`` and compression ``'none'``.

    Args:
        resultsfolder: Root folder of the results.
        file_names: File names expected inside every run folder.
        parser: Callable taking a file path and returning a ``{metric: value}``
            dict; falsy results are ignored.
        program_type: Text that selects the program folders.

    Returns:
        Nested defaultdict indexed as
        ``[filesystem][block_size][compression][storage][test][metric]`` whose
        leaves are dicts with ``median``, ``std``, ``min``, ``max``, ``avg``.
    """
    resultsdict = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(dict))))

    def extract_key(file_name):
        # "<tool>_<test name>_test..." -> "<test name>"; otherwise the file
        # name without its extension.
        match = re.match(r"^[^_]+_(.*?)_test", file_name)
        return match.group(1) if match else os.path.splitext(file_name)[0]

    for prepath in glob.glob(os.path.join(resultsfolder, '*/')):
        if program_type not in prepath:
            continue

        # Use os.path instead of splitting on '\\' so the folder name is also
        # found when the path uses '/' separators.
        folder_parts = _folder_name(prepath).split('_')
        filesystem = '_'.join(folder_parts[2:-1])
        storage = folder_parts[-1]

        if program_type not in ('fio_results', 'dd_results'):
            # Programs without block size and compression levels (e.g. hdparm).
            run_folders = glob.glob(os.path.join(prepath, '*/'))
            resultsdict[filesystem]['no_block_size']['none'][storage] = _collect_run_statistics(
                run_folders, file_names, parser, extract_key
            )
            continue

        for block_size_folder in glob.glob(os.path.join(prepath, '*/')):
            name_parts = _folder_name(block_size_folder).split('_')
            if len(name_parts) < 3 or name_parts[:2] != ['block', 'size']:
                print(f"Pomijanie folderu o nieoczekiwanej strukturze: {block_size_folder}")
                continue

            block_size = name_parts[2]
            # No "compression" part (or nothing after it) means no compression.
            compression = 'none'
            if 'compression' in name_parts:
                compression_idx = name_parts.index('compression')
                compression = '_'.join(name_parts[compression_idx + 1:]) or 'none'

            run_folders = glob.glob(os.path.join(block_size_folder, '*/'))
            resultsdict[filesystem][block_size][compression][storage] = _collect_run_statistics(
                run_folders, file_names, parser, extract_key
            )

    return resultsdict



def parse_hdparm_results(file_path):
    """Parse the O_DIRECT read timing line of an hdparm output file.

    Args:
        file_path: Path of the hdparm output file.

    Returns:
        dict with ``'Total Data Read (MB)'`` and ``'Bandwidth (MiB/s)'`` taken
        from the last matching line, or an empty dict if no line matches. The
        MB/sec figure is stored under the "MiB/s" key unchanged.
    """
    timing_pattern = re.compile(r'Timing O_DIRECT disk reads: (\d+(?:\.\d+)?) MB in .* seconds = (\d+(?:\.\d+)) MB/sec')

    parsed = {}
    with open(file_path, 'r') as handle:
        # Keep only the lines on which the pattern is found.
        for hit in filter(None, map(timing_pattern.search, handle)):
            parsed['Total Data Read (MB)'] = float(hit.group(1))
            parsed['Bandwidth (MiB/s)'] = float(hit.group(2))

    return parsed
    
def extract_hdparm_values_by_device(resultsfolder, file_names, parser, group_by_computer=False):
    """Collect hdparm results per device type and summarize every metric.

    Scans ``resultsfolder/<hdparm*>/<config>/<computer>/`` for the given file
    names. The device type is the text after the last ``_`` of the top-level
    folder path, lower-cased.

    Args:
        resultsfolder: Root folder of the results.
        file_names: File names expected inside every computer folder.
        parser: Callable taking a file path and returning a
            ``{metric: value}`` dict.
        group_by_computer: When true, results are kept separate per computer
            folder name; otherwise all computers are pooled per device type.

    Returns:
        ``{device_type: {metric: stats}}`` or, when grouping by computer,
        ``{device_type: {computer: {metric: stats}}}``, where ``stats`` holds
        ``median``, ``std``, ``min``, ``max`` and ``avg``.
    """
    def summarize(samples_by_metric):
        # One stats dict per metric; '-' stands in for an empty sample list.
        return {
            metric: {
                'median': round(np.median(samples), 3),
                'std': round(np.std(samples), 3),
                'min': round(min(samples), 3),
                'max': round(max(samples), 3),
                'avg': round(sum(samples) / len(samples), 2)
            } if samples else '-'
            for metric, samples in samples_by_metric.items()
        }

    if group_by_computer:
        collected = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
    else:
        collected = defaultdict(lambda: defaultdict(list))

    for device_dir in glob.glob(os.path.join(resultsfolder, '*/')):
        # Only folders whose name starts with 'hdparm' are of interest.
        if not os.path.basename(os.path.normpath(device_dir)).startswith('hdparm'):
            continue

        device_type = device_dir.split('_')[-1].lower().strip('\\').strip('/')

        for config_dir in glob.glob(os.path.join(device_dir, '*/')):
            for computer_dir in glob.glob(os.path.join(config_dir, '*/')):
                computer_name = os.path.basename(os.path.normpath(computer_dir))

                for file_name in file_names:
                    file_path = os.path.join(computer_dir, file_name)
                    if not os.path.exists(file_path):
                        print(f"File not found: {file_path}")
                        continue
                    try:
                        results = parser(file_path)
                        for key, value in results.items():
                            # Looked up per value so that an empty result
                            # creates no entry for the device or computer.
                            if group_by_computer:
                                bucket = collected[device_type][computer_name]
                            else:
                                bucket = collected[device_type]
                            bucket[key].append(value)
                    except Exception as e:
                        print(f"Error parsing {file_path}: {e}")

    if group_by_computer:
        return {
            device: {
                computer: summarize(metrics)
                for computer, metrics in per_computer.items()
            }
            for device, per_computer in collected.items()
        }
    return {device: summarize(metrics) for device, metrics in collected.items()}

In [None]:
# Select which result set is loaded: "article" or "zwykle".
tests = "zwykle"
# Defaults used when `tests` matches neither branch below.
resultsfolder = ''
fio_file_names = []

if tests == "article":
    resultsfolder = '../wyniki_50G_article/'
    fio_file_names = [
    'fio_database_article_test_output.txt',
    'fio_seq_read_article_test_output.txt',
    'fio_seq_write_article_test_output.txt',
    ]
elif tests == "zwykle":
    resultsfolder = '../wyniki_zwykle/'
    fio_file_names = [
        'fio_database_test_output.txt',
        'fio_multimedia_test_output.txt',
        'fio_webserver_test_output.txt',
        'fio_archive_test_output.txt',
    ]


# Aggregate the fio text outputs and show the key structure of the result.
fio_resultsdict = extract_values(resultsfolder, fio_file_names, parse_fio_results, program_type='fio_results')
print_dict_tree(fio_resultsdict, max_depth=4)

# Example usage for dd
dd_file_names = [
    'dd_read_test_output.txt',
    'dd_write_test_output.txt',
]

dd_resultsdict = extract_values(resultsfolder, dd_file_names, parse_dd_results, program_type='dd_results')
print(dd_resultsdict)

# hdparm results, pooled over all computers for each device type.
hdparm_file_names = [
    'hdparm_test_output.txt',
]

hdparm_resultsdict = extract_hdparm_values_by_device(resultsfolder, hdparm_file_names, parse_hdparm_results, group_by_computer=False)

print(hdparm_resultsdict)

In [None]:
def deep_merge_results(dict1, dict2):
    """Merge the nested dict ``dict2`` into ``dict1`` in place.

    The first four key levels are merged key by key: a key missing from
    ``dict1`` receives the corresponding ``dict2`` value (the same object, not
    a copy). When a key is present in both at the fourth level, the
    ``dict1`` value is updated with ``dict.update`` from the ``dict2`` value,
    so fifth-level entries from ``dict2`` replace those of ``dict1``.

    Returns:
        None; ``dict1`` is modified.
    """
    def merge_level(target, source, levels_below):
        for key, incoming in source.items():
            if key not in target:
                target[key] = incoming
            elif levels_below:
                merge_level(target[key], incoming, levels_below - 1)
            else:
                # Deepest merged level: shallow update of the existing dict.
                target[key].update(incoming)

    # Three further levels are walked below the outermost one.
    merge_level(dict1, dict2, 3)


# Load the fio JSON outputs from the additional result folders and merge them
# into fio_resultsdict, depending on the selected result set.
if tests == "article":
    fio_file_names = [
    'fio_database_article_test_output.json',
    'fio_seq_read_article_test_output.json',
    'fio_seq_write_article_test_output.json',
    ]

    fio_resultsdict_raid= extract_values('../wyniki_raid_article_50G/', fio_file_names, parse_fio_results, program_type='fio_results')
    fio_resultsdict_raid_compression= extract_values('../wyniki_raid_article_50G_kompresja/', fio_file_names, parse_fio_results, program_type='fio_results')
    deep_merge_results(fio_resultsdict, fio_resultsdict_raid)
    deep_merge_results(fio_resultsdict, fio_resultsdict_raid_compression)
    print_dict_tree(fio_resultsdict, max_depth=3)

elif tests == "zwykle":
    fio_file_names = [
        'fio_database_test_output.json',
        'fio_multimedia_test_output.json',
        'fio_webserver_test_output.json',
        'fio_archive_test_output.json',
    ]
    # Merge results from several result folders

    # Drop the entries for zfs, zfs_nocache and zfs_primary
    fio_resultsdict.pop('zfs', None)
    fio_resultsdict.pop('zfs_nocache', None)
    fio_resultsdict.pop('zfs_primary', None)

    # Load the data from the additional folders
    #fio_resultsdict_new = extract_values('../wyniki_zwykle_l2arc/', fio_file_names, parse_fio_results, program_type='fio_results')
    fio_resultsdict_zfs_stats = extract_values('../wyniki_zwykle_zfs_stats/', fio_file_names, parse_fio_results, program_type='fio_results')
    fio_resultsdict_kompresja = extract_values('../wyniki_zwykle_kompresja/', fio_file_names, parse_fio_results, program_type='fio_results')
    fio_resultsdict_raid = extract_values('../wyniki_raid/', fio_file_names, parse_fio_results, program_type='fio_results')
    # Add the data from the additional folders to the main result set
    #fio_resultsdict.update(fio_resultsdict_new)
    deep_merge_results(fio_resultsdict, fio_resultsdict_zfs_stats)
    deep_merge_results(fio_resultsdict, fio_resultsdict_kompresja)
    deep_merge_results(fio_resultsdict, fio_resultsdict_raid)
    print_dict_tree(fio_resultsdict, max_depth=3)


Funkcja do generowania wykresów

In [None]:
def darken_color(color, amount=0.7):
    """Return ``color`` as an RGB tuple with every channel multiplied by ``amount``.

    The colour is converted with ``to_rgb`` and each scaled channel is
    clipped to the range 0..1.
    """
    scaled_channels = np.asarray(to_rgb(color)) * amount
    return tuple(np.clip(scaled_channels, 0, 1))

def plot_performance_metrics(data, metrics, storage_types, block_sizes,
                              include_min_max=False, include_std=False,
                              workload=None,
                              file_systems=None, colors=None,
                              hdparm_data=None, hdparm_stat='avg',
                              preserve_ylim=False, debug=False,
                              combine_storage_types=False, combine_compression_types=False, save_dir=None, compression_types=None):

    metric_translations = {
        "Bandwidth (MiB/s)": "Przepustowość (MiB/s)",
        "Bandwidth READ (MiB/s)": "Odczyt (MiB/s)",
        "Bandwidth WRITE (MiB/s)": "Zapis (MiB/s)",
        "IOPS": "IOPS",
        "IOPS READ": "IOPS - odczyt",
        "IOPS WRITE": "IOPS - zapis",
        "Latency (ms)": "Opóźnienie (ms)",
        "Latency READ (ms)": "Opóźnienie - odczyt (ms)",
        "Latency WRITE (ms)": "Opóźnienie - zapis (ms)"
    }

    workload_translations = {
        "database": "baza danych",
        "multimedia": "multimedia",
        "webserver": "serwer WWW",
        "archive": "archiwum"
    }

    storage_type_translations = {
        "NVME": "NVMe",
        "RAID02HDD": "RAID0 2 HDD",
        "RAID04HDD": "RAID0 3 HDD",
        "STRIPE4HDD": "Stripe 4 HDD",
        "STRIPE2HDD": "Stripe 2 HDD",
        "MIRROR": "Mirror",
    }

    compression_translations = {
    "none": "brak",
    "lz4": "lz4",
    "zstd": "zstd",
    }

    if colors is None:
        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'orange', '#FFD700', '#A0522D']
    if file_systems is None:
        file_systems = list(data.keys())

    for block_size in block_sizes:
        compression_list = compression_types if compression_types else ["none"]
        if combine_compression_types:
            compression_list = [None]
        for compression in compression_list:
            storage_list = storage_types if not combine_storage_types else [None]

            for storage in storage_list:
                prepared_data = {}

                for fs in file_systems:
                    if combine_storage_types:
                        fs_block_data = data.get(fs, {})
                        for st in storage_types:
                            combined_label = f"{fs} {storage_type_translations.get(st.upper(), st.upper())}"
                            for comp in (compression_types if combine_compression_types else [compression]):
                                storage_data = (
                                    fs_block_data.get(block_size, {}).get(comp, {}).get(st.lower()) or
                                    fs_block_data.get('default', {}).get(comp, {}).get(st.lower())
                                )
                                if not storage_data:
                                    print(f"⚠️ Pomijanie {fs} - brak danych dla storage '{st}' w block_size '{block_size}' lub 'default'")
                                    continue

                                workload_data = storage_data.get(workload)
                                if not isinstance(workload_data, dict):
                                    print(f"⚠️ Pomijanie {fs} - storage '{st}': brak danych dla workload '{workload}'")
                                    continue
                                label = f"{combined_label} ({compression_translations.get(comp, comp)})" if combine_compression_types else combined_label
                                prepared_data[label] = workload_data
                    else:
                        fs_block_data = data.get(fs, {}).get(block_size, {}) or data.get(fs, {}).get('default', {})
                        for comp in (compression_types if combine_compression_types else [compression]):
                            storage_data = fs_block_data.get(comp, {}).get(storage.lower())
                            if not storage_data:
                                continue
                            workload_data = storage_data.get(workload)
                            if not isinstance(workload_data, dict):
                                continue
                            label = f"{fs} ({compression_translations.get(comp, comp)})" if combine_compression_types else fs
                            prepared_data[label] = workload_data


                if not prepared_data:
                    print(f"⚠️ Brak danych po przetworzeniu dla compression={compression}, block_size={block_size}, workload={workload}")
                    continue

                plots = []
                for metric in metrics:
                    if isinstance(metric, dict):
                        read_metric = metric.get("read")
                        write_metric = metric.get("write")
                        first_key = next(iter(prepared_data), None)
                        if not first_key:
                            continue
                        sample = prepared_data[first_key]
                        if read_metric and write_metric and read_metric in sample and write_metric in sample:
                            plots.append(("grouped", metric["name"], read_metric, write_metric))
                        elif read_metric and read_metric in sample:
                            plots.append(("single", read_metric))
                        elif write_metric and write_metric in sample:
                            plots.append(("single", write_metric))
                        else:
                            plots.append(("single", metric["name"]))
                    else:
                        first_key = next(iter(prepared_data), None)
                        if first_key and metric in prepared_data[first_key]:
                            plots.append(("single", metric))

                fig, axs = plt.subplots(len(plots), 1, figsize=(10, 3 * len(plots)))
                if len(plots) == 1:
                    axs = [axs]

                # Zwiększ przestrzeń na tytuł, ale zachowaj kontrolę nad "plot area"
                fig.subplots_adjust(top=0.9, bottom=0.15, left=0.15, right=0.95)

                # Ustaw stały rozmiar plot area (w jednostkach figure-coordinates)
                fixed_ax_height = 0.6 / len(plots)  # rozdziel proporcjonalnie
                for i, ax in enumerate(axs):
                    ax.set_position([0.15, 0.9 - (i+1)*fixed_ax_height, 0.8, fixed_ax_height * 0.9])

                
                translated_workload = workload_translations.get(workload, workload)
                translated_block_size = "domyślny" if block_size == "default" else block_size

                combined_label = ", ".join(
                    [storage_type_translations.get(s.upper(), s.upper()) for s in storage_types]
                ) if combine_storage_types else storage_type_translations.get(storage.upper(), storage.upper())

                if combine_compression_types:
                    compression_label = ", ".join([compression_translations.get(c, c) for c in compression_types])
                else:
                    compression_label = compression_translations.get(compression, compression)

                fig.suptitle(
                    f'{translated_workload.capitalize() if workload else ""}'
                    f'\nTyp dysku: {combined_label}'
                    f'\nRozmiar bloku: {translated_block_size}'
                    f'\nKompresja: {compression_label}'
                )



                for i, plot in enumerate(plots):
                    ax = axs[i]

                    if plot[0] == "grouped":
                        _, base_name, read_metric, write_metric = plot
                        fs_labels = []
                        read_vals, write_vals = [], []
                        read_err, write_err = ([], []), ([], [])
                        color_map = {}

                        for idx, label in enumerate(prepared_data):
                            workload_data = prepared_data[label]
                            read_data = workload_data.get(read_metric, {})
                            write_data = workload_data.get(write_metric, {})

                            read_avg = read_data.get('avg')
                            write_avg = write_data.get('avg')
                            if read_avg is not None and write_avg is not None:
                                fs_labels.append(label)
                                read_vals.append(read_avg)
                                write_vals.append(write_avg)

                                if include_min_max:
                                    read_min = read_data.get('min', read_avg)
                                    read_max = read_data.get('max', read_avg)
                                    read_err[0].append(read_avg - read_min)
                                    read_err[1].append(read_max - read_avg)

                                    write_min = write_data.get('min', write_avg)
                                    write_max = write_data.get('max', write_avg)
                                    write_err[0].append(write_avg - write_min)
                                    write_err[1].append(write_max - write_avg)

                                elif include_std:
                                    read_std = read_data.get('std', 0)
                                    write_std = write_data.get('std', 0)
                                    read_err[0].append(read_std)
                                    read_err[1].append(read_std)
                                    write_err[0].append(write_std)
                                    write_err[1].append(write_std)


                                color_map[label] = colors[idx % len(colors)]

                        x = np.arange(len(fs_labels))
                        bar_width = 0.35
                        read_colors = [color_map[fs] for fs in fs_labels]
                        write_colors = [darken_color(color_map[fs]) for fs in fs_labels]

                        read_bars = ax.bar(x - bar_width/2, read_vals, bar_width,
                                        label='_Odczyt', color=read_colors,
                                        yerr=read_err if include_min_max or include_std else None, capsize=5)
                        write_bars = ax.bar(x + bar_width/2, write_vals, bar_width,
                                            label='_Zapis', color=write_colors,
                                            yerr=write_err if include_min_max or include_std else None, capsize=5)

                        for j, bar in enumerate(read_bars):
                            y = bar.get_height()
                            err = read_err[1][j] if include_min_max or include_std else 0
                            ax.text(bar.get_x() + bar.get_width() / 2, y + err + 0.02 * y,
                                    f'{y:.2f}\nOdczyt', ha='center', va='bottom', fontsize=8)

                        for j, bar in enumerate(write_bars):
                            y = bar.get_height()
                            err = write_err[1][j] if include_min_max or include_std else 0
                            ax.text(bar.get_x() + bar.get_width() / 2, y + err + 0.02 * y,
                                    f'{y:.2f}\nZapis', ha='center', va='bottom', fontsize=8)

                        max_height = max(
                            [v + e for v, e in zip(read_vals, read_err[1])] +
                            [v + e for v, e in zip(write_vals, write_err[1])]
                        ) if include_min_max or include_std else max(max(read_vals, default=0), max(write_vals, default=0))
                        ax.set_ylim(0, max_height * 1.4)

                        translated_label = metric_translations.get(base_name, base_name)
                        ax.set_xticks(x)
                        if len(fs_labels) > 6:
                            ax.set_xticklabels(fs_labels, rotation=45, ha='center', fontsize=8)
                        else:
                            ax.set_xticks(x)
                            ax.set_xticklabels(fs_labels, fontsize=10)  

                        ax.set_ylabel(translated_label)
                        ax.set_title(translated_label)
                        # ax.legend()
                        if hdparm_data and hdparm_stat and metric["name"] == "Bandwidth (MiB/s)" and not combine_storage_types:
                            if storage and storage.lower() in hdparm_data:
                                device_data = hdparm_data[storage.lower()]
                                hdparm_val = device_data.get("Bandwidth (MiB/s)", {}).get(hdparm_stat)
                                if hdparm_val:
                                    ylim = ax.get_ylim()
                                    label_text = f'hdparm {storage} ({hdparm_val:.1f} MiB/s)'
                                    if hdparm_val <= ylim[1]:
                                        ax.axhline(hdparm_val, color='#8B0000', linestyle='--', linewidth=1.5, alpha=0.5 , label=label_text)
                                        ax.legend(fontsize=8, loc='upper left', bbox_to_anchor=(0.78, 1.25))
                                    else:
                                        # Dodaj "fake" obiekt do legendy
                                        legend_line = Line2D([0], [0], color='#8B0000', linestyle='--', linewidth=1.5, alpha=0.5)
                                        ax.legend([legend_line], [label_text], fontsize=8, loc='upper left', bbox_to_anchor=(0.78, 1.25))
                            elif debug:
                                print(f"⚠️ Brak danych hdparm dla {storage} w {metric['name']}")

                    elif plot[0] == "single":
                        _, metric = plot
                        fs_labels = []
                        avg_values = []
                        y_errs = ([], [])
                        color_map = {}

                        for idx, label in enumerate(prepared_data):
                            workload_data = prepared_data[label]
                            metric_data = workload_data.get(metric, {})
                            avg = metric_data.get("avg")
                            if avg is not None:
                                fs_labels.append(label)
                                avg_values.append(avg)

                                if include_min_max:
                                    min_val = metric_data.get("min", avg)
                                    max_val = metric_data.get("max", avg)
                                    y_errs[0].append(avg - min_val)
                                    y_errs[1].append(max_val - avg)
                                elif include_std:
                                    std_val = metric_data.get("std", 0)
                                    y_errs[0].append(std_val)
                                    y_errs[1].append(std_val)


                                color_map[label] = colors[idx % len(colors)]

                        bars = ax.bar(fs_labels, avg_values, color=[color_map[fs] for fs in fs_labels],
                                    yerr=y_errs if include_min_max or include_std else None, capsize=5)

                        if len(fs_labels) > 6:
                            ax.set_xticks(range(len(fs_labels)))
                            ax.set_xticklabels(fs_labels, rotation=45, ha='center', fontsize=8)
                        else:
                            ax.set_xticks(range(len(fs_labels)))
                            ax.set_xticklabels(fs_labels, fontsize=10)

                        for j, bar in enumerate(bars):
                            y = bar.get_height()
                            err = y_errs[1][j] if include_min_max or include_std else 0
                            ax.text(bar.get_x() + bar.get_width() / 2, y + err + 0.02 * y,
                                    f'{y:.2f}', ha='center', va='bottom', fontsize=8)

                        if avg_values:
                            max_height = max([v + e for v, e in zip(avg_values, y_errs[1])] if include_min_max or include_std else avg_values)
                            ax.set_ylim(0, max_height * 1.4)

                        translated_label = metric_translations.get(metric, metric)
                        ax.set_ylabel(translated_label)
                        ax.set_title(translated_label)
                        if hdparm_data and hdparm_stat and "bandwidth" in metric.lower() and not combine_storage_types:
                            if storage and storage.lower() in hdparm_data:
                                device_data = hdparm_data[storage.lower()]
                                hdparm_val = device_data.get("Bandwidth (MiB/s)", {}).get(hdparm_stat)
                                if hdparm_val:
                                    ylim = ax.get_ylim()
                                    label_text = f'hdparm {storage} ({hdparm_val:.1f} MiB/s)'
                                    if hdparm_val <= ylim[1]:
                                        ax.axhline(hdparm_val, color='#8B0000', linestyle='--', linewidth=1.5, alpha=0.5, label=label_text)
                                        ax.legend(fontsize=8, loc='upper left', bbox_to_anchor=(0.78, 1.25))

                                    else:
                                        # Dodaj "fake" obiekt do legendy
                                        legend_line = Line2D([0], [0], color='#8B0000', linestyle='--', alpha=0.5, linewidth=1.5)
                                        ax.legend([legend_line], [label_text], fontsize=8, loc='upper left', bbox_to_anchor=(0.78, 1.25))
                            elif debug:
                                print(f"⚠️ Brak danych hdparm dla {storage} w {metric}")

                plt.tight_layout(rect=[0, 0, 1, 0.96])
                if save_dir:
                    os.makedirs("wykresy/"+save_dir, exist_ok=True)
                    safe_combined_label = combined_label.replace(" ", "").replace("/", "")
                    safe_compression_label = "combined" if combine_compression_types else compression.lower()
                    safe_file_systems = "_".join([fs.replace("_", "") for fs in file_systems])
                    filename = f"syntetyk_{safe_file_systems}_{safe_combined_label.lower()}_{block_size.lower()}_{safe_compression_label}_{workload.lower()}.png"
                    filepath = os.path.join("wykresy/"+save_dir, filename)
                    fig.savefig(filepath, bbox_inches='tight')
                    if debug:
                        print(f"📁 Zapisano wykres do pliku: {filepath}")
                    print_latex_image(save_dir, filename)
                else:
                    plt.show()



Funkcja do generowania tabelki fio

In [None]:
def extract_row_data(data, workload, columns, file_systems=None, block_sizes=None, compressions=None):
    """Collect table rows for a single fio workload.

    ``data`` is walked as file system -> block size -> compression ->
    device type -> workload -> metric -> statistic. Every produced row starts
    with ``[file system, block size, compression]`` and then holds one value
    per header in ``columns[3:]``; ``"N/A"`` is used when a value is missing.

    Args:
        data: Nested results dictionary.
        workload: Workload name looked up under each device type.
        columns: Column headers. Headers after the first three are read as
            ``"<storage> <metric> <STAT>"`` (the metric may be two words).
        file_systems: Optional collection of file systems to keep.
        block_sizes: Optional block sizes to emit; when falsy, every block
            size present for the file system is used. A requested size that
            is missing falls back to the ``"default"`` entry.
        compressions: Optional collection of compression names to keep.

    Returns:
        A list of rows, each row being a list.
    """
    unit_suffixes = (("Bandwidth", "MiB/s"), ("Latency", "ms"))

    def parse_header(header):
        # Turn a header into (lower-cased storage, result-dict metric key,
        # lower-cased statistic name).
        tokens = header.split()
        if len(tokens) > 3:
            # Two-word metric, e.g. "HDD Bandwidth READ AVG".
            storage, metric, stat = tokens[0], tokens[1] + ' ' + tokens[2], tokens[3]
        else:
            storage, metric, stat = header.split(" ", 2)
        for marker, unit in unit_suffixes:
            if marker in metric:
                metric_key = f"{metric} ({unit})"
                break
        else:
            metric_key = metric
        return storage.lower(), metric_key, stat.lower()

    table = []
    for fs_name, per_block in data.items():
        if file_systems and fs_name not in file_systems:
            continue
        for size in (block_sizes or per_block.keys()):
            entry = per_block.get(size) or per_block.get("default")
            if not entry:
                continue
            for compression_name, per_device in entry.items():
                if compressions and compression_name not in compressions:
                    continue
                record = [fs_name, size, compression_name]
                for header in columns[3:]:
                    storage_key, metric_key, stat_key = parse_header(header)
                    # First device whose name matches case-insensitively and
                    # that actually contains the requested workload.
                    per_workload = next(
                        (candidate for device_name, candidate in per_device.items()
                         if device_name.lower() == storage_key and workload in candidate),
                        None,
                    )
                    if per_workload is None:
                        record.append("N/A")
                    else:
                        record.append(per_workload[workload].get(metric_key, {}).get(stat_key, "N/A"))
                table.append(record)
    return table


def generate_columns(metrics, stats=("MIN", "AVG", "MAX"), storage_types=("HDD", "SSD", "NVME")):
    """Build the column headers of the fio results table.

    The defaults are tuples rather than lists so that the default values
    cannot be mutated and leak between calls.

    Args:
        metrics: Iterable of metric names, e.g. ``"Bandwidth READ"``.
        stats: Iterable of statistic names appended to every metric.
        storage_types: Iterable of storage names prefixed to every metric.

    Returns:
        A new list starting with ``"File System"``, ``"Block Size"`` and
        ``"Compression"``, followed by one ``"<storage> <metric> <stat>"``
        header per combination, ordered by storage, then metric, then stat.
    """
    columns = ["File System", "Block Size", "Compression"]
    columns.extend(
        f"{storage} {metric} {stat}"
        for storage in storage_types
        for metric in metrics
        for stat in stats
    )
    return columns


def display_performance_metrics(data, workloads, metrics, stats=("MIN", "AVG", "MAX"),
                                storage_types=("HDD", "SSD", "NVME"), file_systems=None,
                                block_sizes=None, compressions=None):
    """Render one styled results table per workload.

    For every workload the rows are extracted with ``extract_row_data``,
    wrapped in a ``pandas.DataFrame`` and shown through ``display`` with a
    caption naming the workload and values formatted to 3 decimal places.

    Args:
        data: Nested results dictionary passed on to ``extract_row_data``.
        workloads: Iterable of workload names; one table is shown for each.
        metrics: Metric names used to build the column headers.
        stats: Statistic names used to build the column headers.
        storage_types: Storage names used to build the column headers.
        file_systems: Optional file-system filter, passed through.
        block_sizes: Optional block-size selection, passed through.
        compressions: Optional compression filter, passed through.

    Returns:
        None. The tables are emitted via ``display``.
    """
    # The headers depend only on metrics/stats/storage_types, not on the
    # workload, so they are built once instead of once per iteration.
    columns = generate_columns(metrics, stats, storage_types)
    for workload in workloads:
        rows = extract_row_data(data, workload, columns, file_systems, block_sizes, compressions)
        df = pd.DataFrame(rows, columns=columns)
        display(df.style.set_caption(f"Performance Metrics: {workload.capitalize()}").format(precision=3))


# Example usage
# Choose the workload list for the selected test set. `tests` is read here
# but not assigned in this snippet, so it has to exist in the notebook
# namespace already.
# NOTE(review): for any other value of `tests` neither branch runs and
# `workloads` keeps whatever value it had before (or is undefined) - confirm
# that this is intended.
if tests == "article":
    workloads = ["database_article", "seq_read_article", "seq_write_article"] 
elif tests == "zwykle":
    workloads = ["database", "multimedia", "webserver", "archive"]

# Metrics to tabulate for each workload. The "default" entry is the fallback
# used by the loop below for workloads without an entry of their own.
metrics = {
    'database_article': ["Bandwidth READ", "Bandwidth WRITE", "IOPS READ", "IOPS WRITE", "Latency READ", "Latency WRITE"],
    'seq_write_article': ["Bandwidth WRITE", "IOPS WRITE", "Latency WRITE"],
    'seq_read_article': ["Bandwidth READ", "IOPS READ", "Latency READ"],
    'database': ["Bandwidth READ", "Bandwidth WRITE", "IOPS READ", "IOPS WRITE", "Latency READ", "Latency WRITE"],
    'archive': ["Bandwidth WRITE", "IOPS WRITE", "Latency WRITE"],
    'multimedia': ["Bandwidth READ", "IOPS READ", "Latency READ"],
    'webserver': ["Bandwidth READ", "IOPS READ", "Latency READ"],
    "default": ["Bandwidth READ", "Bandwidth WRITE", "IOPS READ", "IOPS WRITE", "Latency READ", "Latency WRITE"],
}
block_sizes = ["default"]  # Specify block sizes to display
# Alternative, wider storage selection kept for reference:
#storage_types=["hdd", "stripe2hdd", "stripe4hdd", "mirror", "raidz1", "raidz2", "raid10"]
# Note: `storage_types` stays in the notebook namespace; the later
# plot_performance_metrics calls read it without assigning it themselves.
storage_types=["hdd"]
# Generate and display tables for each workload
for workload in workloads:

    workload_metrics = metrics.get(workload, metrics["default"])
    # One table per workload: AVG statistic only, limited to the
    # "zfs_nocache" file system.
    display_performance_metrics(fio_resultsdict, [workload], workload_metrics, block_sizes=block_sizes, storage_types=storage_types, stats = ["AVG"], file_systems = ["zfs_nocache"])

In [None]:

# Example usage
# Metric groups to plot. Each entry has a display "name" plus the keys under
# which the read and write values are stored; only bandwidth is enabled, the
# other two groups are commented out.
# Note: this rebinds `metrics`, which the table code earlier in the notebook
# uses as a dict keyed by workload.
metrics = [
    {"name": "Bandwidth (MiB/s)", "read": "Bandwidth READ (MiB/s)", "write": "Bandwidth WRITE (MiB/s)"},
    #{"name": "IOPS", "read": "IOPS READ", "write": "IOPS WRITE"}, 
    #{"name": "Latency (ms)", "read": "Latency READ (ms)", "write": "Latency WRITE (ms)"},
]

# Choose the workload list for the selected test set; `tests` must already
# exist in the notebook namespace.
# NOTE(review): for any other value of `tests`, `workloads` is left as it
# was - confirm that this is intended.
if tests == "article":
    workloads = ["database_article", "seq_read_article", "seq_write_article"]
elif tests == "zwykle":
    workloads = ["database", "multimedia", "webserver", "archive"]

# Both assignments below are commented out, so the `storage_types` passed to
# plot_performance_metrics is whatever is already in the notebook namespace
# (the table code earlier in the notebook sets it to ["hdd"]).
#storage_types = ['HDD','RAID02HDD', 'RAID04HDD', 'RAID1', 'RAID5', 'RAID6', 'RAID10']
#storage_types = ["HDD"]

block_sizes = ['default']  

file_systems = ['zfs']  #['exfat','ext4', 'xfs', 'btrfs', "f2fs", "zfs", "zfs_nocache"]

#workloads = ["database"]

#compression_types = ["none","zlib_1", "zlib_3", "zlib-9", "zstd_1", "zstd_1", "zstd_3", "zstd_9", "zstd_15"]  # btrfs compression
compression_types = ["none", "gzip-1", "gzip-3", "gzip-9", "lz4"]  # zfs compression

# One figure per workload, with storage types and compression types combined.
# NOTE(review): in the visible part of plot_performance_metrics the hdparm
# reference line is drawn only when combine_storage_types is False, so
# hdparm_data / hdparm_stat may have no visible effect in this call - confirm.
for workload in workloads:
    #plot_performance_metrics(fio_resultsdict, metrics, storage_types, block_sizes, include_min_max=True, workload=workload, hdparm_data=hdparm_resultsdict, hdparm_stat='avg' )
    plot_performance_metrics(fio_resultsdict, metrics, storage_types, block_sizes, compression_types=compression_types, include_min_max=True, workload=workload, combine_storage_types=True, combine_compression_types=True, file_systems=file_systems, hdparm_data=hdparm_resultsdict, hdparm_stat='avg', preserve_ylim=True, debug=True)



In [None]:
# Function to generate all possible columns
def generate_columns(metrics, stats=("MIN", "MAX", "AVG"), storage_types=("HDD", "SSD", "NVME"), file_systems=None):
    """Build the column headers of the dd results table.

    The defaults are tuples rather than lists so that the default values
    cannot be mutated and leak between calls.

    NOTE: this redefines the ``generate_columns`` declared earlier in the
    notebook for the fio tables. That one also emits a "Compression" column,
    so code expecting it must not run after this definition has been
    executed.

    Args:
        metrics: Iterable of metric names, e.g. ``"Bandwidth READ"``.
        stats: Iterable of statistic names appended to every metric.
        storage_types: Iterable of storage names prefixed to every metric.
        file_systems: Accepted for call compatibility; not used here.

    Returns:
        A new list starting with ``"File System"`` and ``"Block Size"``,
        followed by one ``"<storage> <metric> <stat>"`` header per
        combination, ordered by storage, then metric, then stat.
    """
    columns = ["File System", "Block Size"]  # Include Block Size in columns
    columns.extend(
        f"{storage} {metric} {stat}"
        for storage in storage_types
        for metric in metrics
        for stat in stats
    )
    return columns

def extract_row_data(data, columns, file_systems=None, block_sizes=None):
    """Collect table rows from the dd results dictionary.

    ``data`` is walked as file system -> block size -> device type ->
    operation -> metric -> statistic. Every produced row starts with
    ``[file system, block size]`` and then holds one value per header in
    ``columns[2:]``; ``"N/A"`` is used when a value is missing.

    Args:
        data: Nested results dictionary.
        columns: Column headers. Headers after the first two are read as
            ``"<storage> <metric> <STAT>"`` (the metric may be two words).
        file_systems: Optional collection of file systems to keep.
        block_sizes: Optional collection of block sizes to keep.

    Returns:
        A list of rows, each row being a list.
    """
    table = []
    for fs_name, per_block in data.items():
        if file_systems and fs_name not in file_systems:
            continue
        for size, per_device in per_block.items():
            if block_sizes and size not in block_sizes:
                continue
            record = [fs_name, size]
            for header in columns[2:]:
                tokens = header.split()
                if len(tokens) > 3:
                    # Two-word metric, e.g. "HDD Bandwidth READ AVG".
                    storage, metric, stat = tokens[0], tokens[1] + ' ' + tokens[2], tokens[3]
                else:
                    storage, metric, stat = header.split(" ", 2)
                # Only bandwidth keys carry a unit suffix in the results.
                metric_key = metric
                if "Bandwidth" in metric:
                    metric_key = f"{metric} (MiB/s)"
                cell = "N/A"
                # Every device whose name matches case-insensitively is
                # inspected; a later matching device that holds the metric
                # overrides an earlier one.
                for device_name, per_operation in per_device.items():
                    if device_name.lower() != storage.lower():
                        continue
                    holder = next(
                        (found for found in per_operation.values() if metric_key in found),
                        None,
                    )
                    if holder is not None:
                        cell = holder[metric_key].get(stat.lower(), "N/A")
                record.append(cell)
            table.append(record)
    return table

# Example usage for the dd results
# Generate and display tables for dd_resultsdict
# Headers: read/write bandwidth x MIN/AVG/MAX for each of HDD, SSD and NVME.
columns = generate_columns(["Bandwidth READ", "Bandwidth WRITE"], stats=["MIN", "AVG", "MAX"], storage_types=["HDD", "SSD", "NVME"])
rows = extract_row_data(dd_resultsdict, columns, block_sizes=["4096"])  # Specify block sizes
df = pd.DataFrame(rows, columns=columns)
# Show the table with a caption and values formatted to 3 decimal places.
display(df.style.set_caption("Performance Metrics: DD Results").format(precision=3))

In [None]:

# Plot performance metrics for dd data
# Metrics are passed as plain key strings here, not as read/write dicts.
# `storage_types` is not assigned in this cell; the value already present in
# the notebook namespace is used.
plot_performance_metrics(dd_resultsdict, metrics=["Bandwidth READ (MiB/s)", "Bandwidth WRITE (MiB/s)"], storage_types=storage_types, block_sizes=['4096']
, include_min_max=True, hdparm_data=hdparm_resultsdict, hdparm_stat='max' )

# TODO: add a universal function for building the tables