Czytanie plików fio i dd

In [None]:
import os
import re
from collections import defaultdict
import glob
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import to_rgb
from matplotlib.patches import Patch


In [None]:
def parse_fio_results(file_path):
    """Parse a fio text report and return its headline metrics.

    The file is scanned line by line. The last matching line wins for every
    metric, so when several latency lines match (the pattern also matches
    ``clat``/``slat`` lines because it searches for ``lat (``), the value of
    the last one is kept.

    Args:
        file_path: Path of the fio output file.

    Returns:
        dict: Any subset of the keys ``'Bandwidth WRITE (MiB/s)'``,
        ``'Bandwidth READ (MiB/s)'``, ``'IOPS WRITE'``, ``'IOPS READ'``,
        ``'Latency READ (ms)'`` and ``'Latency WRITE (ms)'`` mapped to floats.
        Empty when nothing matched.

    Raises:
        ValueError: If a latency average carries a suffix other than ``k``.
    """
    # Summary bandwidth; the unit may be B/s, KiB/s, MiB/s or GiB/s.
    bandwidth_regex = re.compile(r'WRITE: bw=(\d+(?:\.\d+)?)((?:[KMG]i)?B/s)')
    bandwidth_read_regex = re.compile(r'READ: bw=(\d+(?:\.\d+)?)((?:[KMG]i)?B/s)')
    # IOPS may be abbreviated (e.g. "IOPS=25.5k"), so capture fraction and suffix.
    iops_regex = re.compile(r'write: IOPS=(\d+(?:\.\d+)?)([kMG]?)')
    iops_read_regex = re.compile(r'read: IOPS=(\d+(?:\.\d+)?)([kMG]?)')
    latency_regex = re.compile(
        r'lat \(([mun]sec)\): min=\d+\.?\d*[km]?, max=\d+\.?\d*[km]?, '
        r'avg=(\d+\.\d+)([km]?), stdev=\d+\.?\d*'
    )

    # Divisors that bring each latency unit to milliseconds.
    latency_divisors = {'msec': 1, 'usec': 1000, 'nsec': 1000000}
    # Decimal multipliers for abbreviated IOPS values.
    iops_multipliers = {'': 1, 'k': 1000, 'M': 1000000, 'G': 1000000000}

    def convert_bandwidth(value, unit):
        """Convert a bandwidth reading to MiB/s."""
        value = float(value)
        if unit == "B/s":
            return value / (1024 * 1024)
        if unit == "KiB/s":
            return value / 1024
        if unit == "GiB/s":
            return value * 1024
        return value  # Already in MiB/s

    def convert_iops(value, suffix):
        """Expand an abbreviated IOPS reading to a plain float."""
        return float(value) * iops_multipliers[suffix]

    results = {}

    with open(file_path, 'r') as file:
        # Direction of the section being read; latency lines carry no
        # direction themselves, so they inherit it from preceding lines.
        last = 'read'
        for line in file:
            if 'write' in line:
                last = 'write'
            elif 'read' in line:
                last = 'read'

            bw_match = bandwidth_regex.search(line)
            if bw_match:
                results['Bandwidth WRITE (MiB/s)'] = convert_bandwidth(*bw_match.groups())

            bw_read_match = bandwidth_read_regex.search(line)
            if bw_read_match:
                results['Bandwidth READ (MiB/s)'] = convert_bandwidth(*bw_read_match.groups())

            iops_match = iops_regex.search(line)
            if iops_match:
                results['IOPS WRITE'] = convert_iops(*iops_match.groups())

            iops_read_match = iops_read_regex.search(line)
            if iops_read_match:
                results['IOPS READ'] = convert_iops(*iops_read_match.groups())

            lat_match = latency_regex.search(line)
            if lat_match:
                unit, avg_text, avg_suffix = lat_match.groups()
                lat_val = float(avg_text)
                if avg_suffix == 'k':
                    lat_val *= 1000
                elif avg_suffix:
                    # NOTE(review): the pattern has always accepted an 'm'
                    # suffix but its meaning is unclear; fail loudly instead
                    # of guessing a scale.
                    raise ValueError(
                        f"unsupported latency suffix {avg_suffix!r} in line: {line.strip()}"
                    )
                lat_val /= latency_divisors[unit]
                if last == 'read':
                    results['Latency READ (ms)'] = lat_val
                else:
                    results['Latency WRITE (ms)'] = lat_val

    return results

def parse_dd_results(file_path):
    """Parse a dd output file and return its throughput and elapsed time.

    The direction (READ or WRITE) comes from the file name. Only when the
    file name contains neither ``read`` nor ``write`` is the whole path
    inspected, so a directory name can no longer mislabel a result.

    Args:
        file_path: Path of the dd output file.

    Returns:
        dict: ``'Bandwidth READ (MiB/s)'`` or ``'Bandwidth WRITE (MiB/s)'``
        and ``'Time (s)'`` mapped to floats; keys are missing when the
        corresponding text was not found. The last matching line wins.
    """
    # The prefix is optional and may be a lowercase "k" ("512 kB/s").
    bandwidth_regex = re.compile(r'(\d+(?:\.\d+)?) ([kKMGT]?B/s)')
    time_regex = re.compile(r'(\d+(?:\.\d+)?) s')

    # NOTE(review): these factors keep the existing convention of treating
    # "MB/s" as if it were MiB/s and scaling the other units by 1024. If the
    # dd build reports decimal units the values are slightly overstated -
    # confirm before comparing against fio numbers.
    unit_factors = {
        'B/s': 1 / (1024 * 1024),
        'kB/s': 1 / 1024,
        'KB/s': 1 / 1024,
        'MB/s': 1,
        'GB/s': 1024,
        'TB/s': 1024 * 1024,
    }

    file_name = os.path.basename(file_path)
    if 'write' in file_name:
        is_write = True
    elif 'read' in file_name:
        is_write = False
    else:
        # Fall back to the previous whole-path check.
        is_write = 'write' in file_path
    bandwidth_key = 'Bandwidth WRITE (MiB/s)' if is_write else 'Bandwidth READ (MiB/s)'

    results = {}

    with open(file_path, 'r') as file:
        for line in file:
            bw_match = bandwidth_regex.search(line)
            if bw_match:
                value, unit = bw_match.groups()
                results[bandwidth_key] = float(value) * unit_factors[unit]

            time_match = time_regex.search(line)
            if time_match:
                results['Time (s)'] = float(time_match.group(1))

    return results

import re

import re

def extract_values(resultsfolder, file_names, parser, program_type):
    """Collect and aggregate benchmark results from a results directory tree.

    Top-level folders whose path contains ``program_type`` are processed. The
    folder name is split on ``_``: the last part is the storage type and the
    parts from index 2 up to the last one form the file-system name. For
    ``'fio_results'`` and ``'dd_results'`` an additional ``block_size_<N>``
    directory level is expected; other program types are stored under the
    ``'no_block_size'`` key.

    Args:
        resultsfolder: Directory that holds the per-program result folders.
        file_names: File names to look for in every leaf run folder.
        parser: Callable taking a file path and returning a dict of numbers.
        program_type: Substring selecting the top-level folders to read.

    Returns:
        defaultdict: ``result[filesystem][block_size][storage][test_key][metric]``
        where each metric maps to ``{'min', 'max', 'avg'}``.
    """
    resultsdict = defaultdict(lambda: defaultdict(dict))

    def extract_key(file_name):
        """Return the part between the first word and "_test", else the stem."""
        match = re.match(r"^[^_]+_(.*?)_test", file_name)
        return match.group(1) if match else os.path.splitext(file_name)[0]

    def leaf_name(path):
        """Return the last path component regardless of separator style."""
        return os.path.basename(os.path.normpath(path))

    def subfolders(parent):
        """List the direct sub-directories of ``parent``."""
        return glob.glob(os.path.join(parent, '*', ''))

    def summarize(values):
        """Reduce a list of numbers to rounded min/max/avg."""
        return {
            'min': round(min(values), 3),
            'max': round(max(values), 3),
            'avg': round(sum(values) / len(values), 2),
        }

    def collect_ranges(parent):
        """Parse every requested file in each run folder below ``parent``."""
        cumulative_data = {}
        for folder in subfolders(parent):
            for file_name in file_names:
                file_path = os.path.join(folder, file_name)
                if not os.path.exists(file_path):
                    print(f"Plik nie znaleziony: {file_path}")
                    continue
                try:
                    results = parser(file_path)
                    if results:  # Record only files that yielded data
                        bucket = cumulative_data.setdefault(extract_key(file_name), defaultdict(list))
                        for key, value in results.items():
                            bucket[key].append(value)
                except Exception as e:
                    # Best effort: one unreadable file must not abort the scan.
                    print(f"Błąd podczas parsowania {file_path}: {e}")
        return {
            test_key: {
                key: summarize(values) if values else '-'
                for key, values in metrics.items()
            }
            for test_key, metrics in cumulative_data.items()
            if metrics
        }

    uses_block_sizes = program_type in ['fio_results', 'dd_results']
    for prepath in subfolders(resultsfolder):
        if program_type not in prepath:
            continue
        # File-system names may contain underscores, hence the slice/join.
        folder_parts = leaf_name(prepath).split('_')
        filesystem = '_'.join(folder_parts[2:-1])
        storage = folder_parts[-1]

        if not uses_block_sizes:
            # Programs without a block-size directory level (e.g. hdparm).
            resultsdict[filesystem]['no_block_size'][storage] = collect_ranges(prepath)
            continue

        for block_size_folder in subfolders(prepath):
            block_parts = leaf_name(block_size_folder).split('_')
            if len(block_parts) > 2 and block_parts[0] == "block" and block_parts[1] == "size":
                block_size = block_parts[2]
            else:
                print(f"Pomijanie folderu o nieoczekiwanej strukturze: {block_size_folder}")
                continue
            resultsdict[filesystem][block_size][storage] = collect_ranges(block_size_folder)
    return resultsdict

def parse_hdparm_results(file_path):
    """Parse the O_DIRECT read timing from an hdparm output file.

    Args:
        file_path: Path of the hdparm output file.

    Returns:
        dict: ``'Total Data Read (MB)'`` and ``'Bandwidth (MiB/s)'`` as
        floats, taken from the last matching line; empty when no line matches.
    """
    # Tolerate any amount of whitespace around the numbers and an integer
    # bandwidth; the previous pattern needed exactly one space before each
    # number and a mandatory decimal part.
    # NOTE(review): hdparm appears to pad these fields to a fixed width -
    # confirm against real output.
    bandwidth_regex = re.compile(
        r'Timing O_DIRECT disk reads:\s*(\d+(?:\.\d+)?)\s+MB in .* seconds =\s*'
        r'(\d+(?:\.\d+)?)\s+MB/sec'
    )

    results = {}

    with open(file_path, 'r') as file:
        for line in file:
            bw_match = bandwidth_regex.search(line)
            if bw_match:
                total_mb, bandwidth = bw_match.groups()
                results['Total Data Read (MB)'] = float(total_mb)
                results['Bandwidth (MiB/s)'] = float(bandwidth)

    return results
    
def extract_hdparm_values_by_device(resultsfolder, file_names, parser, group_by_computer=False):
    """Aggregate parsed results per device type, optionally per computer.

    Every sub-folder of ``resultsfolder`` is visited. The device type is the
    lower-cased text after the last ``_`` in the folder name, and each of its
    sub-folders is treated as one computer.

    Args:
        resultsfolder: Directory that holds the result folders.
        file_names: File names to look for in every computer folder.
        parser: Callable taking a file path and returning a dict of numbers.
        group_by_computer: When true, keep one aggregate per computer instead
            of pooling all computers of a device type.

    Returns:
        dict: ``{device: {metric: stats}}`` or, when grouping,
        ``{device: {computer: {metric: stats}}}``, where ``stats`` holds
        ``'min'``, ``'max'`` and ``'avg'``.
    """
    def leaf_name(path):
        """Return the last path component regardless of separator style."""
        return os.path.basename(os.path.normpath(path))

    def summarize(metrics):
        """Reduce every list of values in ``metrics`` to min/max/avg."""
        return {
            key: {
                'min': round(min(values), 3),
                'max': round(max(values), 3),
                'avg': round(sum(values) / len(values), 2),
            } if values else '-'
            for key, values in metrics.items()
        }

    if group_by_computer:
        resultsdict = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
    else:
        resultsdict = defaultdict(lambda: defaultdict(list))

    for prepath in glob.glob(os.path.join(resultsfolder, '*', '')):
        # e.g. "hdparm_results_xfs_nvme" -> "nvme"
        device_type = leaf_name(prepath).split('_')[-1].lower()

        for folder in glob.glob(os.path.join(prepath, '*', '')):
            computer_name = leaf_name(folder)  # e.g. "lab-sec-13"
            for file_name in file_names:
                file_path = os.path.join(folder, file_name)
                if not os.path.exists(file_path):
                    print(f"File not found: {file_path}")
                    continue
                try:
                    results = parser(file_path)
                    if group_by_computer:
                        target = resultsdict[device_type][computer_name]
                    else:
                        target = resultsdict[device_type]
                    for key, value in results.items():
                        target[key].append(value)
                except Exception as e:
                    # Best effort: one unreadable file must not abort the scan.
                    print(f"Error parsing {file_path}: {e}")

    aggregated_results = {}
    for device_type, computers_or_metrics in resultsdict.items():
        if group_by_computer:
            aggregated_results[device_type] = {
                computer: summarize(metrics)
                for computer, metrics in computers_or_metrics.items()
            }
        else:
            aggregated_results[device_type] = summarize(computers_or_metrics)

    return aggregated_results

In [None]:

# Example usage for fio
# Load the fio, dd and hdparm results from '../wyniki_10G_article/' into
# module-level dictionaries that the table and plot cells below read.
fio_file_names = [
    # Earlier workload set, currently disabled:
    #'fio_database_test_output.txt',
    #'fio_multimedia_test_output.txt',
    #'fio_webserver_test_output.txt',
    #'fio_archive_test_output.txt',
    'fio_database_article_test_output.txt',
    'fio_seq_read_article_test_output.txt',
    'fio_seq_write_article_test_output.txt',
]

fio_resultsdict = extract_values('../wyniki_10G_article/', fio_file_names, parse_fio_results, program_type='fio_results')
print(fio_resultsdict)

# Example usage for dd
dd_file_names = [
    'dd_read_test_output.txt',
    'dd_write_test_output.txt',
]

dd_resultsdict = extract_values('../wyniki_10G_article/', dd_file_names, parse_dd_results, program_type='dd_results')
print(dd_resultsdict)

# hdparm results are pooled across computers (group_by_computer=False).
hdparm_file_names = [
    'hdparm_test_output.txt',
]

hdparm_resultsdict = extract_hdparm_values_by_device('../wyniki_10G_article/', hdparm_file_names, parse_hdparm_results, group_by_computer=False)

print(hdparm_resultsdict)

Funkcja do generowania wykresów

In [None]:

def darken_color(color, amount=0.7):
    """Return ``color`` as an RGB tuple with every channel scaled by ``amount``.

    The scaled channels are clipped to the range [0, 1].
    """
    rgb = np.array(to_rgb(color))
    scaled = np.clip(rgb * amount, 0, 1)
    return tuple(scaled)

def plot_performance_metrics(data, metrics, storage_types, block_sizes, include_min_max=False, workload=None, file_systems=None, colors=None, hdparm_data=None, hdparm_stat='avg', preserve_ylim=False, debug=False):
    """Draw one figure of bar charts per (block size, storage type) pair.

    Each figure has one subplot per plottable metric and one bar (or one
    READ/WRITE bar pair) per file system.

    Args:
        data: Nested mapping read as
            ``data[file_system][block_size][storage][workload][metric]``; the
            metric entries are dicts read through ``'avg'``, ``'min'`` and
            ``'max'``. When a block size is missing, the ``'default'`` entry
            of the file system is used if present.
        metrics: Metric names, or dicts with ``'name'``, ``'read'`` and
            ``'write'`` keys that request a grouped READ/WRITE subplot.
        storage_types: Storage labels; looked up lower-cased in ``data``.
        block_sizes: Block-size keys to plot.
        include_min_max: Draw min/max whiskers on the bars.
        workload: Workload key to look up below each storage type.
        file_systems: File systems to plot; defaults to all keys of ``data``.
        colors: Bar colours, cycled per file system.
        hdparm_data: Optional ``{storage: {'Bandwidth (MiB/s)': stats}}``
            mapping drawn as a horizontal reference line on bandwidth plots.
        hdparm_stat: Which statistic of ``hdparm_data`` to draw.
        preserve_ylim: Freeze the automatically chosen y-limits.
        debug: Print a note whenever the ``'default'`` entry is used.

    Combinations for which no metric can be plotted are skipped with a
    message instead of raising.
    """
    if colors is None:
        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'orange']
    if file_systems is None:
        file_systems = list(data.keys())

    def workload_data(fs, block_size, storage, report=True):
        """Return the metric dict for one file system, or None when missing.

        Uses ``.get`` throughout so that probing never inserts keys into a
        defaultdict and never raises for an unknown file system.
        """
        fs_storage_data = data.get(fs, {})
        fs_block_data = fs_storage_data.get(block_size)
        if not fs_block_data and 'default' in fs_storage_data:
            if debug and report:
                print(f"ℹ️ Użyto 'default' dla systemu plików: {fs}, block_size: {block_size}")
            fs_block_data = fs_storage_data.get('default')

        if not fs_block_data:
            if report:
                print(f"⚠️ Pomijanie {fs}: brak danych dla block_size '{block_size}' i brak 'default'")
            return None

        op_data = fs_block_data.get(storage.lower(), {}).get(workload)
        if not isinstance(op_data, dict):
            if report:
                print(f"⚠️ Pomijanie {fs}: brak danych workload '{workload}' dla storage '{storage}' z block_size '{block_size}'")
            return None
        return op_data

    for block_size in block_sizes:
        for storage in storage_types:
            # The first file system that has data decides which metrics exist.
            reference = next(
                (
                    found
                    for found in (
                        workload_data(fs, block_size, storage, report=False)
                        for fs in file_systems
                    )
                    if found
                ),
                None,
            )

            plots = []
            if reference:
                for metric in metrics:
                    if isinstance(metric, dict):
                        read_metric = metric.get("read")
                        write_metric = metric.get("write")
                        has_read = bool(read_metric) and read_metric in reference
                        has_write = bool(write_metric) and write_metric in reference
                        if has_read and has_write:
                            plots.append(("grouped", metric["name"], read_metric, write_metric))
                        elif has_read:
                            plots.append(("single", read_metric))
                        elif has_write:
                            plots.append(("single", write_metric))
                        else:
                            plots.append(("single", metric["name"]))
                    elif metric in reference:
                        plots.append(("single", metric))

            if not plots:
                # plt.subplots() rejects zero rows, so skip this combination.
                print(f"⚠️ Pomijanie wykresu: brak metryk do narysowania dla storage '{storage}' z block_size '{block_size}'")
                continue

            fig, axs = plt.subplots(len(plots), 1, figsize=(10, 3 * len(plots)))
            if len(plots) == 1:
                axs = [axs]  # a single subplot is returned as a bare Axes

            fig.suptitle(f'Performance Metrics for {storage} (Block Size: {block_size}){" - " + workload.capitalize() if workload else ""}')

            for i, plot in enumerate(plots):
                ax = axs[i]
                if plot[0] == "grouped":
                    _, base_name, read_metric, write_metric = plot
                    fs_labels = []
                    read_vals, write_vals = [], []
                    read_mins, read_maxs = [], []
                    write_mins, write_maxs = [], []
                    color_map = {}

                    for idx, fs in enumerate(file_systems):
                        op_data = workload_data(fs, block_size, storage)
                        if op_data is None:
                            continue

                        read_data = op_data.get(read_metric, {})
                        write_data = op_data.get(write_metric, {})
                        read_avg = read_data.get('avg')
                        write_avg = write_data.get('avg')
                        # A pair is drawn only when both directions have data.
                        if read_avg is not None and write_avg is not None:
                            fs_labels.append(fs)
                            read_vals.append(read_avg)
                            write_vals.append(write_avg)
                            read_mins.append(read_data.get('min', read_avg))
                            read_maxs.append(read_data.get('max', read_avg))
                            write_mins.append(write_data.get('min', write_avg))
                            write_maxs.append(write_data.get('max', write_avg))
                            color_map[fs] = colors[idx % len(colors)]

                    x = np.arange(len(fs_labels))
                    bar_width = 0.35
                    read_colors = [color_map[fs] for fs in fs_labels]
                    write_colors = [darken_color(color_map[fs]) for fs in fs_labels]

                    ax.bar(x - bar_width/2, read_vals, bar_width, label='READ', color=read_colors)
                    ax.bar(x + bar_width/2, write_vals, bar_width, label='WRITE', color=write_colors)

                    if include_min_max:
                        read_yerr = [np.array(read_vals) - np.array(read_mins), np.array(read_maxs) - np.array(read_vals)]
                        write_yerr = [np.array(write_vals) - np.array(write_mins), np.array(write_maxs) - np.array(write_vals)]

                        # abs(): the average is rounded to fewer digits than
                        # min/max, so a difference can come out negative.
                        ax.errorbar(x - bar_width/2, read_vals, yerr=np.abs(read_yerr), fmt='none', ecolor='black', capsize=5)
                        ax.errorbar(x + bar_width/2, write_vals, yerr=np.abs(write_yerr), fmt='none', ecolor='black', capsize=5)

                    ax.set_xticks(x)
                    ax.set_xticklabels(fs_labels)
                    ax.set_ylabel(base_name)
                    ax.set_title(base_name)
                    ax.legend()
                    if preserve_ylim:
                        ax.set_ylim(ax.get_ylim())

                elif plot[0] == "single":
                    _, metric = plot
                    fs_labels = []
                    avg_values, min_values, max_values = [], [], []
                    color_map = {}

                    for idx, fs in enumerate(file_systems):
                        op_data = workload_data(fs, block_size, storage)
                        if op_data is None:
                            continue

                        metric_data = op_data.get(metric, {})
                        avg = metric_data.get("avg")
                        if avg is not None:
                            fs_labels.append(fs)
                            avg_values.append(avg)
                            min_values.append(metric_data.get("min", avg))
                            max_values.append(metric_data.get("max", avg))
                            color_map[fs] = colors[idx % len(colors)]

                    ax.bar(fs_labels, avg_values, color=[color_map[fs] for fs in fs_labels])
                    if include_min_max:
                        yerr = [np.array(avg_values) - np.array(min_values), np.array(max_values) - np.array(avg_values)]
                        ax.errorbar(fs_labels, avg_values, yerr=np.abs(yerr), fmt='none', color='black', capsize=5)

                    # Optional hdparm reference line on bandwidth plots.
                    if hdparm_data and storage.lower() in hdparm_data and "Bandwidth" in metric:
                        hdparm_value = hdparm_data[storage.lower()].get('Bandwidth (MiB/s)', {}).get(hdparm_stat)
                        if hdparm_value is not None:
                            ax.axhline(y=hdparm_value, color='red', linestyle='--', label=f'hdparm {hdparm_stat.capitalize()}')
                            ax.legend()

                    if preserve_ylim:
                        ax.set_ylim(ax.get_ylim())

                    ax.set_ylabel(metric)
                    ax.set_title(metric)

            # Leave room at the top for the figure title.
            plt.tight_layout(rect=[0, 0, 1, 0.96])
            plt.show()

Funkcja do generowania tabelki fio

In [None]:
import pandas as pd

def generate_columns(metrics, stats=["MIN", "AVG", "MAX"], storage_types=["HDD", "SSD", "NVME"]):
    """Build the table header.

    The header starts with "File System" and "Block Size", followed by one
    "<storage> <metric> <stat>" label per combination, storage-major.
    """
    header = ["File System", "Block Size"]
    header.extend(
        f"{storage} {metric} {stat}"
        for storage in storage_types
        for metric in metrics
        for stat in stats
    )
    return header

def extract_row_data(data, workload, columns, file_systems=None, block_sizes=None):
    """Build one table row per (file system, block size) for a fio workload.

    Each column label after the first two is split back into storage, metric
    and statistic. Bandwidth metrics are looked up with a " (MiB/s)" suffix
    and latency metrics with a " (ms)" suffix. Missing values become "N/A".
    """
    def split_label(label):
        # Labels with more than three words carry a two-word metric name.
        words = label.split()
        if len(words) > 3:
            return words[0], words[1] + ' ' + words[2], words[3]
        storage, metric, stat = label.split(" ", 2)
        return storage, metric, stat

    def result_key(metric):
        if "Bandwidth" in metric:
            return f"{metric} (MiB/s)"
        if "Latency" in metric:
            return f"{metric} (ms)"
        return metric

    def cell(devices, label):
        storage, metric, stat = split_label(label)
        # First device matching the storage name that has this workload.
        found = next(
            (
                per_workload
                for device_type, per_workload in devices.items()
                if device_type.lower() == storage.lower() and workload in per_workload
            ),
            None,
        )
        if found is None:
            return "N/A"
        return found[workload].get(result_key(metric), {}).get(stat.lower(), "N/A")

    table = []
    for fs, block_data in data.items():
        if file_systems and fs not in file_systems:
            continue
        for block_size, devices in block_data.items():
            if block_sizes and block_size not in block_sizes:
                continue
            table.append([fs, block_size] + [cell(devices, label) for label in columns[2:]])
    return table

def display_performance_metrics(data, workloads, metrics, stats=["MIN", "AVG", "MAX"], storage_types=["HDD", "SSD", "NVME"], file_systems=None, block_sizes=None):
    """Display one captioned, styled table per workload.

    Each table is built with ``generate_columns`` and ``extract_row_data``
    and shown through ``display`` with values formatted to three decimals.
    """
    for current in workloads:
        header = generate_columns(metrics, stats, storage_types)
        body = extract_row_data(data, current, header, file_systems, block_sizes)
        table = pd.DataFrame(body, columns=header)
        caption = f"Performance Metrics: {current.capitalize()}"
        styled = table.style.set_caption(caption).format(precision=3)
        display(styled)

# Example usage
# Show one fio table per workload, using a metric list chosen per workload.
# Earlier workload set, currently disabled:
#workloads = ["database", "multimedia", "webserver", "archive"]
workloads = ["database_article", "seq_read_article", "seq_write_article"] 
# Metric names per workload; "default" is the fallback for unlisted workloads.
metrics = {
    'database_article': ["Bandwidth READ", "Bandwidth WRITE", "IOPS READ", "IOPS WRITE", "Latency READ", "Latency WRITE"],
    'seq_write_article': ["Bandwidth WRITE", "IOPS WRITE", "Latency WRITE"],
    'seq_read_article': ["Bandwidth READ", "IOPS READ", "Latency READ"],
    "default": ["Bandwidth READ", "Bandwidth WRITE", "IOPS READ", "IOPS WRITE", "Latency READ", "Latency WRITE"],
}
block_sizes = ["4096"]  # Specify block sizes to display

# Generate and display tables for each workload
for workload in workloads:

    workload_metrics = metrics.get(workload, metrics["default"])
    display_performance_metrics(fio_resultsdict, [workload], workload_metrics, block_sizes=block_sizes)

In [None]:

# Example usage
# Metric specification for the plots: dict entries request a grouped
# READ/WRITE subplot, plain strings a single-series subplot.
# Note: this rebinds the module-level name `metrics` from a dict to a list.
metrics = [
    {"name": "Bandwidth (MiB/s)", "read": "Bandwidth READ (MiB/s)", "write": "Bandwidth WRITE (MiB/s)"},
    {"name": "IOPS", "read": "IOPS READ", "write": "IOPS WRITE"}, 
    "Latency READ (ms)", "Latency WRITE (ms)",
]

storage_types = ['HDD', 'SSD', 'NVME']
block_sizes = ['4096']

# Plot performance metrics for fio data
# Earlier workload set, currently disabled:
#workloads = ["database", "multimedia", "webserver", "archive"]
workloads = ["database_article", "seq_read_article", "seq_write_article"]
for workload in workloads:
    # Variant that also draws the hdparm reference line, currently disabled:
    #plot_performance_metrics(fio_resultsdict, metrics, storage_types, block_sizes, include_min_max=True, workload=workload, hdparm_data=hdparm_resultsdict, hdparm_stat='avg' )
    plot_performance_metrics(fio_resultsdict, metrics, storage_types, block_sizes, include_min_max=True, workload=workload )



In [None]:
# Function to generate all possible columns
def generate_columns(metrics, stats=["MIN", "MAX", "AVG"], storage_types=["HDD", "SSD", "NVME"], file_systems=None):
    """Return the table header for the dd summary table.

    The first two labels are "File System" and "Block Size"; the rest are
    "<storage> <metric> <stat>" for every combination, storage-major.
    ``file_systems`` is accepted but not used.
    """
    labels = [
        f"{storage} {metric} {stat}"
        for storage in storage_types
        for metric in metrics
        for stat in stats
    ]
    return ["File System", "Block Size"] + labels

def extract_row_data(data, columns, file_systems=None, block_sizes=None):
    """Build one table row per (file system, block size) across all operations.

    Each column label after the first two is split back into storage, metric
    and statistic. For every device whose name matches the storage
    (case-insensitively), the first operation that contains the metric
    supplies the value. Missing values become "N/A".
    """
    def split_label(label):
        # Labels with more than three words carry a two-word metric name.
        words = label.split()
        if len(words) > 3:
            return words[0], words[1] + ' ' + words[2], words[3]
        storage, metric, stat = label.split(" ", 2)
        return storage, metric, stat

    def cell(devices, label):
        storage, metric, stat = split_label(label)
        wanted = f"{metric} (MiB/s)" if "Bandwidth" in metric else metric
        value = "N/A"
        for device_type, operations in devices.items():
            if device_type.lower() != storage.lower():
                continue
            holder = next(
                (measured for measured in operations.values() if wanted in measured),
                None,
            )
            if holder is not None:
                value = holder[wanted].get(stat.lower(), "N/A")
        return value

    table = []
    for fs, block_data in data.items():
        if file_systems and fs not in file_systems:
            continue
        for block_size, devices in block_data.items():
            if block_sizes and block_size not in block_sizes:
                continue
            table.append([fs, block_size] + [cell(devices, label) for label in columns[2:]])
    return table

# Example data
# Generate and display tables for dd_resultsdict
# One combined table with READ and WRITE bandwidth, limited to block size "4096".
columns = generate_columns(["Bandwidth READ", "Bandwidth WRITE"], stats=["MIN", "AVG", "MAX"], storage_types=["HDD", "SSD", "NVME"])
rows = extract_row_data(dd_resultsdict, columns, block_sizes=["4096"])  # Specify block sizes
df = pd.DataFrame(rows, columns=columns)
display(df.style.set_caption("Performance Metrics: DD Results").format(precision=3))

In [None]:

# Plot performance metrics for dd data
# The hdparm maximum is passed as the reference line for bandwidth plots.
# NOTE(review): no `workload` is passed, so the plot looks the data up under
# workload=None, while the dd results appear to be keyed by 'read'/'write'
# (derived from the file names) - confirm that this call finds any data.
plot_performance_metrics(dd_resultsdict, metrics=["Bandwidth READ (MiB/s)", "Bandwidth WRITE (MiB/s)"], storage_types=storage_types, block_sizes=['4096']
, include_min_max=True, hdparm_data=hdparm_resultsdict, hdparm_stat='max' )

# TODO: add a universal function for the tables

In [None]:
import pandas as pd

def generate_columns(metrics, stats=["MIN", "AVG", "MAX"], storage_types=["HDD", "SSD", "NVME"]):
    """Return the header labels for a per-workload table.

    Two fixed labels ("File System", "Block Size") are followed by
    "<storage> <metric> <stat>" for each combination, storage-major.
    """
    def labels_for(storage):
        return [f"{storage} {metric} {stat}" for metric in metrics for stat in stats]

    result = ["File System", "Block Size"]
    for storage in storage_types:
        result += labels_for(storage)
    return result

def extract_row_data(data, workload, columns, file_systems=None, block_sizes=None):
    """Build one table row per (file system, block size) for a dd workload.

    Each column label after the first two is split back into storage, metric
    and statistic. For the ``'read'`` and ``'write'`` workloads the metric is
    first looked up with its direction (e.g. "Bandwidth READ (MiB/s)"). When
    that key is absent, direction-less spellings are tried, so a "Time"
    column finds the "Time (s)" entry instead of always showing "N/A".

    Args:
        data: ``data[file_system][block_size][device][workload][metric]``
            mapping whose metric entries are dicts of statistics.
        workload: Workload key, e.g. ``'read'`` or ``'write'``.
        columns: Header labels; the first two are skipped.
        file_systems: Optional collection limiting the file systems.
        block_sizes: Optional collection limiting the block sizes.

    Returns:
        list: One ``[fs, block_size, value, ...]`` list per row; missing
        values are the string "N/A".
    """
    def split_label(label):
        # Labels with more than three words carry a two-word metric name.
        words = label.split()
        if len(words) > 3:
            return words[0], words[1] + ' ' + words[2], words[3]
        storage, metric, stat = label.split(" ", 2)
        return storage, metric, stat

    def candidate_keys(metric):
        """Return the result keys to try for ``metric``, most specific first."""
        unit = " (MiB/s)" if "Bandwidth" in metric else ""
        keys = []
        if workload in ('read', 'write'):
            keys.append(f"{metric} {workload.upper()}{unit}")
        keys.append(f"{metric}{unit}")
        if "Time" in metric:
            keys.append(f"{metric} (s)")
        keys.append(metric)
        return list(dict.fromkeys(keys))  # drop duplicates, keep order

    def cell(devices, label):
        storage, metric, stat = split_label(label)
        for device_type, workloads in devices.items():
            if device_type.lower() == storage.lower() and workload in workloads:
                measured = workloads[workload]
                for key in candidate_keys(metric):
                    if key in measured:
                        return measured[key].get(stat.lower(), "N/A")
                return "N/A"
        return "N/A"

    rows = []
    for fs, block_data in data.items():
        if file_systems and fs not in file_systems:
            continue
        for block_size, devices in block_data.items():
            if block_sizes and block_size not in block_sizes:
                continue
            rows.append([fs, block_size] + [cell(devices, label) for label in columns[2:]])
    return rows

def display_performance_metrics(data, workloads, metrics, stats=["MIN", "AVG", "MAX"], storage_types=["HDD", "SSD", "NVME"], file_systems=None, block_sizes=None):
    """Show a captioned table for every workload in ``workloads``.

    The header comes from ``generate_columns`` and the rows from
    ``extract_row_data``; values are formatted to three decimals.
    """
    for name in workloads:
        columns = generate_columns(metrics, stats, storage_types)
        frame = pd.DataFrame(
            extract_row_data(data, name, columns, file_systems, block_sizes),
            columns=columns,
        )
        styler = frame.style.set_caption(f"Performance Metrics: {name.capitalize()}")
        display(styler.format(precision=3))

# Example usage for dd
# Show one dd table per direction; metric names are given without the
# direction because extract_row_data adds it for 'read'/'write' workloads.
dd_workloads = ["read", "write"]
dd_metrics = ["Bandwidth", "Time"]
block_sizes = ["4096"]  # Specify block sizes to display

# Generate and display tables for dd data
for workload in dd_workloads:
    display_performance_metrics(dd_resultsdict, [workload], dd_metrics, block_sizes=block_sizes)