In [4]:
# plot_results.py

import matplotlib.pyplot as plt
import numpy as np
import re
import os

def parse_single_file(file_path, library_name):
    """
    Parses a single result file and extracts the algorithm name, training time, and inference time.

    The file is read as a sequence of sections. A section starts with a header
    line such as 'C++ (Dlib) - Linear Regression:' or
    'Python (scikit-learn) - K-Means:' (leading whitespace is tolerated, and the
    algorithm name is everything up to the first colon). The
    'Training Time: X seconds' and 'Inference Time: X seconds' lines that follow
    are attributed to that section.

    Args:
        file_path (str): Path to the result file.
        library_name (str): Name of the library ('Dlib' or 'scikit-learn'),
            compared case-insensitively. Only sections whose header belongs to
            this library are collected. Any other value disables the filter and
            sections of both libraries are collected.

    Returns:
        dict: A dictionary with algorithm names as keys and
              {'Training': time, 'Inference': time} as values.
              Algorithms for which either time is missing or malformed are omitted.
    """
    # Header prefix written in front of the algorithm name -> library key.
    header_libraries = {
        'C++ (Dlib)': 'dlib',
        'Python (scikit-learn)': 'scikit-learn',
    }
    requested = str(library_name).strip().lower()
    filter_by_library = requested in header_libraries.values()

    # The name is "anything up to the first colon" so that names containing
    # hyphens, parentheses, digits, etc. still open a new section instead of
    # being skipped (which would let their timings overwrite the previous one).
    header_pattern = re.compile(
        r'\s*(C\+\+ \(Dlib\)|Python \(scikit-learn\)) - ([^:]+):'
    )
    # A well-formed decimal/scientific number, so float() below cannot fail.
    number = r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'
    time_patterns = {
        'Training': re.compile(rf'Training Time:\s*({number})\s+seconds'),
        'Inference': re.compile(rf'Inference Time:\s*({number})\s+seconds'),
    }

    results = {}
    algo_name = None
    with open(file_path, 'r') as file:
        for line in file:
            header = header_pattern.match(line)
            if header:
                if filter_by_library and header_libraries[header.group(1)] != requested:
                    # Section of the other library: ignore its timing lines.
                    algo_name = None
                    continue
                algo_name = header.group(2).strip()
                results[algo_name] = {'Training': None, 'Inference': None}
                continue

            # Timing lines only count once a (non-empty) section is open.
            if not algo_name:
                continue

            for key, pattern in time_patterns.items():
                found = pattern.search(line)
                if found:
                    results[algo_name][key] = float(found.group(1))

    # Remove any algorithms that didn't have both times recorded
    return {
        name: times
        for name, times in results.items()
        if times['Training'] is not None and times['Inference'] is not None
    }

def parse_scikit_files(directory):
    """
    Parses all scikit-learn result files in the specified directory.

    Every regular file whose name ends in '.txt', except 'dlib_results.txt',
    is handed to parse_single_file as a scikit-learn result file and the
    per-file results are merged into one dictionary. Files are visited in
    sorted name order; if the same algorithm appears in several files, the
    entry from the last file in that order wins.

    Args:
        directory (str): Path to the directory containing scikit-learn result files.

    Returns:
        dict: A dictionary with algorithm names as keys and
              {'Training': time, 'Inference': time} as values.
    """
    scikit_results = {}
    # os.listdir returns entries in arbitrary order; sorting makes the
    # outcome of duplicate algorithm names reproducible across runs.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.txt') or filename == 'dlib_results.txt':
            continue
        file_path = os.path.join(directory, filename)
        # A sub-directory that happens to be named '*.txt' cannot be opened
        # as a result file, so skip anything that is not a regular file.
        if not os.path.isfile(file_path):
            continue
        scikit_results.update(parse_single_file(file_path, 'scikit-learn'))
    return scikit_results

def parse_dlib_file(file_path):
    """
    Reads the Dlib benchmark results from one file.

    Delegates to parse_single_file, passing 'Dlib' as the library name.

    Args:
        file_path (str): Location of the Dlib result file.

    Returns:
        dict: Maps each algorithm name to
              {'Training': time, 'Inference': time}.
    """
    return parse_single_file(file_path, 'Dlib')

def plot_comparison(algorithms, scikit_train, dlib_train, scikit_infer, dlib_infer):
    """
    Draws two grouped bar charts comparing scikit-learn and Dlib timings.

    The first chart shows training times and is saved as
    'training_time_comparison.png'; the second shows inference times and is
    saved as 'inference_time_comparison.png'. Each chart is also displayed
    with plt.show() after it has been saved.

    Args:
        algorithms (list): Algorithm names, one per bar group.
        scikit_train (list): scikit-learn training times.
        dlib_train (list): Dlib training times.
        scikit_infer (list): scikit-learn inference times.
        dlib_infer (list): Dlib inference times.
    """
    bar_width = 0.35
    positions = np.arange(len(algorithms))  # One slot per algorithm

    # One entry per chart:
    # (scikit times, dlib times, scikit label, dlib label,
    #  scikit color, dlib color, title, output file)
    charts = (
        (scikit_train, dlib_train,
         'scikit-learn Training', 'Dlib Training',
         'skyblue', 'navy',
         'Training Time Comparison: scikit-learn vs Dlib',
         'training_time_comparison.png'),
        (scikit_infer, dlib_infer,
         'scikit-learn Inference', 'Dlib Inference',
         'lightgreen', 'darkgreen',
         'Inference Time Comparison: scikit-learn vs Dlib',
         'inference_time_comparison.png'),
    )

    for (scikit_times, dlib_times, scikit_label, dlib_label,
         scikit_color, dlib_color, title, output_file) in charts:
        figure, axes = plt.subplots(figsize=(12, 6))

        # scikit-learn bars sit left of the slot centre, Dlib bars right of it
        scikit_bars = axes.bar(positions - bar_width/2, scikit_times, bar_width,
                               label=scikit_label, color=scikit_color)
        dlib_bars = axes.bar(positions + bar_width/2, dlib_times, bar_width,
                             label=dlib_label, color=dlib_color)

        # Axis label, title, tick labels and legend
        axes.set_ylabel('Time (seconds)')
        axes.set_title(title)
        axes.set_xticks(positions)
        axes.set_xticklabels(algorithms, rotation=45, ha='right')
        axes.legend()

        # Write each bar's value just above its top edge
        for bars in (scikit_bars, dlib_bars):
            for bar in bars:
                value = bar.get_height()
                axes.annotate(f'{value:.6f}',
                              xy=(bar.get_x() + bar.get_width() / 2, value),
                              xytext=(0, 3),  # 3 points vertical offset
                              textcoords="offset points",
                              ha='center', va='bottom', fontsize=8)

        plt.tight_layout()
        plt.savefig(output_file)
        plt.show()

def main():
    """
    Loads the scikit-learn and Dlib results from the current directory and
    plots the timings of the algorithms present in both.

    Prints a message and returns early when 'dlib_results.txt' is missing or
    when the two result sets share no algorithm name.
    """
    directory = '.'  # Result files are looked up in the current directory

    # scikit-learn side
    scikit_results = parse_scikit_files(directory)

    # Dlib side
    dlib_file_path = os.path.join(directory, 'dlib_results.txt')
    if not os.path.exists(dlib_file_path):
        print(f"Error: '{dlib_file_path}' not found.")
        return
    dlib_results = parse_dlib_file(dlib_file_path)

    # Only algorithms measured by both libraries can be compared;
    # sorting keeps the bar order stable between runs.
    algorithms = sorted(scikit_results.keys() & dlib_results.keys())
    if not algorithms:
        print("No common algorithms found between scikit-learn and Dlib results.")
        return

    def column(results, phase):
        # Times of one phase, in the same order as `algorithms`
        return [results[name][phase] for name in algorithms]

    plot_comparison(
        algorithms,
        column(scikit_results, 'Training'),
        column(dlib_results, 'Training'),
        column(scikit_results, 'Inference'),
        column(dlib_results, 'Inference'),
    )

# Run the comparison only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()


No common algorithms found between Dlib and scikit-learn results.
