In [8]:
import re
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style='whitegrid')

OUTPUT_DIR = Path('figures_comparison')
OUTPUT_DIR.mkdir(exist_ok=True)


In [9]:
def parse_sa_log(text: str) -> pd.DataFrame:
    re_dataset = re.compile(r"^Processing dataset:\s*(\S+)")
    re_layout = re.compile(r"^Processing initial layout:\s*(.+)")
    re_neighbor = re.compile(r"^Neighbor method:\s*(\S+)")
    re_run = re.compile(r"^Running experiment\s*(\d+)/(?:\d+):\s*(\S+)")
    re_completed = re.compile(r"Completed:\s*Final fitness\s*=\s*([0-9.,]+)")
    re_result = re.compile(r"Results saved to:\s*(.+)")

    dataset = None
    layout = None
    neighbor = None
    rows = []

    lines = text.splitlines()
    i = 0
    while i < len(lines):
        line = lines[i].strip()
        m = re_dataset.match(line)
        if m:
            dataset = m.group(1)
            i += 1
            continue

        m = re_layout.match(line)
        if m:
            layout = m.group(1)
            i += 1
            continue

        m = re_neighbor.match(line)
        if m:
            neighbor = m.group(1)
            i += 1
            continue

        m = re_run.match(line)
        if m:
            exp_num = int(m.group(1))
            exp_name = m.group(2)
            fitness = None
            j = i + 1
            while j < len(lines):
                compl = re_completed.search(lines[j])
                if compl:
                    fitness = float(compl.group(1).replace(',', ''))
                    break
                if re_run.match(lines[j]) or re_neighbor.match(lines[j]) or re_layout.match(lines[j]) or re_dataset.match(lines[j]):
                    break
                j += 1
            if fitness is None:
                compl2 = re_completed.search(line)
                if compl2:
                    fitness = float(compl2.group(1).replace(',', ''))
            if fitness is not None:
                rows.append({
                    'dataset': dataset,
                    'initial_layout': layout,
                    'neighbor_method': neighbor,
                    'experiment_num': exp_num,
                    'experiment_name': exp_name,
                    'final_fitness': fitness,
                    'result_file': None
                })
            i = j
            continue

        m = re_result.match(line)
        if m:
            result_file = m.group(1).strip()
            for k in range(len(rows)-1, max(-1, len(rows)-6), -1):
                if rows[k]['dataset'] == dataset and rows[k]['initial_layout'] == layout:
                    rows[k]['result_file'] = result_file
            i += 1
            continue

        i += 1

    df = pd.DataFrame(rows)
    if df.empty:
        return df
    df['final_fitness'] = pd.to_numeric(df['final_fitness'], errors='coerce')
    df['dataset'] = df['dataset'].astype('category')
    df['initial_layout'] = df['initial_layout'].astype('category')
    df['neighbor_method'] = df['neighbor_method'].astype('category')
    df = df.sort_values(['dataset','initial_layout','neighbor_method','experiment_num']).reset_index(drop=True)
    return df


In [10]:
LOG_PATH = Path('sa_runs_log.txt')

if LOG_PATH.exists():
    raw_text = LOG_PATH.read_text(encoding='utf-8')

df = parse_sa_log(raw_text)
print(f'Filas parseadas: {len(df)}')
if df.empty:
    raise RuntimeError("DataFrame vacío: revisa que sa_runs_log.txt exista o que hayas pegado el log en RAW_LOG.")
df['neighbor_short'] = df['neighbor_method'].astype(str).map({'random':'global','local':'local'}).fillna(df['neighbor_method'].astype(str))
OUTPUT_DIR.mkdir(exist_ok=True)
df.to_csv(OUTPUT_DIR / 'parsed_sa_results.csv', index=False)


Filas parseadas: 320


In [11]:
import math

for dataset in sorted(df['dataset'].unique()):
    sub = df[df['dataset'] == dataset].copy()

    sub = sub[sub['neighbor_short'].isin(['local','global'])]
    if sub.empty:
        print(f"No hay datos 'local'/'global' para {dataset}, omitiendo.")
        continue

    stats = sub.groupby(['initial_layout','neighbor_short'])['final_fitness'].agg(['mean','std']).reset_index()
    pivot_mean = stats.pivot(index='initial_layout', columns='neighbor_short', values='mean').fillna(np.nan)
    pivot_std = stats.pivot(index='initial_layout', columns='neighbor_short', values='std').fillna(0)

    layouts = list(pivot_mean.index)
    methods = ['global','local'] 
    n_layouts = len(layouts)
    x = np.arange(n_layouts)
    width = 0.35

    fig, ax = plt.subplots(figsize=(max(8, n_layouts*0.6), 5))
    for i, method in enumerate(methods):
        means = pivot_mean.get(method)
        errs = pivot_std.get(method).fillna(0)
        if means is None:
            means = np.array([np.nan]*n_layouts)
            errs = np.array([0]*n_layouts)
        pos = x - width/2 + i*width
        ax.bar(pos, means, width, yerr=errs, capsize=4, label=method, alpha=0.9)

    all_vals = sub['final_fitness'].dropna()
    ymin = all_vals.min()
    ymax = all_vals.max()
    if math.isfinite(ymin) and math.isfinite(ymax):
        margin = max( (ymax - ymin) * 0.05, 1.0 )
        ax.set_ylim(max(0, ymin - margin), ymax + margin)

    ax.set_xticks(x)
    ax.set_xticklabels(layouts, rotation=45, ha='right')
    ax.set_ylabel('Final fitness (media ± std)')
    ax.set_title(f'{dataset}: Comparación global vs local por initial_layout')
    ax.legend(title='Swap')
    plt.tight_layout()
    fname = OUTPUT_DIR / f'{dataset}_grouped_bar_local_vs_global.png'
    plt.savefig(fname, dpi=300)
    plt.close()
    print('Guardado:', fname.resolve())


  stats = sub.groupby(['initial_layout','neighbor_short'])['final_fitness'].agg(['mean','std']).reset_index()


Guardado: /root/hybrid-keyboard-optimizer/results/sa/figures_comparison/moby_dick_grouped_bar_local_vs_global.png


  stats = sub.groupby(['initial_layout','neighbor_short'])['final_fitness'].agg(['mean','std']).reset_index()


Guardado: /root/hybrid-keyboard-optimizer/results/sa/figures_comparison/wizard_oz_grouped_bar_local_vs_global.png


In [12]:
for dataset in sorted(df['dataset'].unique()):
    sub = df[df['dataset'] == dataset].copy()
    sub = sub[sub['neighbor_short'].isin(['local','global'])]
    if sub.empty:
        print(f"No hay datos 'local'/'global' para {dataset}, omitiendo heatmap.")
        continue

    pivot = sub.pivot_table(index='initial_layout', columns='neighbor_short', values='final_fitness', aggfunc='min')
    cols_order = [c for c in ['global','local'] if c in pivot.columns]
    pivot = pivot[cols_order]

    plt.figure(figsize=(max(6, pivot.shape[1]*2.0), max(4, pivot.shape[0]*0.6)))
    sns.heatmap(pivot, annot=True, fmt='.1f', linewidths=.5, cbar_kws={'label': 'Min final_fitness'})
    plt.title(f'{dataset}: Heatmap (mínimo final_fitness) — initial_layout × swap (global/local)')
    plt.tight_layout()
    fname = OUTPUT_DIR / f'{dataset}_heatmap_min_local_vs_global.png'
    plt.savefig(fname, dpi=300)
    plt.close()
    print('Guardado:', fname.resolve())


  pivot = sub.pivot_table(index='initial_layout', columns='neighbor_short', values='final_fitness', aggfunc='min')


Guardado: /root/hybrid-keyboard-optimizer/results/sa/figures_comparison/moby_dick_heatmap_min_local_vs_global.png


  pivot = sub.pivot_table(index='initial_layout', columns='neighbor_short', values='final_fitness', aggfunc='min')


Guardado: /root/hybrid-keyboard-optimizer/results/sa/figures_comparison/wizard_oz_heatmap_min_local_vs_global.png
