In [15]:
import re
from pathlib import Path
from typing import List, Dict, Any, Tuple
import pandas as pd
import matplotlib.pyplot as plt


In [29]:
# Log file to parse.
# NOTE(review): this is an absolute, machine-specific path — consider making it
# configurable (e.g. relative to a results directory) before sharing the notebook.
LOG_PATH = Path(r"C:\Prgrm\EAAI_2025\results\results_fusiontransformer_2025-10-08_08-37-23\overrides.txt")

# Read the log text; if it is not valid UTF-8, retry with the cp1251 encoding.
try:
    text = LOG_PATH.read_text(encoding="utf-8")
except UnicodeDecodeError:
    text = LOG_PATH.read_text(encoding="cp1251")


In [30]:
# -*- coding: utf-8 -*-
from __future__ import annotations

import re
import ast
from typing import List, Dict, Any, Tuple
import pandas as pd

# ====== SETTINGS: LOOK HERE ======
# METRIC may be "AUTO" — the best available metric is then picked by priority (see AUTO_PRIORITY).

METRIC = "MEAN_COMBO"   # e.g. "AUTO", "MEAN_COMBO", "MEAN_EMO", "MEAN_PKL", "ACC_FIV2", "CCC_FIV2", "MF1_CMU_MOSEI", "MUAR_CMU_MOSEI"
SPLIT  = "TEST"         # "DEV" or "TEST"
TOP    = 50             # how many top rows to show
ASC    = False          # False = descending order (best on top)
INCLUDE_BOTH = False    # True — show both DEV and TEST metrics; False — only the selected split

# Aliases from "old" metric names to the new ones (best guess for this log format).
# Keys are looked up after upper-casing the requested metric name.
METRIC_ALIASES: Dict[str, str] = {
    # Old aggregate names -> the new aggregate
    "MEAN_ALL": "MEAN_COMBO",
    "MEAN": "MEAN_COMBO",
    # Just in case: mappings for the emotion / personality aggregates
    "MEAN_EMO_ALL": "MEAN_EMO",
    "MEAN_PKL_ALL": "MEAN_PKL",
    # In case plain UAR/MF1 were written earlier without a dataset name
    "UAR": "MUAR_CMU_MOSEI",
    "MF1": "MF1_CMU_MOSEI",
}

# Priority list for METRIC == "AUTO": earlier entries take precedence.
# For this report the COMBO aggregate comes first.
AUTO_PRIORITY = [
    "MEAN_COMBO",
    "MEAN_EMO", "MEAN_PKL",
    "ACC_FIV2", "CCC_FIV2",
    "MF1_CMU_MOSEI", "MUAR_CMU_MOSEI",
]


# Normalization of box frames and box-drawing characters
_BOX_EDGE = tuple("┌┐└┘├┤┬┴┼─—━│╭╮╰╯╱╲═║")
def normalize_box_text(raw: str) -> str:
    """Strip box-drawing decoration from `raw` and keep only the text.

    Blank lines are preserved as empty lines, lines consisting solely of
    box-drawing characters and whitespace are dropped, and one vertical
    border character is removed from each side of the remaining lines.
    """
    side_borders = ("│", "║")
    cleaned = []
    for raw_line in raw.splitlines():
        core = raw_line.strip()
        if not core:
            # Keep blank lines so the paragraph structure survives.
            cleaned.append("")
            continue
        if all(c in _BOX_EDGE or c.isspace() for c in raw_line):
            # Pure frame line (top/bottom border, separator): drop it.
            continue
        # Remove at most one border character on each side.
        if core.startswith(side_borders):
            core = core[1:]
        if core.endswith(side_borders):
            core = core[:-1]
        cleaned.append(core.strip())
    return "\n".join(cleaned)

# One "step" block: the header `Шаг <N>[:] <hp names> = (<hp values>)` followed by a
# body that extends (lazily) up to the next line starting with `Шаг <N>:` or the end
# of the text. Named groups: step, hpnames, hpvals, body.
# NOTE(review): the colon after the step number is optional in the header but required
# in the terminating lookahead — confirm that every step header in the log has a colon.
BLOCK_RE = re.compile(
    r"Шаг\s+(?P<step>\d+)\s*:?\s*(?P<hpnames>[^=]+?)=\s*\((?P<hpvals>[^)]*)\)\s*(?P<body>.*?)(?=\n\s*Шаг\s+\d+\s*:|\Z)",
    re.DOTALL | re.UNICODE | re.IGNORECASE
)

# Result section headers: "Результаты (DEV)" / "Results (TEST)" with an optional
# trailing colon, matched case-insensitively. The only capturing group is the split
# tag (DEV or TEST), so `SECTION_RE.split()` interleaves tags with the section texts.
SECTION_RE = re.compile(
    r"(?:Результаты|Results)\s*\(\s*(DEV|TEST)\s*\)\s*:?\s*",
    re.IGNORECASE | re.UNICODE
)

# Metric lines of the form `KEY = value`, where value may look like 0.812, 81.2%,
# 1e-3 or 0,812; any trailing text after the value (e.g. "✅ bla") is tolerated.
# Named groups: key, val.
# The value must end either with "%" or at a position not followed by a word
# character. A plain `\b` after the optional "%" must not be used here: "%" is a
# non-word character, so `\b` fails right after it at end of line or before a
# space, and the regex would backtrack and silently drop the percent sign from `val`.
METRIC_LINE_RE = re.compile(
    r"^\s*(?P<key>[A-Z0-9_]+)\s*=\s*(?P<val>[+-]?\d+(?:[.,]\d+)?(?:e[+-]?\d+)?(?:%|(?!\w))).*$",
    re.UNICODE | re.IGNORECASE
)

def _smart_split_commas(s: str) -> List[str]:
    """Split `s` on top-level commas.

    Commas inside single/double quotes or inside (), [], {} are kept as part
    of the current piece. Each bracket kind has its own nesting counter that
    never drops below zero. Pieces are stripped and empty pieces are discarded.
    """
    closer_to_opener = {")": "(", "]": "[", "}": "{"}
    depth = {"(": 0, "[": 0, "{": 0}
    pieces = []
    current = []
    open_quote = None
    for pos, char in enumerate(s):
        if open_quote is not None:
            # Inside a quoted string: copy verbatim until the unescaped closing quote.
            current.append(char)
            if char == open_quote and (pos == 0 or s[pos - 1] != "\\"):
                open_quote = None
        elif char in ("'", '"'):
            open_quote = char
            current.append(char)
        elif char in depth:
            depth[char] += 1
            current.append(char)
        elif char in closer_to_opener:
            opener = closer_to_opener[char]
            depth[opener] = max(0, depth[opener] - 1)
            current.append(char)
        elif char == "," and not any(depth.values()):
            # Top-level comma: finish the current piece.
            pieces.append("".join(current).strip())
            current = []
        else:
            current.append(char)
    if current:
        pieces.append("".join(current).strip())
    return [piece for piece in pieces if piece != ""]

def _to_number_or_none(val: str) -> float | None:
    """Parse a metric value string into a float.

    Accepts a decimal point or a decimal comma. A trailing "%" means the
    number is a percentage and the result is divided by 100.

    Args:
        val: raw value text, e.g. "0.812", "0,812", "1e-3" or "81.2%".

    Returns:
        The parsed number, or None when the text is not a valid number.
    """
    s = val.strip().lower()
    is_percent = s.endswith("%")
    if is_percent:
        s = s[:-1].strip()
    try:
        number = float(s.replace(",", "."))
    except ValueError:
        # Only a malformed number is expected here; anything else should surface.
        return None
    return number / 100.0 if is_percent else number

def coerce_value(s: str):
    """Convert a raw hyperparameter token into a Python value.

    Conversion order:
      1. empty / "none" / "null" / "nan" (case-insensitive) -> None
      2. "true" / "false" (case-insensitive) -> bool
      3. a number written with a decimal comma ("1,5") -> float
      4. any Python literal accepted by ``ast.literal_eval`` (int, float, str, list, ...)
      5. a plain number that ``literal_eval`` rejects (e.g. leading zeros, "007") -> float
      6. otherwise the stripped string itself

    Args:
        s: raw token text.

    Returns:
        The coerced value (None, bool, number, literal container or str).
    """
    s = s.strip()
    lowered = s.lower()
    if not s or lowered in {"none", "null", "nan"}:
        return None
    if lowered == "true":
        return True
    if lowered == "false":
        return False
    # The decimal comma must be handled BEFORE literal_eval: "1,5" is a valid Python
    # tuple literal, so literal_eval would return (1, 5) and the numeric fallback
    # below would never see it (while "1,05" would still come out as 1.05).
    if re.fullmatch(r"[+-]?\d+,\d+(?:e[+-]?\d+)?", s, re.IGNORECASE):
        return float(s.replace(",", "."))
    try:
        return ast.literal_eval(s)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # Not a Python literal; fall through to the lenient handling below.
        pass
    if re.fullmatch(r"[+-]?\d+(?:[.,]\d+)?(?:e[+-]?\d+)?", s, re.IGNORECASE):
        try:
            return float(s.replace(",", "."))
        except ValueError:
            pass
    return s

def parse_hp_names(names_raw: str) -> List[str]:
    """Split a hyperparameter-name header on "+" or "," into stripped, non-empty names."""
    names = []
    for chunk in re.split(r"[+,]", names_raw):
        chunk = chunk.strip()
        if chunk:
            names.append(chunk)
    return names

def parse_hp_vals(vals_raw: str) -> List[Any]:
    """Split the raw value list on top-level commas and coerce every token to a Python value."""
    tokens = _smart_split_commas(vals_raw)
    return list(map(coerce_value, tokens))

def kv_from_names_vals(names: List[str], vals: List[Any]) -> Dict[str, Any]:
    """Pair names with values positionally.

    Names without a matching value map to None; surplus values are ignored.
    """
    mapping = {}
    value_count = len(vals)
    for idx, name in enumerate(names):
        mapping[name] = vals[idx] if idx < value_count else None
    return mapping

def parse_section_metrics(text_block: str) -> Dict[str, float]:
    """Collect `KEY = value` metric lines from a section into an upper-cased key -> float dict.

    Lines that do not match METRIC_LINE_RE, or whose value cannot be parsed
    as a number, are skipped. A repeated key keeps its last value.
    """
    found: Dict[str, float] = {}
    for raw_line in text_block.splitlines():
        hit = METRIC_LINE_RE.match(raw_line.strip())
        if hit is None:
            continue
        number = _to_number_or_none(hit.group("val"))
        if number is not None:
            found[hit.group("key").strip().upper()] = float(number)
    return found

def split_dev_test(body: str) -> Tuple[Dict[str, float], Dict[str, float]]:
    """Split a step body into DEV and TEST metric dicts.

    When the body has no section header at all, every metric found in it is
    treated as TEST. When a split appears several times, the last section wins.

    Returns:
        A `(dev, test)` tuple of metric dicts; a missing split yields an empty dict.
    """
    chunks = SECTION_RE.split(body)
    if len(chunks) == 1:
        return {}, parse_section_metrics(chunks[0])
    by_split = {"DEV": {}, "TEST": {}}
    # After the leading chunk, tags and section texts alternate.
    for tag, section_text in zip(chunks[1::2], chunks[2::2]):
        label = tag.upper()
        if label in by_split:
            by_split[label] = parse_section_metrics(section_text)
    return by_split["DEV"], by_split["TEST"]

def parse_blocks_dynamic(text_src: str) -> List[Dict[str, Any]]:
    """Parse every step block of the log into one flat record per step.

    Each record holds `step`, the hyperparameters named in the step header,
    and the metrics suffixed with `_DEV` / `_TEST`. Records are returned
    sorted by step number.
    """
    cleaned = normalize_box_text(text_src)
    records: List[Dict[str, Any]] = []
    for block in BLOCK_RE.finditer(cleaned):
        record = {"step": int(block.group("step"))}
        hp_names = parse_hp_names(block.group("hpnames"))
        hp_values = parse_hp_vals(block.group("hpvals"))
        record.update(kv_from_names_vals(hp_names, hp_values))
        dev_metrics, test_metrics = split_dev_test(block.group("body"))
        record.update({f"{name.upper()}_DEV": value for name, value in dev_metrics.items()})
        record.update({f"{name.upper()}_TEST": value for name, value in test_metrics.items()})
        records.append(record)
    return sorted(records, key=lambda record: record["step"])

def rows_to_dataframe_dynamic(rows: List[Dict[str, Any]]) -> pd.DataFrame:
    """Build a DataFrame from the parsed records; an empty input gives an empty DataFrame."""
    if not rows:
        return pd.DataFrame()
    return pd.DataFrame(rows)

def _available_metric_bases(cols: List[str]) -> List[str]:
    """Return the sorted unique column names with the last `_<suffix>` part removed."""
    unique_bases = {name.rsplit("_", 1)[0] for name in cols}
    ordered = list(unique_bases)
    ordered.sort()
    return ordered

def _resolve_metric_name(desired: str, available_bases: List[str]) -> str:
    """Find the requested metric among the available ones.

    Resolution order: alias lookup in METRIC_ALIASES plus exact match, then a
    unique "contains" match that ignores underscores, then a unique prefix
    match. Returns an empty string when nothing matches unambiguously.
    """
    wanted = desired.upper().strip()
    # Map legacy names to the current ones first.
    wanted = METRIC_ALIASES.get(wanted, wanted)
    if wanted in available_bases:
        return wanted

    squashed = wanted.replace("_", "")
    matchers = (
        # Loose "contains" comparison with underscores removed.
        lambda base: squashed in base.replace("_", ""),
        # Prefix comparison on the original names.
        lambda base: base.startswith(wanted),
    )
    for accepts in matchers:
        hits = [base for base in available_bases if accepts(base)]
        if len(hits) == 1:
            return hits[0]
    # Last resort: nothing matched unambiguously.
    return ""

def view_sorted_split(df: pd.DataFrame, metric: str, split: str,
                      top: int = 10, ascending: bool = False,
                      include_both: bool = False) -> pd.DataFrame:
    """
    Return the top rows of `df` sorted by one metric of the chosen split.

    Column order of the result: ``step`` and every column that is not a
    ``*_DEV`` / ``*_TEST`` metric (the hyperparameters), then the sort metric,
    then the remaining metric columns in alphabetical order.

    Args:
        df: parsed results, one row per step; must contain a ``step`` column.
        metric: metric base name (without the split suffix), an alias from
            METRIC_ALIASES, or "AUTO"/empty to take the first entry of
            AUTO_PRIORITY that is available in the chosen split.
        split: "DEV" or "TEST" (case-insensitive); selects the sort column.
        top: number of rows to return.
        ascending: sort direction passed to ``DataFrame.sort_values``.
        include_both: when True, metric columns of both splits are shown;
            otherwise only those of `split`.

    Returns:
        The first `top` rows of the sorted, re-ordered frame. An empty `df` is
        returned unchanged after printing a warning.

    Raises:
        ValueError: if no suitable metric exists in the chosen split.
    """
    if df.empty:
        print("⚠️ Пустой DataFrame — ничего не распаршено. Принеси нормальный текст, пожалуйста.")
        return df

    split = split.upper().strip()
    # meta = step + every non-metric column (i.e. the hyperparameters)
    meta_cols = ["step"] + [c for c in df.columns
                            if not (c.endswith("_DEV") or c.endswith("_TEST")) and c != "step"]

    # Which metric columns are displayed
    all_metric_cols = [c for c in df.columns if c.endswith("_DEV") or c.endswith("_TEST")]
    split_metric_cols = [c for c in all_metric_cols if c.endswith(f"_{split}")]
    keep_metric_cols = all_metric_cols[:] if include_both else split_metric_cols

    # The sort column is always "<metric>_<split>", so candidates must come from the
    # selected split only. Taking them from both splits (include_both=True) would let
    # a metric that exists only in the other split resolve and then fail with a
    # KeyError on the missing sort column.
    bases = _available_metric_bases(split_metric_cols)

    # Determine the target metric
    metric_upper = (metric or "").upper().strip()
    if metric_upper == "AUTO" or not metric_upper:
        # take the first entry of AUTO_PRIORITY that is available
        chosen = next((m for m in AUTO_PRIORITY if m in bases), None)
        if not chosen:
            raise ValueError(f"Не нашёл подходящую метрику для AUTO. Доступные: {bases}")
    else:
        chosen = _resolve_metric_name(metric_upper, bases)
        if not chosen:
            # list what is available as a hint
            hint = ", ".join(bases) if bases else "—"
            raise ValueError(f"Метрика '{metric}' не найдена в сплите {split}. Доступные: {hint}")

    sort_col = f"{chosen}_{split}"

    # Sort metric first, the rest after it in alphabetical order
    metric_cols_ordered = [sort_col] + [c for c in sorted(keep_metric_cols) if c != sort_col]
    ordered_cols = meta_cols + metric_cols_ordered

    out = df.loc[:, ordered_cols].sort_values(sort_col, ascending=ascending)
    return out.head(top)

# ==== Parse and display ====
# the variable `text` is expected to hold the full log text
_rows = parse_blocks_dynamic(text)
df = rows_to_dataframe_dynamic(_rows)

result_table = view_sorted_split(
    df,
    metric=METRIC,      # "AUTO" or a specific name of a new-style metric, e.g. "MEAN_ALL"
    split=SPLIT,
    top=TOP,
    ascending=ASC,
    include_both=INCLUDE_BOTH
)
result_table


Unnamed: 0,step,hidden_dim,out_features,num_transformer_heads,dropout,MEAN_COMBO_TEST,ACC_FIV2_TEST,CCC_FIV2_TEST,MEAN_EMO_TEST,MEAN_PKL_TEST,MF1_CMU_MOSEI_TEST,MUAR_CMU_MOSEI_TEST
2,3,128,256,2,0.2,0.7057,0.9134,0.6621,0.6237,0.7878,0.6211,0.6264
14,15,128,512,2,0.2,0.7057,0.9134,0.6621,0.6237,0.7878,0.6211,0.6264
16,17,128,512,4,0.15,0.7054,0.9131,0.6689,0.6199,0.791,0.6189,0.6208
4,5,128,256,4,0.15,0.7054,0.9131,0.6689,0.6199,0.791,0.6189,0.6208
0,1,128,256,2,0.1,0.7044,0.912,0.6655,0.62,0.7887,0.6175,0.6226
12,13,128,512,2,0.1,0.7044,0.912,0.6655,0.62,0.7887,0.6175,0.6226
20,21,128,512,8,0.2,0.7042,0.9137,0.6607,0.6212,0.7872,0.6183,0.6241
8,9,128,256,8,0.2,0.7042,0.9137,0.6607,0.6212,0.7872,0.6183,0.6241
10,11,128,256,16,0.15,0.7042,0.9136,0.6698,0.6166,0.7917,0.6147,0.6186
6,7,128,256,8,0.1,0.703,0.9126,0.6627,0.6183,0.7877,0.616,0.6205
