In [1]:
# Импорты — трогай один раз и не мучай
import re
from pathlib import Path
from typing import List, Dict, Any, Tuple
import pandas as pd
import matplotlib.pyplot as plt


In [2]:
LOG_PATH = Path(r"C:\Prgrm\BDCC_2025\results\results_mamba_2025-10-21_23-12-22\overrides.txt")

# Read the whole log as text. UTF-8 is tried first; if the bytes are not valid
# UTF-8 the file is re-read as cp1251.
# NOTE(review): LOG_PATH is an absolute, machine-specific path; adjust it before
# running this notebook on another machine.
try:
    text = LOG_PATH.read_text(encoding="utf-8")
except UnicodeDecodeError:
    text = LOG_PATH.read_text(encoding="cp1251")


In [3]:
# ====== SETTINGS: EDIT HERE ======
METRIC = "UAR_WSM"   # e.g. "UAR_WSM", "MF1_WSM", "RECALL_C2_PARKINSON_WSM"
SPLIT  = "TEST"      # "DEV" or "TEST"
TOP    = 25          # how many top rows to show
ASC    = False       # False = descending order (best rows first)
INCLUDE_BOTH = False # True: show both DEV and TEST metrics; False: only the selected split

# ====== NO NEED TO TOUCH ANYTHING BELOW ======

def normalize_box_text(raw: str) -> str:
    """Strip box-drawing decoration from ``raw`` and return the plain text.

    Lines whose first non-blank character is a frame/rule character are
    dropped entirely. On every remaining line at most one leading and one
    trailing vertical bar (``│``) is removed, and the result is trimmed of
    surrounding whitespace. The kept lines are joined with newline characters.
    """
    frame_starts = ("┌", "└", "┐", "┘", "─", "—", "━")
    side_bar = "│"

    cleaned = []
    for raw_line in raw.splitlines():
        content = raw_line.strip()
        # Horizontal borders carry no information.
        if content.startswith(frame_starts):
            continue
        # Peel off the left border, then the right one, independently.
        if content.startswith(side_bar):
            content = content[1:]
        if content.endswith(side_bar):
            content = content[:-1]
        cleaned.append(content.strip())
    return "\n".join(cleaned)

# A step block: hyperparameter names to the left of "=", their values inside
# parentheses, and a body that runs up to the next "Шаг N:" header or the end
# of the text.
BLOCK_RE = re.compile(
    r"Шаг\s+(?P<step>\d+):\s*(?P<hpnames>[^=]+?)=\s*\((?P<hpvals>[^)]*)\)\s*(?P<body>.*?)(?=\n\s*Шаг\s+\d+:|\Z)",
    re.DOTALL | re.UNICODE
)
# Section header "Результаты (DEV):" / "Результаты (TEST):"; the split tag is
# the only capturing group, so re.split() interleaves tags with section texts.
SECTION_RE = re.compile(r"Результаты\s*\((DEV|TEST)\)\s*:\s*", re.IGNORECASE | re.UNICODE)
# One metric line "KEY = value". The value accepts an optional sign, a decimal
# part and an exponent, so negative values and scientific notation (e.g.
# "1e-05") are no longer silently skipped. Every string this pattern accepts
# is parseable by float().
METRIC_LINE_RE = re.compile(
    r"^\s*(?P<key>[A-Z0-9_]+)\s*=\s*"
    r"(?P<val>[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?)\s*$",
    re.UNICODE
)

def coerce_value(s: str):
    """Convert one raw hyperparameter token into a Python value.

    Args:
        s: Text of a single value, surrounding whitespace allowed.

    Returns:
        ``None`` for an empty token or for "none"/"null"/"nan" (any case);
        an ``int`` for an optionally signed run of digits; a ``float`` for a
        decimal and/or scientific-notation number (e.g. "0.5", ".5", "1e-05");
        otherwise the stripped token itself, for textual hyperparameters.
    """
    token = s.strip()
    if not token or token.lower() in {"none", "null", "nan"}:
        return None

    if re.fullmatch(r"[+-]?\d+", token):
        try:
            return int(token)
        except ValueError:
            # Only reachable for absurdly long digit strings (interpreter
            # int-conversion limit); keep the text rather than crash.
            return token

    # A float needs a decimal point or an exponent; plain digit runs were
    # handled above. Everything this pattern accepts is valid for float().
    if re.fullmatch(r"[+-]?(?:(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+)", token):
        return float(token)

    return token  # textual hyperparameter

def parse_hp_names(names_raw: str) -> List[str]:
    """Split a "+"-separated list of hyperparameter names.

    Example: "a + b + c" -> ["a", "b", "c"]. Pieces that are empty after
    trimming whitespace are discarded.
    """
    names = []
    for piece in names_raw.split("+"):
        piece = piece.strip()
        if piece:
            names.append(piece)
    return names

def parse_hp_vals(vals_raw: str) -> List[Any]:
    """Split a comma-separated value list and coerce every item.

    Example: "1, 2, None" -> [1, 2, None]. Each comma-delimited piece is
    passed through ``coerce_value``.
    """
    return list(map(coerce_value, vals_raw.split(",")))

def kv_from_names_vals(names: List[str], vals: List[Any]) -> Dict[str, Any]:
    """Pair hyperparameter names with values by position.

    Names that have no value at their position map to ``None``; values beyond
    the last name are ignored.
    """
    available = len(vals)
    paired = {}
    for position, name in enumerate(names):
        paired[name] = vals[position] if position < available else None
    return paired

def parse_section_metrics(text_block: str) -> Dict[str, float]:
    """Collect "KEY = value" metric lines from one results section.

    Every line is trimmed and tested against ``METRIC_LINE_RE``; lines that do
    not match are ignored. If a key occurs more than once, the last value wins.
    """
    hits = (METRIC_LINE_RE.match(row.strip()) for row in text_block.splitlines())
    return {hit.group("key").strip(): float(hit.group("val")) for hit in hits if hit}

def split_dev_test(body: str) -> Tuple[Dict[str, float], Dict[str, float]]:
    """Split a step body into DEV and TEST metric dictionaries.

    ``SECTION_RE.split`` yields the text before the first header followed by
    alternating (tag, section text) items. A split with no section stays an
    empty dict; if a tag repeats, the last section for it wins.

    Returns:
        ``(dev_metrics, test_metrics)``.
    """
    pieces = SECTION_RE.split(body)
    by_split = {"DEV": {}, "TEST": {}}
    # Odd indices hold the captured tag, the following even index its text.
    for label, chunk in zip(pieces[1::2], pieces[2::2]):
        label = label.upper()
        if label in by_split:
            by_split[label] = parse_section_metrics(chunk)
    return by_split["DEV"], by_split["TEST"]

def parse_blocks_dynamic(text_src: str) -> List[Dict[str, Any]]:
    """Parse every step block of the log into a flat record.

    The text is first cleaned with ``normalize_box_text``. Each ``BLOCK_RE``
    match becomes one dict holding ``"step"``, one key per hyperparameter
    name, and the metrics of each split under ``<metric>_DEV`` /
    ``<metric>_TEST`` keys.

    Returns:
        The records ordered by ascending step number.
    """
    records = []
    for block in BLOCK_RE.finditer(normalize_box_text(text_src)):
        record = {"step": int(block.group("step"))}
        names = parse_hp_names(block.group("hpnames"))
        values = parse_hp_vals(block.group("hpvals"))
        record.update(kv_from_names_vals(names, values))

        dev_metrics, test_metrics = split_dev_test(block.group("body"))
        record.update({f"{name}_DEV": value for name, value in dev_metrics.items()})
        record.update({f"{name}_TEST": value for name, value in test_metrics.items()})
        records.append(record)
    return sorted(records, key=lambda record: record["step"])

def rows_to_dataframe_dynamic(rows: List[Dict[str, Any]]) -> pd.DataFrame:
    """Build a DataFrame from parsed records; an empty list gives an empty frame."""
    if not rows:
        return pd.DataFrame()
    return pd.DataFrame(rows)

def view_sorted_split(df: pd.DataFrame, metric: str, split: str,
                      top: int = 10, ascending: bool = False,
                      include_both: bool = False) -> pd.DataFrame:
    """Return the top rows of ``df`` sorted by one metric of one split.

    Column order of the result: ``step`` (when present) and every other
    column that does not end in ``_DEV``/``_TEST`` (the hyperparameters), then
    the sort column ``<metric>_<SPLIT>``, then the remaining metric columns in
    alphabetical order.

    Args:
        df: Table with hyperparameter columns and ``*_DEV`` / ``*_TEST``
            metric columns.
        metric: Metric name without the split suffix, e.g. "UAR_WSM".
        split: "DEV" or "TEST" (case-insensitive).
        top: Number of rows to return.
        ascending: Sort direction; ``False`` puts the largest values first.
        include_both: Keep metric columns of both splits instead of only the
            selected one.

    Returns:
        A new DataFrame with at most ``top`` rows.

    Raises:
        ValueError: If ``df`` has no columns at all, or if the requested sort
            column is not among the kept metric columns.
    """
    split = split.upper()
    sort_col = f"{metric}_{split}"

    # A frame without any columns has nothing to sort or list; report that
    # directly instead of the "column not found, available: []" message.
    if df.shape[1] == 0:
        raise ValueError(
            f"Таблица пуста (нет ни одной колонки): колонка {sort_col} недоступна. "
            f"Проверьте, что из лога распознан хотя бы один блок 'Шаг N:'."
        )

    def _is_metric(col) -> bool:
        return col.endswith("_DEV") or col.endswith("_TEST")

    # Meta columns = "step" first (only if the frame has it, so .loc below
    # cannot fail on a missing label), then every other non-metric column.
    meta_cols = [c for c in df.columns if c == "step"][:1]
    meta_cols += [c for c in df.columns if not _is_metric(c) and c != "step"]

    # Metric columns to keep.
    all_metric_cols = [c for c in df.columns if _is_metric(c)]
    if include_both:
        keep_metric_cols = all_metric_cols[:]
    else:
        keep_metric_cols = [c for c in all_metric_cols if c.endswith(f"_{split}")]

    if sort_col not in keep_metric_cols:
        available = sorted(set(c.rsplit("_", 1)[0] for c in keep_metric_cols))
        raise ValueError(f"Колонка {sort_col} не найдена в выбранном сплите {split}. "
                         f"Доступные метрики: {available}")

    # Sort metric first, the rest after it in alphabetical order.
    metric_cols_ordered = [sort_col] + [c for c in sorted(keep_metric_cols) if c != sort_col]
    ordered_cols = meta_cols + metric_cols_ordered

    out = df.loc[:, ordered_cols].sort_values(sort_col, ascending=ascending)
    return out.head(top)

# ==== Parse the log and show the table ====
# `text` is the log content loaded in an earlier cell.
_rows = parse_blocks_dynamic(text)
df = rows_to_dataframe_dynamic(_rows)

# Select/sort according to the settings at the top of this cell.
result_table = view_sorted_split(df, metric=METRIC, split=SPLIT,
                                 top=TOP, ascending=ASC, include_both=INCLUDE_BOTH)
# Bare last expression: rendered as the cell output.
result_table


Unnamed: 0,step,hidden_dim,out_features,mamba_d_state,mamba_ker_size,mamba_layers,UAR_WSM_TEST,MF1_WSM_TEST,RECALL_C0_CONTROL_WSM_TEST,RECALL_C1_DEPRESSION_WSM_TEST,RECALL_C2_PARKINSON_WSM_TEST
110,111,512,128,5,4,6,0.7391,0.6236,0.5725,0.8328,0.812
49,50,128,512,6,5,5,0.7301,0.6256,0.5759,0.8925,0.7218
47,48,128,512,6,4,6,0.7252,0.6059,0.5547,0.809,0.812
5,6,128,128,5,5,6,0.7205,0.6216,0.6004,0.794,0.7669
14,15,128,128,6,5,6,0.7182,0.6585,0.7154,0.7701,0.6692
130,131,512,256,5,5,5,0.7173,0.6258,0.6239,0.7761,0.7519
17,18,128,128,6,6,6,0.7167,0.5928,0.5279,0.8478,0.7744
2,3,128,128,5,4,6,0.7119,0.6368,0.6708,0.7731,0.6917
38,39,128,512,5,4,6,0.7113,0.6118,0.5837,0.8358,0.7143
6,7,128,128,5,6,4,0.7091,0.5805,0.5123,0.7881,0.8271


In [42]:
# ====== SETTINGS: EDIT HERE ======
# NOTE(review): this cell repeats the definitions of an earlier cell; its
# settings differ only in TOP. Consider keeping a single copy.
METRIC = "UAR_WSM"   # e.g. "UAR_WSM", "MF1_WSM", "RECALL_C2_PARKINSON_WSM"
SPLIT  = "TEST"      # "DEV" or "TEST"
TOP    = 10          # how many top rows to show
ASC    = False       # False = descending order (best rows first)
INCLUDE_BOTH = False # True: show both DEV and TEST metrics; False: only the selected split

# ====== NO NEED TO TOUCH ANYTHING BELOW ======

def normalize_box_text(raw: str) -> str:
    """Strip box-drawing decoration from ``raw`` and return the plain text.

    Lines whose first non-blank character is a frame/rule character are
    dropped entirely. On every remaining line at most one leading and one
    trailing vertical bar (``│``) is removed, and the result is trimmed of
    surrounding whitespace. The kept lines are joined with newline characters.
    """
    frame_starts = ("┌", "└", "┐", "┘", "─", "—", "━")
    side_bar = "│"

    cleaned = []
    for raw_line in raw.splitlines():
        content = raw_line.strip()
        # Horizontal borders carry no information.
        if content.startswith(frame_starts):
            continue
        # Peel off the left border, then the right one, independently.
        if content.startswith(side_bar):
            content = content[1:]
        if content.endswith(side_bar):
            content = content[:-1]
        cleaned.append(content.strip())
    return "\n".join(cleaned)

# A step block: hyperparameter names to the left of "=", their values inside
# parentheses, and a body that runs up to the next "Шаг N:" header or the end
# of the text.
BLOCK_RE = re.compile(
    r"Шаг\s+(?P<step>\d+):\s*(?P<hpnames>[^=]+?)=\s*\((?P<hpvals>[^)]*)\)\s*(?P<body>.*?)(?=\n\s*Шаг\s+\d+:|\Z)",
    re.DOTALL | re.UNICODE
)
# Section header "Результаты (DEV):" / "Результаты (TEST):"; the split tag is
# the only capturing group, so re.split() interleaves tags with section texts.
SECTION_RE = re.compile(r"Результаты\s*\((DEV|TEST)\)\s*:\s*", re.IGNORECASE | re.UNICODE)
# One metric line "KEY = value". The value accepts an optional sign, a decimal
# part and an exponent, so negative values and scientific notation (e.g.
# "1e-05") are no longer silently skipped. Every string this pattern accepts
# is parseable by float().
METRIC_LINE_RE = re.compile(
    r"^\s*(?P<key>[A-Z0-9_]+)\s*=\s*"
    r"(?P<val>[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?)\s*$",
    re.UNICODE
)

def coerce_value(s: str):
    """Convert one raw hyperparameter token into a Python value.

    Args:
        s: Text of a single value, surrounding whitespace allowed.

    Returns:
        ``None`` for an empty token or for "none"/"null"/"nan" (any case);
        an ``int`` for an optionally signed run of digits; a ``float`` for a
        decimal and/or scientific-notation number (e.g. "0.5", ".5", "1e-05");
        otherwise the stripped token itself, for textual hyperparameters.
    """
    token = s.strip()
    if not token or token.lower() in {"none", "null", "nan"}:
        return None

    if re.fullmatch(r"[+-]?\d+", token):
        try:
            return int(token)
        except ValueError:
            # Only reachable for absurdly long digit strings (interpreter
            # int-conversion limit); keep the text rather than crash.
            return token

    # A float needs a decimal point or an exponent; plain digit runs were
    # handled above. Everything this pattern accepts is valid for float().
    if re.fullmatch(r"[+-]?(?:(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+)", token):
        return float(token)

    return token  # textual hyperparameter

def parse_hp_names(names_raw: str) -> List[str]:
    """Split a "+"-separated list of hyperparameter names.

    Example: "a + b + c" -> ["a", "b", "c"]. Pieces that are empty after
    trimming whitespace are discarded.
    """
    names = []
    for piece in names_raw.split("+"):
        piece = piece.strip()
        if piece:
            names.append(piece)
    return names

def parse_hp_vals(vals_raw: str) -> List[Any]:
    """Split a comma-separated value list and coerce every item.

    Example: "1, 2, None" -> [1, 2, None]. Each comma-delimited piece is
    passed through ``coerce_value``.
    """
    return list(map(coerce_value, vals_raw.split(",")))

def kv_from_names_vals(names: List[str], vals: List[Any]) -> Dict[str, Any]:
    """Pair hyperparameter names with values by position.

    Names that have no value at their position map to ``None``; values beyond
    the last name are ignored.
    """
    available = len(vals)
    paired = {}
    for position, name in enumerate(names):
        paired[name] = vals[position] if position < available else None
    return paired

def parse_section_metrics(text_block: str) -> Dict[str, float]:
    """Collect "KEY = value" metric lines from one results section.

    Every line is trimmed and tested against ``METRIC_LINE_RE``; lines that do
    not match are ignored. If a key occurs more than once, the last value wins.
    """
    hits = (METRIC_LINE_RE.match(row.strip()) for row in text_block.splitlines())
    return {hit.group("key").strip(): float(hit.group("val")) for hit in hits if hit}

def split_dev_test(body: str) -> Tuple[Dict[str, float], Dict[str, float]]:
    """Split a step body into DEV and TEST metric dictionaries.

    ``SECTION_RE.split`` yields the text before the first header followed by
    alternating (tag, section text) items. A split with no section stays an
    empty dict; if a tag repeats, the last section for it wins.

    Returns:
        ``(dev_metrics, test_metrics)``.
    """
    pieces = SECTION_RE.split(body)
    by_split = {"DEV": {}, "TEST": {}}
    # Odd indices hold the captured tag, the following even index its text.
    for label, chunk in zip(pieces[1::2], pieces[2::2]):
        label = label.upper()
        if label in by_split:
            by_split[label] = parse_section_metrics(chunk)
    return by_split["DEV"], by_split["TEST"]

def parse_blocks_dynamic(text_src: str) -> List[Dict[str, Any]]:
    """Parse every step block of the log into a flat record.

    The text is first cleaned with ``normalize_box_text``. Each ``BLOCK_RE``
    match becomes one dict holding ``"step"``, one key per hyperparameter
    name, and the metrics of each split under ``<metric>_DEV`` /
    ``<metric>_TEST`` keys.

    Returns:
        The records ordered by ascending step number.
    """
    records = []
    for block in BLOCK_RE.finditer(normalize_box_text(text_src)):
        record = {"step": int(block.group("step"))}
        names = parse_hp_names(block.group("hpnames"))
        values = parse_hp_vals(block.group("hpvals"))
        record.update(kv_from_names_vals(names, values))

        dev_metrics, test_metrics = split_dev_test(block.group("body"))
        record.update({f"{name}_DEV": value for name, value in dev_metrics.items()})
        record.update({f"{name}_TEST": value for name, value in test_metrics.items()})
        records.append(record)
    return sorted(records, key=lambda record: record["step"])

def rows_to_dataframe_dynamic(rows: List[Dict[str, Any]]) -> pd.DataFrame:
    """Build a DataFrame from parsed records; an empty list gives an empty frame."""
    if not rows:
        return pd.DataFrame()
    return pd.DataFrame(rows)

def view_sorted_split(df: pd.DataFrame, metric: str, split: str,
                      top: int = 10, ascending: bool = False,
                      include_both: bool = False) -> pd.DataFrame:
    """Return the top rows of ``df`` sorted by one metric of one split.

    Column order of the result: ``step`` (when present) and every other
    column that does not end in ``_DEV``/``_TEST`` (the hyperparameters), then
    the sort column ``<metric>_<SPLIT>``, then the remaining metric columns in
    alphabetical order.

    Args:
        df: Table with hyperparameter columns and ``*_DEV`` / ``*_TEST``
            metric columns.
        metric: Metric name without the split suffix, e.g. "UAR_WSM".
        split: "DEV" or "TEST" (case-insensitive).
        top: Number of rows to return.
        ascending: Sort direction; ``False`` puts the largest values first.
        include_both: Keep metric columns of both splits instead of only the
            selected one.

    Returns:
        A new DataFrame with at most ``top`` rows.

    Raises:
        ValueError: If ``df`` has no columns at all, or if the requested sort
            column is not among the kept metric columns.
    """
    split = split.upper()
    sort_col = f"{metric}_{split}"

    # A frame without any columns has nothing to sort or list; report that
    # directly instead of the "column not found, available: []" message.
    if df.shape[1] == 0:
        raise ValueError(
            f"Таблица пуста (нет ни одной колонки): колонка {sort_col} недоступна. "
            f"Проверьте, что из лога распознан хотя бы один блок 'Шаг N:'."
        )

    def _is_metric(col) -> bool:
        return col.endswith("_DEV") or col.endswith("_TEST")

    # Meta columns = "step" first (only if the frame has it, so .loc below
    # cannot fail on a missing label), then every other non-metric column.
    meta_cols = [c for c in df.columns if c == "step"][:1]
    meta_cols += [c for c in df.columns if not _is_metric(c) and c != "step"]

    # Metric columns to keep.
    all_metric_cols = [c for c in df.columns if _is_metric(c)]
    if include_both:
        keep_metric_cols = all_metric_cols[:]
    else:
        keep_metric_cols = [c for c in all_metric_cols if c.endswith(f"_{split}")]

    if sort_col not in keep_metric_cols:
        available = sorted(set(c.rsplit("_", 1)[0] for c in keep_metric_cols))
        raise ValueError(f"Колонка {sort_col} не найдена в выбранном сплите {split}. "
                         f"Доступные метрики: {available}")

    # Sort metric first, the rest after it in alphabetical order.
    metric_cols_ordered = [sort_col] + [c for c in sorted(keep_metric_cols) if c != sort_col]
    ordered_cols = meta_cols + metric_cols_ordered

    out = df.loc[:, ordered_cols].sort_values(sort_col, ascending=ascending)
    return out.head(top)

# ==== Parse the log and show the table ====
# `text` is the log content loaded in an earlier cell.
# NOTE(review): the saved output of this cell is a ValueError listing no
# available metrics, i.e. `df` had no *_TEST columns when it ran; presumably no
# step blocks were parsed from `text`. Confirm which log was loaded.
_rows = parse_blocks_dynamic(text)
df = rows_to_dataframe_dynamic(_rows)

# Select/sort according to the settings at the top of this cell.
result_table = view_sorted_split(df, metric=METRIC, split=SPLIT,
                                 top=TOP, ascending=ASC, include_both=INCLUDE_BOTH)
# Bare last expression: rendered as the cell output.
result_table


ValueError: Колонка UAR_WSM_TEST не найдена в выбранном сплите TEST. Доступные метрики: []

In [None]:
results_transformer_2025-09-05_08-42-02