# Температурный пайплайн (модульный)
*Последнее обновление: 2025-09-07*


In [82]:
# === 1) Параметры минимального пайплайна ===
import os, io, csv, glob
from typing import List, Tuple, Optional, Dict

import numpy as np
import pandas as pd
from IPython.display import display

# Parameters
# DATE_FORMAT — explicit date format of the first column of the input files; leave it empty
#   to let pandas infer the format (read_one_table then parses with dayfirst=True).
# REF_IDX — index of the reference sensor (column T{REF_IDX}); follower channels are gated by it.
# N_FOLLOW — number of follower sensors analysed, i.e. columns T{REF_IDX+1}..T{REF_IDX+N_FOLLOW}
#   (indices above 15 are dropped when the list of followers is built).
# WINDOW_N — rolling-window length, in samples, used by the stability masks.
# STD_THR — maximum rolling standard deviation for a window to count as stable
#   (applied to the reference and to every follower).
# DIFF_THR — maximum rolling mean of |x[i] - x[i-1]|; applied to the reference channel only.
# MIN_LEN — minimum number of samples for a segment to be reported.
# DEG_TOL — width of a reference-level bucket: segments are split where floor(ref / DEG_TOL) changes.
# MAX_REF_RANGE — segments whose reference channel spans more than this (max - min) are discarded.
# GROUP_BY_FILE — when True (and a source_file column exists) every source file is processed separately.
DATE_FORMAT = ''
REF_IDX = 8
N_FOLLOW = 3
WINDOW_N = 50
STD_THR = 1e-3
DIFF_THR = 2e-3
MIN_LEN = 20
DEG_TOL = 1.0
MAX_REF_RANGE = 0.3
GROUP_BY_FILE = True

# Sensor display names for the report.
# globals().get keeps a value that was already defined earlier in the session.
REF_NAME = globals().get('REF_NAME', 'Эталон 2-1')
SENSOR_NAMES = globals().get('SENSOR_NAMES', {
    'T9': 'Датчик 1-7',
    'T10': 'Датчик 1-8',
    'T11': 'Датчик 1-9',
})
# Alternative: a list of names in the order of the sensors T{REF_IDX+1}..T{REF_IDX+N_FOLLOW}


In [83]:
# === 2) Загрузка/парсинг ===
def sniff_sep(sample: bytes) -> str:
    """Guess the column delimiter of a delimited-text sample.

    The sample is decoded as UTF-8 (undecodable bytes are ignored) and handed
    to ``csv.Sniffer`` restricted to ``,``, ``;``, tab and ``|``.  When the
    sniffer cannot decide, the candidate that occurs most often in the first
    line wins.

    Returns:
        The detected delimiter, or ``','`` when the sample is empty, decodes
        to nothing, or contains none of the candidates.
    """
    candidates = [',', ';', '\t', '|']
    text = sample.decode('utf-8', errors='ignore')
    try:
        return csv.Sniffer().sniff(text, delimiters=''.join(candidates)).delimiter
    except Exception:
        # Best effort: any sniffing failure falls back to a first-line heuristic.
        lines = text.splitlines()
        # A non-empty sample can still decode to an empty string (only
        # undecodable bytes), so never index the line list blindly.
        first_line = lines[0] if lines else ''
        # max() keeps the earliest candidate on ties, so a line without any
        # candidate character yields ','.
        return max(candidates, key=first_line.count)

def read_one_table(name: str, stream: io.BytesIO, date_format: Optional[str]=None) -> pd.DataFrame:
    """Parse one log file into a normalised temperature table.

    The delimiter is sniffed from the first 8192 bytes of ``stream``.  Only
    the first 17 columns are kept and renamed to ``date`` and ``T0``..``T15``.
    Rows whose date cannot be parsed are dropped (a warning is printed), and
    temperature cells that are not numeric become NaN.

    Args:
        name: Label stored in the ``source_file`` column and used in messages.
        stream: Seekable binary stream with the file content.
        date_format: Explicit ``strptime`` format for the date column; when
            empty or None pandas infers the format with ``dayfirst=True``.

    Returns:
        DataFrame with columns ``date``, ``T0``..``T15`` and ``source_file``.

    Raises:
        ValueError: If the file has fewer than 17 columns.
    """
    head = stream.read(8192)
    stream.seek(0)
    sep = sniff_sep(head)
    df = pd.read_csv(stream, sep=sep, engine='python')
    if df.shape[1] < 17:
        raise ValueError(f"{name}: найдено {df.shape[1]} столбцов, требуется >= 17 (1 дата + 16 температур).")
    df = df.iloc[:, :17].copy()
    temp_cols = [f'T{i}' for i in range(16)]
    df.columns = ['date'] + temp_cols
    if date_format and date_format.strip():
        df['date'] = pd.to_datetime(df['date'], format=date_format, errors='coerce')
    else:
        # `infer_datetime_format` is intentionally not passed: it is deprecated
        # since pandas 2.0, where format inference is the default behaviour.
        df['date'] = pd.to_datetime(df['date'], errors='coerce', dayfirst=True)
    bad = int(df['date'].isna().sum())
    if bad:
        print(f"[Предупреждение] {name}: {bad} строк с нераспознанной датой отброшены.")
        df = df.dropna(subset=['date'])
    for c in temp_cols:
        df[c] = pd.to_numeric(df[c], errors='coerce')
    df['source_file'] = name
    return df


In [84]:
# === 3) Функции стабильности (минимум) ===
import numpy as np
import pandas as pd
from typing import List, Tuple, Sequence, Dict, Optional

def rolling_std_mask(series, window, threshold):
    """Mark samples whose trailing window is steady enough.

    The rolling standard deviation is taken over the last ``window`` samples
    and requires a full window, so the first ``window - 1`` positions are NaN.
    Returns a boolean Series that is True where the deviation does not exceed
    ``threshold``; NaN positions compare as False.
    """
    trailing_std = series.rolling(window=window, min_periods=window).std()
    is_steady = trailing_std <= threshold
    return is_steady

def rolling_mean_abs_diff_mask(series, window, diff_threshold):
    """Mark samples whose recent step-to-step change is small on average.

    Takes the absolute difference between consecutive samples, averages it
    over a full trailing window of ``window`` values and returns a boolean
    Series that is True where that mean does not exceed ``diff_threshold``.
    Positions without a full window compare as False.
    """
    step_size = series.diff().abs()
    mean_step = step_size.rolling(window=window, min_periods=window).mean()
    is_flat = mean_step <= diff_threshold
    return is_flat

def segments_from_mask(mask, window):
    """Turn a rolling-window mask into inclusive ``(start, end)`` index pairs.

    A True at position ``i`` stands for the window ``[i - window + 1, i]``
    (clipped at 0).  Windows of consecutive True positions are merged into one
    segment; a False position closes the current segment.  Because the next
    segment starts ``window - 1`` samples before its first True position,
    segments on either side of a short False gap can overlap.
    """
    import numpy as _np
    flags = mask.to_numpy() if hasattr(mask, 'to_numpy') else _np.asarray(mask)
    found = []
    open_seg = None
    for pos, flag in enumerate(flags):
        if not flag:
            # A failing window ends whatever segment was being built.
            if open_seg is not None:
                found.append(open_seg)
                open_seg = None
            continue
        lo = max(0, pos - window + 1)
        if open_seg is None:
            open_seg = (lo, pos)
        elif lo <= open_seg[1] + 1:
            open_seg = (open_seg[0], max(open_seg[1], pos))
        else:
            found.append(open_seg)
            open_seg = (lo, pos)
    if open_seg is not None:
        found.append(open_seg)
    return found

def summarize_interval(df, cols, s, e):
    """Describe the inclusive row range ``s..e`` of ``df``.

    The result holds the bounds (``start_idx``, ``end_idx``, ``length``), the
    ``date`` values found at labels ``s`` and ``e`` (NaT when the label is not
    below ``len(df)``) and, for every column in ``cols``, NaN-aware
    ``mean_``/``std_``/``min_``/``max_``/``range_`` statistics.  The standard
    deviation uses ``ddof=1`` and needs at least two valid values; a column
    without valid values yields NaN for every statistic.
    """
    n_rows = len(df)
    summary = {
        'start_idx': int(s),
        'end_idx': int(e),
        'length': int(e - s + 1),
        'start_date': pd.to_datetime(df.loc[s, 'date']) if s < n_rows else pd.NaT,
        'end_date': pd.to_datetime(df.loc[e, 'date']) if e < n_rows else pd.NaT,
    }
    for col in cols:
        chunk = df[col].to_numpy()[s:e + 1]
        n_valid = int((~np.isnan(chunk)).sum())
        if n_valid == 0:
            avg = spread = low = high = span = np.nan
        else:
            avg = float(np.nanmean(chunk))
            spread = float(np.nanstd(chunk, ddof=1)) if n_valid > 1 else np.nan
            low = float(np.nanmin(chunk))
            high = float(np.nanmax(chunk))
            span = high - low
        summary[f'mean_{col}'] = avg
        summary[f'std_{col}'] = spread
        summary[f'min_{col}'] = low
        summary[f'max_{col}'] = high
        summary[f'range_{col}'] = span
    return summary

def median_level(series, s, e):
    """Return the NaN-ignoring median of ``series`` over positions ``s..e`` (inclusive)."""
    first, last = int(s), int(e)
    window_vals = series.to_numpy()[first:last + 1]
    return float(np.nanmedian(window_vals))

def split_segment_by_ref_buckets(ref_series, s, e, deg_tol):
    """Split ``s..e`` wherever the reference value moves to another bucket.

    Each sample is assigned the bucket ``floor(value / deg_tol)``.  A NaN
    sample inherits the bucket of the preceding sample; leading NaNs take the
    first valid bucket.  Returns inclusive ``(start, end)`` pairs covering the
    whole range, or ``[(s, e)]`` unchanged when the range holds no valid
    sample.
    """
    window_vals = ref_series.to_numpy()[s:e + 1]
    if len(window_vals) == 0 or np.isnan(window_vals).all():
        return [(s, e)]

    levels = np.floor(window_vals / float(deg_tol)).astype('float64')
    valid = ~np.isnan(levels)
    # Position of the latest valid sample at or before each index (-1 = none yet).
    source = np.maximum.accumulate(np.where(valid, np.arange(len(levels)), -1))
    levels = np.where(source >= 0, levels[source], levels[valid][0])

    # Offsets (relative to s) at which the bucket differs from the previous sample.
    cuts = (np.flatnonzero(levels[1:] != levels[:-1]) + 1).tolist()
    starts = [s] + [s + cut for cut in cuts]
    ends = [s + cut - 1 for cut in cuts] + [e]
    return list(zip(starts, ends))

def detect_stability_improved(data, ref_idx, follow_idxs, window=20, std_thr=0.05, diff_thr=0.002, min_len=20, group_by_file=True, split_by_ref_buckets=True, deg_tol=1.0, max_ref_range=0.3):
    """Find stable intervals of the reference channel and its followers.

    The reference column ``T{ref_idx}`` counts as stable where both its
    rolling standard deviation and its rolling mean absolute step stay within
    ``std_thr`` / ``diff_thr``; a follower column counts as stable where its
    rolling standard deviation stays within ``std_thr``.

    Returns a ``(joint, by_sensor)`` pair of DataFrames (an empty DataFrame
    when nothing qualifies).  ``joint`` lists intervals where the reference
    and every follower are stable together; ``by_sensor`` lists intervals per
    follower, gated by the reference.  Intervals shorter than ``min_len`` are
    dropped, both before and after the optional split by reference bucket
    (``split_by_ref_buckets`` / ``deg_tol``), as are intervals whose reference
    range exceeds ``max_ref_range`` (``None`` disables that check).

    Raises:
        ValueError: If a required ``T*`` column is missing from ``data``.
    """
    ref_col = f'T{ref_idx}'
    follow_cols = [f'T{i}' for i in follow_idxs]
    for col in [ref_col] + follow_cols:
        if col not in data.columns:
            raise ValueError(f'Missing column: {col}')

    if group_by_file and 'source_file' in data.columns:
        # Smallest index label per file; added to per-file positions to get *_idx_abs.
        first_idx_by_source = data.index.to_series().groupby(data['source_file']).min().astype(int).to_dict()
        groups = list(data.groupby('source_file', sort=False))
    else:
        first_idx_by_source = None
        groups = [('ALL', data)]

    joint_rows = []
    by_sensor_rows = []

    for src, raw in groups:
        frame = raw.reset_index(drop=True)
        ref_values = frame[ref_col]
        ref_stable = (
            rolling_std_mask(ref_values, window, std_thr)
            & rolling_mean_abs_diff_mask(ref_values, window, diff_thr)
        ).to_numpy()
        follower_stable = {
            col: rolling_std_mask(frame[col], window, std_thr).to_numpy()
            for col in follow_cols
        }
        all_stable = ref_stable.copy()
        for col in follow_cols:
            all_stable &= follower_stable[col]

        if src != 'ALL' and first_idx_by_source is not None:
            offset = int(first_idx_by_source.get(src, 0))
        else:
            offset = 0

        def _collect(mask, cols, extra, sink):
            """Append one summary row per qualifying piece of every segment of ``mask``."""
            for s, e in segments_from_mask(mask, window):
                if e - s + 1 < int(min_len):
                    continue
                if split_by_ref_buckets:
                    pieces = split_segment_by_ref_buckets(ref_values, s, e, deg_tol)
                else:
                    pieces = [(s, e)]
                for ss, ee in pieces:
                    if ee - ss + 1 < int(min_len):
                        continue
                    row = {'source_file': src}
                    row.update(extra)
                    row.update(summarize_interval(frame, cols, ss, ee))
                    row['start_idx_abs'] = row['start_idx'] + offset
                    row['end_idx_abs'] = row['end_idx'] + offset
                    row['ref_level'] = median_level(ref_values, ss, ee)
                    row['range_ref'] = row.get(f'range_{ref_col}', np.nan)
                    too_wide = (
                        max_ref_range is not None
                        and not np.isnan(row['range_ref'])
                        and row['range_ref'] > max_ref_range
                    )
                    if too_wide:
                        continue
                    sink.append(row)

        _collect(all_stable, [ref_col] + follow_cols,
                 {'ref': ref_col, 'followers': ','.join(follow_cols)}, joint_rows)
        for col in follow_cols:
            _collect(follower_stable[col] & ref_stable, [col, ref_col],
                     {'sensor': col, 'ref': ref_col}, by_sensor_rows)

    if joint_rows:
        joint = pd.DataFrame(joint_rows).sort_values(['source_file', 'start_idx'])
    else:
        joint = pd.DataFrame()
    if by_sensor_rows:
        by_sensor = pd.DataFrame(by_sensor_rows).sort_values(['source_file', 'sensor', 'start_idx'])
    else:
        by_sensor = pd.DataFrame()
    return joint, by_sensor

def select_longest_per_degree(table, data, ref_idx, mode, deg_tol=1.0, strategy='bucket_centered', centered=True):
    """Keep the longest interval per reference-level bucket or cluster.

    ``table`` is an interval table with ``source_file``, ``start_idx``,
    ``end_idx`` and ``length`` columns.  A missing ``ref_level`` column is
    filled with the median of ``T{ref_idx}`` taken from ``data``.  Rows are
    grouped by ``source_file`` (plus ``sensor`` when ``mode == 'by_sensor'``
    and that column exists).

    With ``centered`` true or ``strategy`` equal to ``'bucket'`` /
    ``'bucket_centered'``, levels are binned in steps of ``deg_tol`` (bins are
    shifted by half a step unless ``strategy == 'bucket'`` and ``centered`` is
    false) and the longest row of each bin is kept.  Otherwise rows sorted by
    level are clustered greedily within ``deg_tol`` of the cluster's first
    level and the longest row (ties: larger ``end_idx``) is kept.

    Returns ``table`` itself when it is None or empty, otherwise a new
    DataFrame with a fresh index.
    """
    if table is None or table.empty:
        return table

    ref_col = f'T{ref_idx}'
    tbl = table.copy()

    if 'ref_level' not in tbl.columns:
        levels = []
        for _, rec in tbl.iterrows():
            src = rec['source_file']
            first, last = int(rec['start_idx']), int(rec['end_idx'])
            if src != 'ALL':
                frame = data[data['source_file'] == src].reset_index(drop=True)
            else:
                frame = data
            levels.append(median_level(frame[ref_col], first, last))
        tbl['ref_level'] = levels

    if mode == 'by_sensor' and 'sensor' in tbl.columns:
        group_keys = ['source_file', 'sensor']
    else:
        group_keys = ['source_file']

    picked = []
    for _, part in tbl.sort_values('ref_level').groupby(group_keys, as_index=False):
        ordered = part.sort_values('ref_level').reset_index(drop=True)

        if strategy in ('bucket', 'bucket_centered') or centered:
            if strategy == 'bucket_centered' or centered:
                bins = np.floor((ordered['ref_level'] + 0.5*deg_tol)/deg_tol).astype(int)
            else:
                bins = np.floor(ordered['ref_level']/deg_tol).astype(int)
            binned = ordered.assign(_bin=bins)
            longest = binned.sort_values('length', ascending=False).groupby('_bin', as_index=False).head(1)
            picked.append(longest.drop(columns=['_bin']))
            continue

        # Greedy clustering: a cluster spans levels within deg_tol of its first row.
        pos, total = 0, len(ordered)
        while pos < total:
            anchor = ordered.loc[pos, 'ref_level']
            stop = pos
            while stop + 1 < total and (ordered.loc[stop + 1, 'ref_level'] - anchor) <= deg_tol:
                stop += 1
            cluster = ordered.loc[pos:stop].copy()
            picked.append(cluster.sort_values(['length', 'end_idx'], ascending=[False, False]).iloc[0:1])
            pos = stop + 1

    return pd.concat(picked, ignore_index=True) if picked else tbl


In [85]:
# === 3.1) Widgets/upload — simplified ===
print('Секция виджетов удалена в упрощённом режиме.')
# Hard-coded flag; the dialog cell (section 4) reads it to choose between the Colab and Jupyter paths.
IN_COLAB=False

Секция виджетов удалена в упрощённом режиме.


In [None]:
# === 4) Upload through a dialog (Colab/Jupyter) ===
# Picks the upload mechanism for the environment and does NOT block the kernel with long waits.
# Result: the globals SELECTED_FILES and IS_BYTES_INPUT.
try:
    if IN_COLAB:
        # Google Colab: built-in uploader.
        # NOTE(review): `files` is never imported in the visible part of this notebook
        # (presumably google.colab.files — confirm). If it is undefined, this branch
        # raises NameError, which the outer handler below reports.
        print('Выберите один или несколько .csv/.txt файлов…')
        uploads = files.upload()
        SELECTED_FILES = [('uploaded:' + name, content) for name, content in uploads.items()]
        IS_BYTES_INPUT = True
        print('Загружено файлов:', len(SELECTED_FILES))
    else:
        # Local Jupyter: try ipywidgets first (it does not block execution).
        try:
            import ipywidgets as widgets
            from IPython.display import display
            uploader = widgets.FileUpload(accept='.csv,.txt', multiple=True)
            status = widgets.Output()

            def _extract_files(val):
                """Return ``[('uploaded:<name>', content), ...]`` from a FileUpload value."""
                found = []
                try:
                    if isinstance(val, dict):
                        # Mapping form (ipywidgets v7): {filename: item}.
                        for _name, _item in val.items():
                            try:
                                content = _item.get('content') if isinstance(_item, dict) else getattr(_item, 'content', None)
                            except Exception:
                                content = None
                            if _name is not None and content is not None:
                                found.append(('uploaded:' + _name, content))
                    elif isinstance(val, (list, tuple)):
                        # Sequence form (ipywidgets v8): items expose name/content as keys or attributes.
                        for _it in val:
                            if isinstance(_it, dict):
                                name2, content2 = _it.get('name'), _it.get('content')
                            else:
                                name2, content2 = getattr(_it, 'name', None), getattr(_it, 'content', None)
                            if name2 is not None and content2 is not None:
                                found.append(('uploaded:' + name2, content2))
                except Exception as _e:
                    with status:
                        print('Ошибка разборки загруженных файлов:', _e)
                return found

            def _on_change(change):
                """Publish the uploaded files to the notebook globals once any are present."""
                global SELECTED_FILES, IS_BYTES_INPUT
                picked = _extract_files(uploader.value)
                if picked:
                    SELECTED_FILES = picked
                    IS_BYTES_INPUT = True
                    with status:
                        status.clear_output()
                        print('Загружено файлов:', len(SELECTED_FILES))

            uploader.observe(_on_change, names='value')
            display(uploader, status)
            print('Выберите файлы в виджете выше, затем перейдите к шагу 6.')
        except Exception as e_wid:
            # Report why the widget path failed instead of discarding the reason.
            print('Виджет загрузки недоступен:', e_wid)
            # Fallback: native file dialog (may not work in a headless session).
            try:
                import tkinter as tk
                from tkinter import filedialog
                root = tk.Tk()
                try:
                    root.withdraw()
                    print('Откроется системное окно выбора файлов…')
                    paths = list(filedialog.askopenfilenames(title='Выберите .csv/.txt', filetypes=[('CSV/TXT','*.csv *.txt'), ('All','*.*')]))
                finally:
                    # Release the hidden root window; otherwise it stays alive for the whole session.
                    root.destroy()
                if paths:
                    SELECTED_FILES = paths
                    IS_BYTES_INPUT = False
                    print('Выбрано файлов:', len(SELECTED_FILES))
                else:
                    print('Диалог закрыт без выбора. Повторите или используйте шаг 5.')
            except Exception as e_tk:
                print('Не удалось открыть диалог выбора файлов. Сообщение:', e_tk)
                print('В качестве альтернативы можно указать пути вручную и сразу перейти к шагу 6.')
except Exception as e:
    print('Ошибка при загрузке через диалог:', e)


FileUpload(value={}, accept='.csv,.txt', description='Upload', multiple=True)

Output()

Выберите файлы в виджете выше, затем перейдите к шагу 6.


In [87]:
# === 5) Google Drive — simplified ===
print('Секция Google Drive пропущена в упрощённом режиме.')


Секция Google Drive пропущена в упрощённом режиме.


In [88]:
# === 6) Load the data ===
# Produces the globals `data` / `DATA`; per-file read failures are collected in `errors`.
frames, errors = [], []
import os, io
if os.path.exists('combined_temperatures.csv'):
    # A previously combined table in the working directory takes precedence over the selected files.
    data = pd.read_csv('combined_temperatures.csv')
    DATA = data.copy()
    print('Загружен combined_temperatures.csv:', data.shape)
else:
    assert 'SELECTED_FILES' in globals(), 'Выберите файлы (секция 4).'
    for item in SELECTED_FILES:
        # Uploaded entries are (name, bytes) pairs; anything else is treated as a file path.
        is_upload = isinstance(item, tuple) and len(item) == 2
        # Label failures by file name only: str(item) on an upload would embed its whole content.
        label = str(item[0]) if is_upload else str(item)
        try:
            if is_upload:
                name, content = item
                bio = io.BytesIO(content)
                df = read_one_table(name, bio, date_format=DATE_FORMAT or None)
            else:
                path = item
                with open(path, 'rb') as f:
                    df = read_one_table(os.path.basename(path), io.BytesIO(f.read()), date_format=DATE_FORMAT or None)
            frames.append(df)
        except Exception as e:
            errors.append((label, str(e)))
    if errors:
        # Surface skipped files; otherwise a partially failed load would look complete.
        print('[Предупреждение] Не удалось прочитать файлов:', len(errors))
        print('\n'.join(f'  {bad_name}: {reason}' for bad_name, reason in errors))
    if not frames:
        raise RuntimeError('Не удалось прочитать ни один файл.')
    data = pd.concat(frames, ignore_index=True).sort_values('date').reset_index(drop=True)
    # Keep only the date, the reference channel, its followers and the source label.
    follow_idxs = [REF_IDX + i for i in range(1, N_FOLLOW + 1) if REF_IDX + i <= 15]
    cols_keep = ['date', f'T{REF_IDX}'] + [f'T{i}' for i in follow_idxs] + ['source_file']
    data = data[[c for c in cols_keep if c in data.columns]]
    DATA = data
    print('Собранный датафрейм:', data.shape)
display(DATA.head(10))


Загружен combined_temperatures.csv: (25934, 6)


Unnamed: 0,date,T8,T9,T10,T11,source_file
0,2025-08-15 19:43:16.634,26.74,26.776,27.003,26.844,uploaded:Log_192.168.77.10_2025_08_15.txt
1,2025-08-15 19:43:17.642,26.739,26.777,27.002,26.844,uploaded:Log_192.168.77.10_2025_08_15.txt
2,2025-08-15 19:43:18.642,26.739,26.777,27.001,26.843,uploaded:Log_192.168.77.10_2025_08_15.txt
3,2025-08-15 19:43:19.643,26.738,26.776,26.998,26.843,uploaded:Log_192.168.77.10_2025_08_15.txt
4,2025-08-15 19:43:20.643,26.738,26.775,26.998,26.842,uploaded:Log_192.168.77.10_2025_08_15.txt
5,2025-08-15 19:43:21.644,26.737,26.774,26.997,26.841,uploaded:Log_192.168.77.10_2025_08_15.txt
6,2025-08-15 19:43:22.644,26.738,26.775,26.994,26.841,uploaded:Log_192.168.77.10_2025_08_15.txt
7,2025-08-15 19:43:23.645,26.736,26.774,26.993,26.84,uploaded:Log_192.168.77.10_2025_08_15.txt
8,2025-08-15 19:43:24.645,26.735,26.774,26.992,26.839,uploaded:Log_192.168.77.10_2025_08_15.txt
9,2025-08-15 19:43:25.646,26.734,26.774,26.99,26.839,uploaded:Log_192.168.77.10_2025_08_15.txt


In [89]:
# --- Guard: ensure stability functions exist ---
# Fallback definitions, used only when the stability helpers are not already in the session.
if 'detect_stability_improved' not in globals() or 'select_longest_per_degree' not in globals():
    import numpy as np
    import pandas as pd
    from typing import List, Tuple, Sequence, Dict, Optional

    def rolling_std_mask(series, window, threshold):
        """True where the full-window rolling standard deviation is within ``threshold``."""
        trailing_std = series.rolling(window=window, min_periods=window).std()
        is_steady = trailing_std <= threshold
        return is_steady

    def rolling_mean_abs_diff_mask(series, window, diff_threshold):
        """True where the full-window rolling mean of absolute steps is within ``diff_threshold``."""
        step_size = series.diff().abs()
        mean_step = step_size.rolling(window=window, min_periods=window).mean()
        is_flat = mean_step <= diff_threshold
        return is_flat

    def segments_from_mask(mask, window):
        """Turn a rolling-window mask into inclusive ``(start, end)`` pairs.

        A True at ``i`` stands for the window ``[i - window + 1, i]`` (clipped
        at 0); windows of consecutive True positions are merged and a False
        position closes the current segment.
        """
        import numpy as _np
        flags = mask.to_numpy() if hasattr(mask, 'to_numpy') else _np.asarray(mask)
        found = []
        open_seg = None
        for pos, flag in enumerate(flags):
            if not flag:
                if open_seg is not None:
                    found.append(open_seg)
                    open_seg = None
                continue
            lo = max(0, pos - window + 1)
            if open_seg is None:
                open_seg = (lo, pos)
            elif lo <= open_seg[1] + 1:
                open_seg = (open_seg[0], max(open_seg[1], pos))
            else:
                found.append(open_seg)
                open_seg = (lo, pos)
        if open_seg is not None:
            found.append(open_seg)
        return found

    def summarize_interval(df, cols, s, e):
        """Describe rows ``s..e``: bounds, boundary dates and NaN-aware statistics per column."""
        n_rows = len(df)
        summary = {
            'start_idx': int(s),
            'end_idx': int(e),
            'length': int(e - s + 1),
            'start_date': pd.to_datetime(df.loc[s, 'date']) if s < n_rows else pd.NaT,
            'end_date': pd.to_datetime(df.loc[e, 'date']) if e < n_rows else pd.NaT,
        }
        for col in cols:
            chunk = df[col].to_numpy()[s:e + 1]
            n_valid = int((~np.isnan(chunk)).sum())
            if n_valid == 0:
                avg = spread = low = high = span = np.nan
            else:
                avg = float(np.nanmean(chunk))
                # The sample standard deviation needs at least two valid values.
                spread = float(np.nanstd(chunk, ddof=1)) if n_valid > 1 else np.nan
                low = float(np.nanmin(chunk))
                high = float(np.nanmax(chunk))
                span = high - low
            summary[f'mean_{col}'] = avg
            summary[f'std_{col}'] = spread
            summary[f'min_{col}'] = low
            summary[f'max_{col}'] = high
            summary[f'range_{col}'] = span
        return summary

    def median_level(series, s, e):
        """NaN-ignoring median of ``series`` over positions ``s..e`` (inclusive)."""
        first, last = int(s), int(e)
        window_vals = series.to_numpy()[first:last + 1]
        return float(np.nanmedian(window_vals))

    def split_segment_by_ref_buckets(ref_series, s, e, deg_tol):
        """Split ``s..e`` wherever ``floor(value / deg_tol)`` changes.

        NaN samples inherit the previous bucket (leading NaNs take the first
        valid one).  Returns ``[(s, e)]`` when the range has no valid sample.
        """
        window_vals = ref_series.to_numpy()[s:e + 1]
        if len(window_vals) == 0 or np.isnan(window_vals).all():
            return [(s, e)]

        levels = np.floor(window_vals / float(deg_tol)).astype('float64')
        valid = ~np.isnan(levels)
        # Position of the latest valid sample at or before each index (-1 = none yet).
        source = np.maximum.accumulate(np.where(valid, np.arange(len(levels)), -1))
        levels = np.where(source >= 0, levels[source], levels[valid][0])

        cuts = (np.flatnonzero(levels[1:] != levels[:-1]) + 1).tolist()
        starts = [s] + [s + cut for cut in cuts]
        ends = [s + cut - 1 for cut in cuts] + [e]
        return list(zip(starts, ends))

    def detect_stability_improved(data, ref_idx, follow_idxs, window=20, std_thr=0.05, diff_thr=0.002, min_len=20, group_by_file=True, split_by_ref_buckets=True, deg_tol=1.0, max_ref_range=0.3):
        """Find stable intervals of the reference channel and its followers.

        Returns ``(joint, by_sensor)`` DataFrames (empty when nothing
        qualifies): intervals where the reference and all followers are stable
        together, and intervals per follower gated by the reference.  Raises
        ValueError when a required ``T*`` column is missing.
        """
        ref_col = f'T{ref_idx}'
        follow_cols = [f'T{i}' for i in follow_idxs]
        for col in [ref_col] + follow_cols:
            if col not in data.columns:
                raise ValueError(f'Missing column: {col}')

        if group_by_file and 'source_file' in data.columns:
            # Smallest index label per file; added to per-file positions to get *_idx_abs.
            first_idx_by_source = data.index.to_series().groupby(data['source_file']).min().astype(int).to_dict()
            groups = list(data.groupby('source_file', sort=False))
        else:
            first_idx_by_source = None
            groups = [('ALL', data)]

        joint_rows = []
        by_sensor_rows = []

        for src, raw in groups:
            frame = raw.reset_index(drop=True)
            ref_values = frame[ref_col]
            ref_stable = (
                rolling_std_mask(ref_values, window, std_thr)
                & rolling_mean_abs_diff_mask(ref_values, window, diff_thr)
            ).to_numpy()
            follower_stable = {
                col: rolling_std_mask(frame[col], window, std_thr).to_numpy()
                for col in follow_cols
            }
            all_stable = ref_stable.copy()
            for col in follow_cols:
                all_stable &= follower_stable[col]

            if src != 'ALL' and first_idx_by_source is not None:
                offset = int(first_idx_by_source.get(src, 0))
            else:
                offset = 0

            def _collect(mask, cols, extra, sink):
                """Append one summary row per qualifying piece of every segment of ``mask``."""
                for s, e in segments_from_mask(mask, window):
                    if e - s + 1 < int(min_len):
                        continue
                    if split_by_ref_buckets:
                        pieces = split_segment_by_ref_buckets(ref_values, s, e, deg_tol)
                    else:
                        pieces = [(s, e)]
                    for ss, ee in pieces:
                        if ee - ss + 1 < int(min_len):
                            continue
                        row = {'source_file': src}
                        row.update(extra)
                        row.update(summarize_interval(frame, cols, ss, ee))
                        row['start_idx_abs'] = row['start_idx'] + offset
                        row['end_idx_abs'] = row['end_idx'] + offset
                        row['ref_level'] = median_level(ref_values, ss, ee)
                        row['range_ref'] = row.get(f'range_{ref_col}', np.nan)
                        too_wide = (
                            max_ref_range is not None
                            and not np.isnan(row['range_ref'])
                            and row['range_ref'] > max_ref_range
                        )
                        if too_wide:
                            continue
                        sink.append(row)

            _collect(all_stable, [ref_col] + follow_cols,
                     {'ref': ref_col, 'followers': ','.join(follow_cols)}, joint_rows)
            for col in follow_cols:
                _collect(follower_stable[col] & ref_stable, [col, ref_col],
                         {'sensor': col, 'ref': ref_col}, by_sensor_rows)

        if joint_rows:
            joint = pd.DataFrame(joint_rows).sort_values(['source_file', 'start_idx'])
        else:
            joint = pd.DataFrame()
        if by_sensor_rows:
            by_sensor = pd.DataFrame(by_sensor_rows).sort_values(['source_file', 'sensor', 'start_idx'])
        else:
            by_sensor = pd.DataFrame()
        return joint, by_sensor

    def select_longest_per_degree(table, data, ref_idx, mode, deg_tol=1.0, strategy='bucket_centered', centered=True):
        """Keep the longest interval per reference-level bin (or greedy cluster) within each group.

        Returns ``table`` itself when it is None or empty, otherwise a new
        DataFrame with a fresh index.
        """
        if table is None or table.empty:
            return table

        ref_col = f'T{ref_idx}'
        tbl = table.copy()

        if 'ref_level' not in tbl.columns:
            levels = []
            for _, rec in tbl.iterrows():
                src = rec['source_file']
                first, last = int(rec['start_idx']), int(rec['end_idx'])
                if src != 'ALL':
                    frame = data[data['source_file'] == src].reset_index(drop=True)
                else:
                    frame = data
                levels.append(median_level(frame[ref_col], first, last))
            tbl['ref_level'] = levels

        if mode == 'by_sensor' and 'sensor' in tbl.columns:
            group_keys = ['source_file', 'sensor']
        else:
            group_keys = ['source_file']

        picked = []
        for _, part in tbl.sort_values('ref_level').groupby(group_keys, as_index=False):
            ordered = part.sort_values('ref_level').reset_index(drop=True)

            if strategy in ('bucket', 'bucket_centered') or centered:
                if strategy == 'bucket_centered' or centered:
                    bins = np.floor((ordered['ref_level'] + 0.5*deg_tol)/deg_tol).astype(int)
                else:
                    bins = np.floor(ordered['ref_level']/deg_tol).astype(int)
                binned = ordered.assign(_bin=bins)
                longest = binned.sort_values('length', ascending=False).groupby('_bin', as_index=False).head(1)
                picked.append(longest.drop(columns=['_bin']))
                continue

            # Greedy clustering: a cluster spans levels within deg_tol of its first row.
            pos, total = 0, len(ordered)
            while pos < total:
                anchor = ordered.loc[pos, 'ref_level']
                stop = pos
                while stop + 1 < total and (ordered.loc[stop + 1, 'ref_level'] - anchor) <= deg_tol:
                    stop += 1
                cluster = ordered.loc[pos:stop].copy()
                picked.append(cluster.sort_values(['length', 'end_idx'], ascending=[False, False]).iloc[0:1])
                pos = stop + 1

        return pd.concat(picked, ignore_index=True) if picked else tbl

# === 7) Stable-interval search (improved) ===
assert DATA is not None, 'Нет DATA.'
# Follower channels come right after the reference; indices above 15 do not exist.
follow_idxs = [idx for idx in range(REF_IDX + 1, REF_IDX + N_FOLLOW + 1) if idx <= 15]
STABLE_JOINT, STABLE_BY_SENSOR = detect_stability_improved(
    DATA,
    ref_idx=REF_IDX,
    follow_idxs=follow_idxs,
    window=WINDOW_N,
    std_thr=STD_THR,
    diff_thr=DIFF_THR,
    min_len=MIN_LEN,
    group_by_file=GROUP_BY_FILE,
    split_by_ref_buckets=True,
    deg_tol=DEG_TOL,
    max_ref_range=MAX_REF_RANGE,
)
print(
    'Обнаружено интервалов: совместных =',
    len(STABLE_JOINT) if STABLE_JOINT is not None else 0,
    '; по датчику =',
    len(STABLE_BY_SENSOR) if STABLE_BY_SENSOR is not None else 0,
)
if not (STABLE_BY_SENSOR is None or STABLE_BY_SENSOR.empty):
    display(STABLE_BY_SENSOR.head(5))


Обнаружено интервалов: совместных = 161 ; по датчику = 375


Unnamed: 0,source_file,sensor,ref,start_idx,end_idx,length,start_date,end_date,mean_T9,std_T9,...,mean_T10,std_T10,min_T10,max_T10,range_T10,mean_T11,std_T11,min_T11,max_T11,range_T11
121,uploaded:Log_192.168.77.10_2025_08_15.txt,T10,T8,2549,2615,67,2025-08-15 20:25:46.786,2025-08-15 20:26:52.819,,,...,19.219493,0.000927,19.218,19.223,0.005,,,,,
122,uploaded:Log_192.168.77.10_2025_08_15.txt,T10,T8,2570,2630,61,2025-08-15 20:26:07.797,2025-08-15 20:27:07.826,,,...,19.220049,0.000956,19.218,19.223,0.005,,,,,
123,uploaded:Log_192.168.77.10_2025_08_15.txt,T10,T8,2598,2650,53,2025-08-15 20:26:35.810,2025-08-15 20:27:27.836,,,...,19.220868,0.00102,19.219,19.223,0.004,,,,,
124,uploaded:Log_192.168.77.10_2025_08_15.txt,T10,T8,2632,2847,216,2025-08-15 20:27:09.827,2025-08-15 20:30:44.925,,,...,19.221273,0.000892,19.22,19.224,0.004,,,,,
125,uploaded:Log_192.168.77.10_2025_08_15.txt,T10,T8,2816,2955,140,2025-08-15 20:30:13.909,2025-08-15 20:32:32.973,,,...,19.222336,0.000979,19.22,19.224,0.004,,,,,


## 7.1 Отбор плато: самое длинное в пределах ±DEG_TOL °C по эталону


In [90]:
# --- Guard: ensure stability functions exist ---
if 'detect_stability_improved' not in globals() or 'select_longest_per_degree' not in globals():
    import numpy as np
    import pandas as pd
    from typing import List, Tuple, Sequence, Dict, Optional

    def rolling_std_mask(series, window, threshold):
        """True where the full-window rolling standard deviation is within ``threshold``.

        The first ``window - 1`` positions have no full window and compare as False.
        """
        trailing_std = series.rolling(window=window, min_periods=window).std()
        is_steady = trailing_std <= threshold
        return is_steady

    def rolling_mean_abs_diff_mask(series, window, diff_threshold):
        """True where the full-window rolling mean of absolute steps is within ``diff_threshold``.

        Positions without a full window of differences compare as False.
        """
        step_size = series.diff().abs()
        mean_step = step_size.rolling(window=window, min_periods=window).mean()
        is_flat = mean_step <= diff_threshold
        return is_flat

    def segments_from_mask(mask, window):
        """Turn a rolling-window mask into inclusive ``(start, end)`` pairs.

        A True at ``i`` stands for the window ``[i - window + 1, i]`` (clipped
        at 0); windows of consecutive True positions are merged and a False
        position closes the current segment.
        """
        import numpy as _np
        flags = mask.to_numpy() if hasattr(mask, 'to_numpy') else _np.asarray(mask)
        found = []
        open_seg = None
        for pos, flag in enumerate(flags):
            if not flag:
                if open_seg is not None:
                    found.append(open_seg)
                    open_seg = None
                continue
            lo = max(0, pos - window + 1)
            if open_seg is None:
                open_seg = (lo, pos)
            elif lo <= open_seg[1] + 1:
                open_seg = (open_seg[0], max(open_seg[1], pos))
            else:
                found.append(open_seg)
                open_seg = (lo, pos)
        if open_seg is not None:
            found.append(open_seg)
        return found

    def summarize_interval(df, cols, s, e):
        """Describe rows ``s..e``: bounds, boundary dates and NaN-aware statistics per column.

        A column without valid values yields NaN for every statistic; the
        standard deviation (``ddof=1``) needs at least two valid values.
        """
        n_rows = len(df)
        summary = {
            'start_idx': int(s),
            'end_idx': int(e),
            'length': int(e - s + 1),
            'start_date': pd.to_datetime(df.loc[s, 'date']) if s < n_rows else pd.NaT,
            'end_date': pd.to_datetime(df.loc[e, 'date']) if e < n_rows else pd.NaT,
        }
        for col in cols:
            chunk = df[col].to_numpy()[s:e + 1]
            n_valid = int((~np.isnan(chunk)).sum())
            if n_valid == 0:
                avg = spread = low = high = span = np.nan
            else:
                avg = float(np.nanmean(chunk))
                spread = float(np.nanstd(chunk, ddof=1)) if n_valid > 1 else np.nan
                low = float(np.nanmin(chunk))
                high = float(np.nanmax(chunk))
                span = high - low
            summary[f'mean_{col}'] = avg
            summary[f'std_{col}'] = spread
            summary[f'min_{col}'] = low
            summary[f'max_{col}'] = high
            summary[f'range_{col}'] = span
        return summary

    def median_level(series, s, e):
        """NaN-ignoring median of ``series`` over positions ``s..e`` (inclusive)."""
        first, last = int(s), int(e)
        window_vals = series.to_numpy()[first:last + 1]
        return float(np.nanmedian(window_vals))

    def split_segment_by_ref_buckets(ref_series, s, e, deg_tol):
        """Split ``s..e`` wherever ``floor(value / deg_tol)`` changes.

        NaN samples inherit the previous bucket (leading NaNs take the first
        valid one).  Returns ``[(s, e)]`` when the range has no valid sample.
        """
        window_vals = ref_series.to_numpy()[s:e + 1]
        if len(window_vals) == 0 or np.isnan(window_vals).all():
            return [(s, e)]

        levels = np.floor(window_vals / float(deg_tol)).astype('float64')
        valid = ~np.isnan(levels)
        # Position of the latest valid sample at or before each index (-1 = none yet).
        source = np.maximum.accumulate(np.where(valid, np.arange(len(levels)), -1))
        levels = np.where(source >= 0, levels[source], levels[valid][0])

        cuts = (np.flatnonzero(levels[1:] != levels[:-1]) + 1).tolist()
        starts = [s] + [s + cut for cut in cuts]
        ends = [s + cut - 1 for cut in cuts] + [e]
        return list(zip(starts, ends))

    def detect_stability_improved(data, ref_idx, follow_idxs, window=20, std_thr=0.05, diff_thr=0.002, min_len=20, group_by_file=True, split_by_ref_buckets=True, deg_tol=1.0, max_ref_range=0.3):
        """Find stable intervals of the reference channel and its followers.

        Returns ``(joint, by_sensor)`` DataFrames (empty when nothing
        qualifies): intervals where the reference and all followers are stable
        together, and intervals per follower gated by the reference.  Intervals
        shorter than ``min_len`` or with a reference range above
        ``max_ref_range`` (``None`` disables the check) are dropped.  Raises
        ValueError when a required ``T*`` column is missing.
        """
        ref_col = f'T{ref_idx}'
        follow_cols = [f'T{i}' for i in follow_idxs]
        for col in [ref_col] + follow_cols:
            if col not in data.columns:
                raise ValueError(f'Missing column: {col}')

        if group_by_file and 'source_file' in data.columns:
            # Smallest index label per file; added to per-file positions to get *_idx_abs.
            first_idx_by_source = data.index.to_series().groupby(data['source_file']).min().astype(int).to_dict()
            groups = list(data.groupby('source_file', sort=False))
        else:
            first_idx_by_source = None
            groups = [('ALL', data)]

        joint_rows = []
        by_sensor_rows = []

        for src, raw in groups:
            frame = raw.reset_index(drop=True)
            ref_values = frame[ref_col]
            ref_stable = (
                rolling_std_mask(ref_values, window, std_thr)
                & rolling_mean_abs_diff_mask(ref_values, window, diff_thr)
            ).to_numpy()
            follower_stable = {
                col: rolling_std_mask(frame[col], window, std_thr).to_numpy()
                for col in follow_cols
            }
            all_stable = ref_stable.copy()
            for col in follow_cols:
                all_stable &= follower_stable[col]

            if src != 'ALL' and first_idx_by_source is not None:
                offset = int(first_idx_by_source.get(src, 0))
            else:
                offset = 0

            def _collect(mask, cols, extra, sink):
                """Append one summary row per qualifying piece of every segment of ``mask``."""
                for s, e in segments_from_mask(mask, window):
                    if e - s + 1 < int(min_len):
                        continue
                    if split_by_ref_buckets:
                        pieces = split_segment_by_ref_buckets(ref_values, s, e, deg_tol)
                    else:
                        pieces = [(s, e)]
                    for ss, ee in pieces:
                        if ee - ss + 1 < int(min_len):
                            continue
                        row = {'source_file': src}
                        row.update(extra)
                        row.update(summarize_interval(frame, cols, ss, ee))
                        row['start_idx_abs'] = row['start_idx'] + offset
                        row['end_idx_abs'] = row['end_idx'] + offset
                        row['ref_level'] = median_level(ref_values, ss, ee)
                        row['range_ref'] = row.get(f'range_{ref_col}', np.nan)
                        too_wide = (
                            max_ref_range is not None
                            and not np.isnan(row['range_ref'])
                            and row['range_ref'] > max_ref_range
                        )
                        if too_wide:
                            continue
                        sink.append(row)

            _collect(all_stable, [ref_col] + follow_cols,
                     {'ref': ref_col, 'followers': ','.join(follow_cols)}, joint_rows)
            for col in follow_cols:
                _collect(follower_stable[col] & ref_stable, [col, ref_col],
                         {'sensor': col, 'ref': ref_col}, by_sensor_rows)

        if joint_rows:
            joint = pd.DataFrame(joint_rows).sort_values(['source_file', 'start_idx'])
        else:
            joint = pd.DataFrame()
        if by_sensor_rows:
            by_sensor = pd.DataFrame(by_sensor_rows).sort_values(['source_file', 'sensor', 'start_idx'])
        else:
            by_sensor = pd.DataFrame()
        return joint, by_sensor

    def select_longest_per_degree(table, data, ref_idx, mode, deg_tol=1.0, strategy='bucket_centered', centered=True):
        """Keep only the longest interval per reference-temperature shelf.

        Rows are grouped by ``source_file`` (plus ``sensor`` when
        ``mode == 'by_sensor'`` and that column exists). Inside each group the
        rows are assigned to shelves by their ``ref_level`` and the row with
        the largest ``length`` is kept for every shelf. Ties on ``length`` are
        broken deterministically in favour of the larger ``end_idx`` (when the
        column is present), in both the bucket and the cluster branch.

        Args:
            table: Interval table; ``None`` or an empty table is returned as is.
            data: Source measurements; only read when ``table`` has no
                ``ref_level`` column and it has to be recomputed.
            ref_idx: Index of the reference channel (column ``T{ref_idx}``).
            mode: ``'by_sensor'`` to select per sensor, anything else per file.
            deg_tol: Shelf width, in the units of ``ref_level``.
            strategy: ``'bucket'`` / ``'bucket_centered'`` use fixed bins;
                any other value uses greedy clustering (only reachable when
                ``centered`` is False).
            centered: Forces fixed bins centred on multiples of ``deg_tol``.

        Returns:
            A new DataFrame with one row per (group, shelf).
        """
        if table is None or table.empty:
            return table
        ref_col = f'T{ref_idx}'
        tbl = table.copy()
        if 'ref_level' not in tbl.columns:
            # Recompute the reference level of every interval from the raw data.
            ref_levels = []
            for _, row in tbl.iterrows():
                src = row['source_file']
                s, e = int(row['start_idx']), int(row['end_idx'])
                g = data[data['source_file'] == src].reset_index(drop=True) if src != 'ALL' else data
                ref_levels.append(median_level(g[ref_col], s, e))
            tbl['ref_level'] = ref_levels

        per_sensor = mode == 'by_sensor' and 'sensor' in tbl.columns
        group_keys = ['source_file', 'sensor'] if per_sensor else ['source_file']
        # Longest first; equal lengths resolved by the later end index.
        # A stable sort keeps the outcome reproducible between runs.
        rank_cols = ['length', 'end_idx'] if 'end_idx' in tbl.columns else ['length']
        use_bins = strategy in ('bucket', 'bucket_centered') or centered
        centre_bins = strategy == 'bucket_centered' or centered

        out = []
        for _, grp in tbl.sort_values('ref_level').groupby(group_keys, as_index=False):
            g = grp.sort_values('ref_level', kind='mergesort').reset_index(drop=True)
            if use_bins:
                shift = 0.5 * deg_tol if centre_bins else 0.0
                bins = np.floor((g['ref_level'] + shift) / deg_tol).astype(int)
                ranked = g.assign(_bin=bins).sort_values(rank_cols, ascending=False, kind='mergesort')
                keep = ranked.groupby('_bin', as_index=False).head(1)
                out.append(keep.drop(columns=['_bin']))
            else:
                # Greedy clustering: a cluster spans at most deg_tol from its first level.
                i, n = 0, len(g)
                while i < n:
                    start_val = g.loc[i, 'ref_level']
                    j = i
                    while j + 1 < n and (g.loc[j + 1, 'ref_level'] - start_val) <= deg_tol:
                        j += 1
                    cluster = g.loc[i:j].copy()
                    keep = cluster.sort_values(rank_cols, ascending=False, kind='mergesort').iloc[0:1]
                    out.append(keep)
                    i = j + 1
        return pd.concat(out, ignore_index=True) if out else tbl

# === 8) Отбор по полочкам: один самый длинный интервал на корзину ===
# The selection below needs the loaded measurements; stop with a clear message otherwise.
assert DATA is not None, 'Нет DATA.'
# Per-sensor table: keep the longest interval per centred DEG_TOL-wide bin for each sensor.
# The global is rebound, so re-running this cell works on the already filtered table.
if STABLE_BY_SENSOR is not None and not STABLE_BY_SENSOR.empty:
    STABLE_BY_SENSOR = select_longest_per_degree(STABLE_BY_SENSOR, DATA, REF_IDX, mode='by_sensor', deg_tol=DEG_TOL, strategy='bucket_centered', centered=True)
    print('STABLE_BY_SENSOR после отбора:', len(STABLE_BY_SENSOR))
    display(STABLE_BY_SENSOR.head(10))
# Joint table: same selection, grouped by source file only (mode='joint').
if STABLE_JOINT is not None and not STABLE_JOINT.empty:
    STABLE_JOINT = select_longest_per_degree(STABLE_JOINT, DATA, REF_IDX, mode='joint', deg_tol=DEG_TOL, strategy='bucket_centered', centered=True)
    print('STABLE_JOINT после отбора:', len(STABLE_JOINT))
    display(STABLE_JOINT.head(10))


STABLE_BY_SENSOR после отбора: 15


Unnamed: 0,source_file,sensor,ref,start_idx,end_idx,length,start_date,end_date,mean_T9,std_T9,...,mean_T10,std_T10,min_T10,max_T10,range_T10,mean_T11,std_T11,min_T11,max_T11,range_T11
0,uploaded:Log_192.168.77.10_2025_08_15.txt,T10,T8,7595,10923,3329,2025-08-15 21:49:55.050,2025-08-15 22:45:24.584,,,...,23.214228,0.000714,23.211,23.217,0.006,,,,,
1,uploaded:Log_192.168.77.10_2025_08_15.txt,T10,T8,13498,15781,2284,2025-08-15 23:28:20.757,2025-08-16 00:06:24.796,,,...,27.203373,0.000901,27.2,27.207,0.007,,,,,
2,uploaded:Log_192.168.77.10_2025_08_15.txt,T10,T8,4526,5715,1190,2025-08-15 20:58:44.690,2025-08-15 21:18:34.236,,,...,19.220958,0.000872,19.219,19.224,0.005,,,,,
3,uploaded:Log_192.168.77.10_2025_08_15.txt,T10,T8,19902,20343,442,2025-08-16 01:15:07.667,2025-08-16 01:22:28.865,,,...,31.193084,0.001119,31.19,31.197,0.007,,,,,
4,uploaded:Log_192.168.77.10_2025_08_15.txt,T10,T8,21883,22026,144,2025-08-16 01:48:09.566,2025-08-16 01:50:32.632,,,...,35.169771,0.001002,35.166,35.172,0.006,,,,,
5,uploaded:Log_192.168.77.10_2025_08_15.txt,T11,T8,7595,10931,3337,2025-08-15 21:49:55.050,2025-08-15 22:45:32.588,,,...,,,,,,23.048051,0.00076,23.044,23.051,0.007
6,uploaded:Log_192.168.77.10_2025_08_15.txt,T11,T8,3313,6082,2770,2025-08-15 20:38:31.135,2025-08-15 21:24:41.403,,,...,,,,,,19.053984,0.000916,19.051,19.057,0.006
7,uploaded:Log_192.168.77.10_2025_08_15.txt,T11,T8,12577,14342,1766,2025-08-15 23:12:59.337,2025-08-15 23:42:25.143,,,...,,,,,,27.035089,0.001009,27.032,27.038,0.006
8,uploaded:Log_192.168.77.10_2025_08_15.txt,T11,T8,19902,20356,455,2025-08-16 01:15:07.667,2025-08-16 01:22:41.872,,,...,,,,,,31.026727,0.001127,31.023,31.031,0.008
9,uploaded:Log_192.168.77.10_2025_08_15.txt,T11,T8,22479,22640,162,2025-08-16 01:58:05.838,2025-08-16 02:00:46.913,,,...,,,,,,35.002704,0.000984,35.001,35.005,0.004


STABLE_JOINT после отбора: 5


Unnamed: 0,source_file,ref,followers,start_idx,end_idx,length,start_date,end_date,mean_T8,std_T8,...,range_T10,mean_T11,std_T11,min_T11,max_T11,range_T11,start_idx_abs,end_idx_abs,ref_level,range_ref
0,uploaded:Log_192.168.77.10_2025_08_15.txt,T8,"T9,T10,T11",7614,10923,3310,2025-08-15 21:50:14.058,2025-08-15 22:45:24.584,22.945693,0.000694,...,0.005,23.04806,0.000736,23.044,23.051,0.007,7614,10923,22.946,0.006
1,uploaded:Log_192.168.77.10_2025_08_15.txt,T8,"T9,T10,T11",4526,5715,1190,2025-08-15 20:58:44.690,2025-08-15 21:18:34.236,18.951103,0.000681,...,0.005,19.053768,0.000747,19.052,19.056,0.004,4526,5715,18.951,0.006
2,uploaded:Log_192.168.77.10_2025_08_15.txt,T8,"T9,T10,T11",13498,14342,845,2025-08-15 23:28:20.757,2025-08-15 23:42:25.143,26.930291,0.000846,...,0.005,27.035465,0.000908,27.032,27.038,0.006,13498,14342,26.93,0.005
3,uploaded:Log_192.168.77.10_2025_08_15.txt,T8,"T9,T10,T11",19986,20307,322,2025-08-16 01:16:31.704,2025-08-16 01:21:52.850,30.908925,0.001056,...,0.007,31.026863,0.000995,31.024,31.03,0.006,19986,20307,30.909,0.005
4,uploaded:Log_192.168.77.10_2025_08_15.txt,T8,"T9,T10,T11",22507,22634,128,2025-08-16 01:58:33.852,2025-08-16 02:00:40.911,34.874852,0.000653,...,0.003,35.002789,0.000911,35.001,35.004,0.003,22507,22634,34.875,0.003


In [91]:
# === 9) Калибровочная таблица (X–Y и ошибки) ===
# Build one calibration point per selected stable interval:
# X = mean reading of the sensor, Y = mean reading of the reference channel,
# with the matching standard deviations as error estimates.
assert DATA is not None, 'Нет DATA.'
ref_col = f'T{REF_IDX}'
# Explicit column list: the table keeps its schema even when no rows are
# produced, so the dropna(subset=...) below cannot fail with a KeyError.
CALIBRATION_COLUMNS = [
    'source_file', 'sensor', 'ref',
    'bin_center', 'bin_low', 'bin_high',
    'x_mean', 'x_std', 'y_mean', 'y_std',
    'n_samples', 'start_date', 'end_date',
]
rows = []
if STABLE_BY_SENSOR is not None and not STABLE_BY_SENSOR.empty:
    for _, r in STABLE_BY_SENSOR.iterrows():
        sensor = r['sensor']
        x_mean = r.get(f'mean_{sensor}', None)
        x_std = r.get(f'std_{sensor}', None)
        y_mean = r.get(f'mean_{ref_col}', None)
        y_std = r.get(f'std_{ref_col}', None)
        ref_level = r.get('ref_level', None)
        length = int(r.get('length', 0))
        start_date = r.get('start_date', None)
        end_date = r.get('end_date', None)
        src = r.get('source_file', None)
        if ref_level is not None and not pd.isna(ref_level):
            # Same centred binning as select_longest_per_degree:
            # floor(x + tol/2) rounds half-way levels up, whereas np.round
            # rounds them to the nearest even bin and could label a point
            # with a different shelf than the one it was selected for.
            bin_center = float(np.floor((ref_level + 0.5*DEG_TOL) / DEG_TOL) * DEG_TOL)
            bin_low = bin_center - 0.5*DEG_TOL
            bin_high = bin_center + 0.5*DEG_TOL
        else:
            bin_center = np.nan
            bin_low = np.nan
            bin_high = np.nan
        rows.append({
            'source_file': src, 'sensor': sensor, 'ref': ref_col,
            'bin_center': bin_center, 'bin_low': bin_low, 'bin_high': bin_high,
            'x_mean': x_mean, 'x_std': x_std, 'y_mean': y_mean, 'y_std': y_std,
            'n_samples': length, 'start_date': start_date, 'end_date': end_date
        })
# Points without both means cannot be used for calibration.
calibration_points_by_sensor = (
    pd.DataFrame(rows, columns=CALIBRATION_COLUMNS)
    .dropna(subset=['x_mean', 'y_mean'])
    .reset_index(drop=True)
)
print('Калибровочных точек:', len(calibration_points_by_sensor))
display(calibration_points_by_sensor.head(10))


Калибровочных точек: 15


Unnamed: 0,source_file,sensor,ref,bin_center,bin_low,bin_high,x_mean,x_std,y_mean,y_std,n_samples,start_date,end_date
0,uploaded:Log_192.168.77.10_2025_08_15.txt,T10,T8,23.0,22.5,23.5,23.214228,0.000714,22.945675,0.000733,3329,2025-08-15 21:49:55.050,2025-08-15 22:45:24.584
1,uploaded:Log_192.168.77.10_2025_08_15.txt,T10,T8,27.0,26.5,27.5,27.203373,0.000901,26.930758,0.000877,2284,2025-08-15 23:28:20.757,2025-08-16 00:06:24.796
2,uploaded:Log_192.168.77.10_2025_08_15.txt,T10,T8,19.0,18.5,19.5,19.220958,0.000872,18.951103,0.000681,1190,2025-08-15 20:58:44.690,2025-08-15 21:18:34.236
3,uploaded:Log_192.168.77.10_2025_08_15.txt,T10,T8,31.0,30.5,31.5,31.193084,0.001119,30.908769,0.001165,442,2025-08-16 01:15:07.667,2025-08-16 01:22:28.865
4,uploaded:Log_192.168.77.10_2025_08_15.txt,T10,T8,35.0,34.5,35.5,35.169771,0.001002,34.868563,0.000914,144,2025-08-16 01:48:09.566,2025-08-16 01:50:32.632
5,uploaded:Log_192.168.77.10_2025_08_15.txt,T11,T8,23.0,22.5,23.5,23.048051,0.00076,22.945678,0.000735,3337,2025-08-15 21:49:55.050,2025-08-15 22:45:32.588
6,uploaded:Log_192.168.77.10_2025_08_15.txt,T11,T8,19.0,18.5,19.5,19.053984,0.000916,18.951265,0.000772,2770,2025-08-15 20:38:31.135,2025-08-15 21:24:41.403
7,uploaded:Log_192.168.77.10_2025_08_15.txt,T11,T8,27.0,26.5,27.5,27.035089,0.001009,26.930103,0.000855,1766,2025-08-15 23:12:59.337,2025-08-15 23:42:25.143
8,uploaded:Log_192.168.77.10_2025_08_15.txt,T11,T8,31.0,30.5,31.5,31.026727,0.001127,30.908774,0.001151,455,2025-08-16 01:15:07.667,2025-08-16 01:22:41.872
9,uploaded:Log_192.168.77.10_2025_08_15.txt,T11,T8,35.0,34.5,35.5,35.002704,0.000984,34.874772,0.00069,162,2025-08-16 01:58:05.838,2025-08-16 02:00:46.913


In [92]:
# === 9.1) Сохранение калибровочной таблицы ===
# Write the calibration table next to the notebook (current working directory).
out_dir = os.getcwd()
calibration_points_path = os.path.join(out_dir, 'calibration_points_by_sensor.csv')
calibration_points_by_sensor.to_csv(calibration_points_path, index=False)
print('Сохранено:', calibration_points_path)


Сохранено: c:\Users\Alexander\Documents\GitHub\sensor_calibration\calibration_points_by_sensor.csv


In [93]:
# === 10) Быстрый график калибровочных точек с ошибками ===
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Guard: the figures below are drawn from the calibration table.
assert 'calibration_points_by_sensor' in globals() and not calibration_points_by_sensor.empty, 'Нет калибровочной таблицы.'

# One figure per sensor: scatter of (x_mean, y_mean) with error bars on top,
# grouped bars of the X/Y standard deviations per shelf below.
for sensor, g in calibration_points_by_sensor.groupby('sensor'):
    g = g.sort_values('bin_center') if 'bin_center' in g.columns else g.sort_values('x_mean')
    x_vals = g['x_mean'].to_numpy(dtype=float)
    y_vals = g['y_mean'].to_numpy(dtype=float)
    # Missing std columns are replaced by NaN placeholders of matching length.
    x_err_raw = g['x_std'].to_numpy(dtype=float) if 'x_std' in g else np.full_like(x_vals, np.nan)
    y_err_raw = g['y_std'].to_numpy(dtype=float) if 'y_std' in g else np.full_like(y_vals, np.nan)

    def _err_dict(arr: np.ndarray):
        # Plotly error-bar spec; NaN becomes 0, and None is returned when
        # every value is zero so that no error bars are configured at all.
        clean = np.nan_to_num(arr, nan=0.0)
        if not np.any(clean):
            return None
        return dict(type='data', array=clean, visible=True)

    error_x = _err_dict(x_err_raw)
    error_y = _err_dict(y_err_raw)

    # Bar categories: shelf centres formatted with one decimal, or row numbers as a fallback.
    labels = [f"{bc:.1f}" for bc in g['bin_center']] if 'bin_center' in g.columns else [str(i) for i in range(len(g))]
    fig = make_subplots(rows=2, cols=1, row_heights=[0.65, 0.35], vertical_spacing=0.12)

    fig.add_trace(
        go.Scatter(
            x=x_vals,
            y=y_vals,
            mode='markers',
            name='Точки',
            error_x=error_x,
            error_y=error_y,
            marker=dict(size=9)
        ),
        row=1,
        col=1
    )

    fig.add_trace(
        go.Bar(
            x=labels,
            y=np.nan_to_num(x_err_raw, nan=0.0),
            name='std X'
        ),
        row=2,
        col=1
    )
    fig.add_trace(
        go.Bar(
            x=labels,
            y=np.nan_to_num(y_err_raw, nan=0.0),
            name='std Y'
        ),
        row=2,
        col=1
    )

    fig.update_xaxes(title_text=f'{sensor} (X)', row=1, col=1, showgrid=True, gridcolor='rgba(0,0,0,0.15)')
    fig.update_yaxes(title_text=f'T{REF_IDX} (Y)', row=1, col=1, showgrid=True, gridcolor='rgba(0,0,0,0.15)')
    # categoryarray pins the bar order to the order of `labels`.
    fig.update_xaxes(title_text='Центр полочки (°C)', row=2, col=1, type='category', showgrid=True, gridcolor='rgba(0,0,0,0.12)', categoryorder='array', categoryarray=labels)
    fig.update_yaxes(title_text='Std (°C)', row=2, col=1, showgrid=True, gridcolor='rgba(0,0,0,0.12)')

    fig.update_layout(
        height=620,
        width=760,
        title_text=f'Калибровочные точки: {sensor}',
        barmode='group',
        legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='center', x=0.5),
        margin=dict(t=80, l=60, r=40, b=70)
    )

    fig.show()

In [94]:
# === 11) Калибровочные кривые (полиномы) ===
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Guard: the polynomial fits below need a non-empty calibration table.
assert 'calibration_points_by_sensor' in globals() and not calibration_points_by_sensor.empty, 'Нет калибровочной таблицы.'

def _poly_formula(coeffs):
    # coeffs in descending order: c0*x^n + c1*x^(n-1) + ... + cN
    deg = len(coeffs) - 1
    terms = []
    for i, c in enumerate(coeffs):
        p = deg - i
        if abs(c) < 1e-15:
            continue
        coef = f"{c:.8g}"
        if p == 0:
            term = f"{coef}"
        elif p == 1:
            term = f"{coef}·X"
        else:
            term = f"{coef}·X^{p}"
        terms.append(term)
    if not terms:
        return 'Y = 0'
    s = ' + '.join(terms).replace('+ -', '- ')
    return 'Y = ' + s

# For every sensor: fit weighted polynomials of degree 1..max(1, n-2) to the
# calibration points, print the formulas with error metrics, and plot the
# points, the linear and the highest-degree curves, and the residuals.
for sensor, g in calibration_points_by_sensor.groupby('sensor'):
    g = g.sort_values('x_mean').reset_index(drop=True)
    x = g['x_mean'].to_numpy(dtype=float)
    y = g['y_mean'].to_numpy(dtype=float)
    y_std = g['y_std'].to_numpy(dtype=float) if 'y_std' in g else np.full_like(y, np.nan)
    # np.polyfit expects w = 1/sigma. A NaN sigma would turn into a NaN weight
    # and break every fit, so unknown sigmas take the median of the known
    # ones; with no sigma at all the fit is simply unweighted.
    sigma_known = ~np.isnan(y_std)
    if sigma_known.any():
        sigma_filled = np.where(sigma_known, y_std, np.median(y_std[sigma_known]))
        w = 1.0 / np.clip(sigma_filled, 1e-9, np.inf)
    else:
        w = None
    n = len(x)
    deg_max = max(1, n - 2)

    fits = []
    for d in range(1, deg_max + 1):
        try:
            coefs = np.polyfit(x, y, deg=d, w=w)
            p = np.poly1d(coefs)
            yhat = p(x)
            resid = y - yhat
            rmse = float(np.sqrt(np.mean(resid**2)))
            mae = float(np.mean(np.abs(resid)))
            mx = float(np.max(np.abs(resid)))
            fits.append({'deg': d, 'coefs': coefs, 'poly': p, 'rmse': rmse, 'mae': mae, 'maxerr': mx, 'resid': resid})
        except Exception as e:
            # Best effort: a failed degree is reported and the others are still shown.
            print(f"[warn] {sensor}: не удалось аппроксимировать степенью {d}: {e}")

    print(f"Сенсор {sensor}: {n} точек; степени 1..{deg_max}")
    for f in fits:
        cdesc = ', '.join([f"{c:.8g}" for c in f['coefs']])
        print(f"deg={f['deg']}: {_poly_formula(f['coefs'])}")
        print(f"    coeffs (старшая→младшая): [{cdesc}] | RMSE={f['rmse']:.6g}, MAE={f['mae']:.6g}, Max|err|={f['maxerr']:.6g}")

    # Dense grid for smooth curves; a single point is plotted as is.
    xs = np.linspace(float(x.min()), float(x.max()), 200) if n > 1 else x
    x_err = g['x_std'].to_numpy(dtype=float) if 'x_std' in g else np.full_like(x, np.nan)
    y_err = g['y_std'].to_numpy(dtype=float) if 'y_std' in g else np.full_like(y, np.nan)

    def _err_dict(arr: np.ndarray):
        # Plotly error-bar spec; None when every value is zero or NaN.
        clean = np.nan_to_num(arr, nan=0.0)
        if not np.any(clean):
            return None
        return dict(type='data', array=clean, visible=True)

    error_x = _err_dict(x_err)
    error_y = _err_dict(y_err)

    fig = make_subplots(rows=2, cols=1, row_heights=[0.65, 0.35], vertical_spacing=0.12)
    fig.add_trace(
        go.Scatter(
            x=x,
            y=y,
            mode='markers',
            name='Точки',
            marker=dict(size=9),
            error_x=error_x,
            error_y=error_y
        ),
        row=1,
        col=1
    )

    # Only the linear and the highest-degree fits are drawn as curves.
    p1 = next((f['poly'] for f in fits if f['deg'] == 1), None)
    pN = max(fits, key=lambda f: f['deg'])['poly'] if fits else None
    if p1 is not None:
        fig.add_trace(
            go.Scatter(x=xs, y=p1(xs), mode='lines', name='deg=1'),
            row=1, col=1
        )
    if pN is not None and (p1 is None or pN.order != 1):
        fig.add_trace(
            go.Scatter(x=xs, y=pN(xs), mode='lines', name=f"deg={pN.order}"),
            row=1, col=1
        )

    labels = [f"{bc:.1f}" for bc in g['bin_center']] if 'bin_center' in g.columns else [f"{xi:.2f}" for xi in x]
    for f in fits:
        fig.add_trace(
            go.Bar(
                x=labels,
                y=f['resid'],
                name=f"deg={f['deg']} (остатки)"
            ),
            row=2,
            col=1
        )

    fig.update_xaxes(title_text=f'{sensor} (X)', row=1, col=1, showgrid=True, gridcolor='rgba(0,0,0,0.15)')
    fig.update_yaxes(title_text=f'T{REF_IDX} (Y)', row=1, col=1, showgrid=True, gridcolor='rgba(0,0,0,0.15)')
    fig.update_xaxes(title_text='Полочка (центр)', row=2, col=1, type='category', showgrid=True, gridcolor='rgba(0,0,0,0.12)', categoryorder='array', categoryarray=labels)
    fig.update_yaxes(title_text='Y - Y_hat (°C)', row=2, col=1, showgrid=True, gridcolor='rgba(0,0,0,0.12)')

    fig.update_layout(
        height=640,
        width=860,
        title_text=f'Калибровка: {sensor}',
        barmode='group',
        legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='center', x=0.5),
        margin=dict(t=80, l=60, r=40, b=70)
    )

    fig.show()

Сенсор T10: 5 точек; степени 1..3
deg=1: Y = 0.9981481·X - 0.22939048
    coeffs (старшая→младшая): [0.9981481, -0.22939048] | RMSE=0.00533545, MAE=0.0050761, Max|err|=0.00715379
deg=2: Y = -0.00019791819·X^2 + 1.0088016·X - 0.36597493
    coeffs (старшая→младшая): [-0.00019791819, 1.0088016, -0.36597493] | RMSE=0.0002506, MAE=0.000207032, Max|err|=0.000391996
deg=3: Y = -5.8574927e-07·X^3 - 0.00015069247·X^2 + 1.0075655·X - 0.35547515
    coeffs (старшая→младшая): [-5.8574927e-07, -0.00015069247, 1.0075655, -0.35547515] | RMSE=0.000263388, MAE=0.000211801, Max|err|=0.000426298


Сенсор T11: 5 точек; степени 1..3
deg=1: Y = 0.9982978·X - 0.065558619
    coeffs (старшая→младшая): [0.9982978, -0.065558619] | RMSE=0.00399021, MAE=0.0033894, Max|err|=0.00659124
deg=2: Y = -0.00014354106·X^2 + 1.0061495·X - 0.16768682
    coeffs (старшая→младшая): [-0.00014354106, 1.0061495, -0.16768682] | RMSE=0.00143886, MAE=0.000975662, Max|err|=0.00288491
deg=3: Y = 4.8176093e-06·X^3 - 0.00053158534·X^2 + 1.0162929·X - 0.25371114
    coeffs (старшая→младшая): [4.8176093e-06, -0.00053158534, 1.0162929, -0.25371114] | RMSE=0.0011717, MAE=0.000933485, Max|err|=0.00191682


Сенсор T9: 5 точек; степени 1..3
deg=1: Y = 0.99909934·X - 0.02008969
    coeffs (старшая→младшая): [0.99909934, -0.02008969] | RMSE=0.00579573, MAE=0.00482717, Max|err|=0.0106218
deg=2: Y = -0.0001948214·X^2 + 1.0096536·X - 0.15592737
    coeffs (старшая→младшая): [-0.0001948214, 1.0096536, -0.15592737] | RMSE=0.00211251, MAE=0.00171214, Max|err|=0.00352041
deg=3: Y = -2.751088e-06·X^3 + 2.5790822e-05·X^2 + 1.0039124·X - 0.10745339
    coeffs (старшая→младшая): [-2.751088e-06, 2.5790822e-05, 1.0039124, -0.10745339] | RMSE=0.00216371, MAE=0.00172238, Max|err|=0.00338897


In [95]:
# === 12) Калибровочные модели (L2 и L_inf) ===
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Guard: the L2 / L_inf models below need a non-empty calibration table.
assert 'calibration_points_by_sensor' in globals() and not calibration_points_by_sensor.empty, 'Нет калибровочной таблицы.'

try:
    from scipy.optimize import linprog  # optional, для точной L_inf
    _HAS_SCIPY = True
except Exception:
    _HAS_SCIPY = False

def _build_vandermonde(x, deg):
    # Descending powers
    return np.vander(x, N=deg+1, increasing=False)

def _poly_formula(coeffs):
    deg = len(coeffs) - 1
    parts = []
    for i, c in enumerate(coeffs):
        p = deg - i
        if abs(c) < 1e-15:
            continue
        coef = f"{c:.8g}"
        if p == 0:
            parts.append(f"{coef}")
        elif p == 1:
            parts.append(f"{coef}·X")
        else:
            parts.append(f"{coef}·X^{p}")
    if not parts:
        return 'Y = 0'
    expr = ' + '.join(parts).replace('+ -', '- ')
    return 'Y = ' + expr

def fit_L2(x, y, deg, y_std=None):
    """Least-squares polynomial fit, optionally weighted by 1/sigma.

    Args:
        x, y: Sample coordinates.
        deg: Polynomial degree.
        y_std: Optional per-point standard deviations of ``y``. Values are
            clipped from below at 1e-12. NaN entries are treated as unknown
            and replaced by the median of the known ones; if every entry is
            NaN the fit is unweighted (NaN weights would otherwise make the
            whole fit fail or return NaN coefficients).

    Returns:
        ``(coefs, poly, resid)``: coefficients from the highest power down,
        the matching ``np.poly1d`` and the residuals ``y - poly(x)``.
    """
    w = None
    if y_std is not None:
        sigma = np.asarray(y_std, dtype=float)
        known = ~np.isnan(sigma)
        if known.any():
            sigma = np.where(known, sigma, np.median(sigma[known]))
            # np.polyfit multiplies residuals by w, so Gaussian errors use 1/sigma.
            w = 1.0 / np.clip(sigma, 1e-12, np.inf)
    coefs = np.polyfit(x, y, deg=deg, w=w)
    p = np.poly1d(coefs)
    yhat = p(x)
    resid = y - yhat
    return coefs, p, resid

def fit_Linf(x, y, deg, y_std=None, max_iter_irls=25):
    """Minimax (Chebyshev) polynomial fit: minimise the largest |residual|.

    When ``y_std`` is given, residuals are scaled by 1/sigma first, i.e. the
    largest *relative* deviation is minimised.

    With SciPy available (module flag ``_HAS_SCIPY``) the problem is solved
    exactly as a linear programme: minimise t subject to
    ``|w_i * (y_i - P(x_i))| <= t``.

    Without SciPy the solution is approximated with Lawson's algorithm: a
    sequence of weighted least-squares fits whose weights are multiplied by
    the current |residual|, so the points with the largest error gain weight.
    (Re-weighting by 1/|residual| would do the opposite and pull the fit
    towards a robust/L1-like solution.) The best iterate seen, in terms of
    the largest scaled residual, is returned, so the result is never worse
    than the plain weighted least-squares fit.

    Args:
        x, y: Sample coordinates.
        deg: Polynomial degree.
        y_std: Optional per-point standard deviations; clipped from below at
            1e-12. NaN entries are replaced by the median of the known ones,
            and ignored altogether when all of them are NaN.
        max_iter_irls: Number of re-weighting steps of the fallback.

    Returns:
        ``(coefs, poly, resid)``: coefficients from the highest power down,
        the matching ``np.poly1d`` and the unscaled residuals ``y - poly(x)``.

    Raises:
        RuntimeError: If the linear programme could not be solved.
    """
    xa = np.asarray(x, dtype=float)
    ya = np.asarray(y, dtype=float)

    inv_sigma = None
    if y_std is not None:
        sigma = np.asarray(y_std, dtype=float)
        known = ~np.isnan(sigma)
        if known.any():
            sigma = np.where(known, sigma, np.median(sigma[known]))
            inv_sigma = 1.0 / np.clip(sigma, 1e-12, np.inf)

    if _HAS_SCIPY:
        X = _build_vandermonde(xa, deg)
        if inv_sigma is not None:
            Xw = X * inv_sigma[:, None]
            yw = ya * inv_sigma
        else:
            Xw = X
            yw = ya
        n, m = Xw.shape
        minus_t = -np.ones((n, 1))
        # Unknowns: m coefficients followed by the bound t on |scaled residual|.
        #   Xw c - t <= yw   and   -Xw c - t <= -yw
        A_ub = np.vstack([np.hstack([Xw, minus_t]), np.hstack([-Xw, minus_t])])
        b_ub = np.concatenate([yw, -yw])
        c_vec = np.zeros(m + 1)
        c_vec[-1] = 1.0
        bounds = [(None, None)] * m + [(0, None)]
        res = linprog(c_vec, A_ub=A_ub, b_ub=b_ub, bounds=bounds, method='highs')
        if not res.success:
            raise RuntimeError('linprog failed: ' + str(res.message))
        coefs = res.x[:-1]
    else:
        base = np.ones_like(ya) if inv_sigma is None else inv_sigma
        lam = np.full(ya.shape, 1.0 / max(ya.size, 1))
        coefs, best_err = None, np.inf
        # First pass uses uniform lam, i.e. the ordinary weighted L2 fit.
        for _ in range(max(int(max_iter_irls), 0) + 1):
            # polyfit squares w * residual, hence the square root of lam.
            trial = np.polyfit(xa, ya, deg=deg, w=np.sqrt(lam) * base)
            scaled = np.abs(ya - np.polyval(trial, xa)) * base
            err = float(scaled.max())
            if coefs is None or err < best_err:
                coefs, best_err = trial, err
            mass = float(np.sum(lam * scaled))
            if not np.isfinite(mass) or mass <= 0.0:
                # Exact fit (or a numerical failure): nothing left to re-weight.
                break
            # Small floor so a momentarily exact point can regain weight later.
            lam = lam * np.maximum(scaled, 1e-12 * err) / mass
            lam = lam / lam.sum()

    p = np.poly1d(coefs)
    yhat = p(x)
    resid = y - yhat
    return coefs, p, resid

def _err_dict(arr: np.ndarray):
    clean = np.nan_to_num(arr, nan=0.0)
    if not np.any(clean):
        return None
    return dict(type='data', array=clean, visible=True)

# One summary row per (sensor, method, degree); written to CSV after the loop.
rows = []

# For every sensor: fit L2 and L_inf polynomials of degree 1..max(1, n-2),
# print them, record their metrics and draw a 2x2 figure (fits on top,
# residual bars below; L2 on the left, L_inf on the right).
for sensor, g in calibration_points_by_sensor.groupby('sensor'):
    g = g.sort_values('x_mean').reset_index(drop=True)
    x = g['x_mean'].to_numpy(float)
    y = g['y_mean'].to_numpy(float)
    # Without a y_std column a NaN placeholder is handed to the fit functions.
    y_std = g['y_std'].to_numpy(float) if 'y_std' in g else np.full_like(y, np.nan)
    n = len(x)
    deg_max = max(1, n - 2)

    # Entries alternate L2 / L_inf for each degree, in increasing degree order.
    model_bank = []
    for d in range(1, deg_max+1):
        c2, p2, r2 = fit_L2(x, y, d, y_std)
        rmse2 = float(np.sqrt(np.mean(r2**2)))
        mae2 = float(np.mean(np.abs(r2)))
        mx2 = float(np.max(np.abs(r2)))
        model_bank.append({'sensor':sensor,'method':'L2','deg':d,'coefs':c2,'poly':p2,'resid':r2,'rmse':rmse2,'mae':mae2,'maxerr':mx2})
        cI, pI, rI = fit_Linf(x, y, d, y_std)
        rmseI = float(np.sqrt(np.mean(rI**2)))
        maeI = float(np.mean(np.abs(rI)))
        mxI = float(np.max(np.abs(rI)))
        model_bank.append({'sensor':sensor,'method':'L_inf','deg':d,'coefs':cI,'poly':pI,'resid':rI,'rmse':rmseI,'mae':maeI,'maxerr':mxI})

    print(f"Сенсор {sensor}: {n} точек; степени 1..{deg_max}")
    for f in model_bank:
        cdesc = ', '.join([f"{c:.8g}" for c in f['coefs']])
        print(f"{f['method']} deg={f['deg']}: {_poly_formula(f['coefs'])}")
        print(f"    coeffs (старшая→младшая): [{cdesc}] | RMSE={f['rmse']:.6g}, MAE={f['mae']:.6g}, Max|err|={f['maxerr']:.6g}")

    for f in model_bank:
        row = {
            'sensor': f['sensor'],
            'method': f['method'],
            'degree': f['deg'],
            'n_points': n,
            'rmse': f['rmse'],
            'mae': f['mae'],
            'maxerr': f['maxerr'],
            'formula': _poly_formula(f['coefs'])
        }
        # Coefficient of X^k is stored in column c_p{k}; lower-degree models
        # simply leave the higher c_p* columns empty in the final table.
        d = f['deg']
        for i, c in enumerate(f['coefs']):
            pwr = d - i
            row[f'c_p{pwr}'] = float(c)
        rows.append(row)

    # Dense grid for smooth curves; a single point is plotted as is.
    xs = np.linspace(float(x.min()), float(x.max()), 200) if n > 1 else x
    x_err = g['x_std'].to_numpy(float) if 'x_std' in g else np.full_like(x, np.nan)
    y_err = g['y_std'].to_numpy(float) if 'y_std' in g else np.full_like(y, np.nan)
    error_x = _err_dict(x_err)
    error_y = _err_dict(y_err)
    labels = [f"{bc:.1f}" for bc in g['bin_center']] if 'bin_center' in g else [f"{xi:.2f}" for xi in x]

    fig = make_subplots(
        rows=2, cols=2,
        row_heights=[0.6, 0.4],
        vertical_spacing=0.12,
        horizontal_spacing=0.12,
        subplot_titles=(
            f'{sensor}: аппроксимация L2',
            f'{sensor}: аппроксимация L_inf' + ('' if _HAS_SCIPY else ' (IRLS-приближение)'),
            'Остатки L2 (Y - Y_hat)',
            'Остатки L_inf (Y - Y_hat)'
        )
    )

    # Top-left panel: points plus the linear and the highest-degree L2 curves.
    fig.add_trace(
        go.Scatter(
            x=x, y=y, mode='markers', name='Точки',
            legendgroup='L2', marker=dict(size=9),
            error_x=error_x, error_y=error_y
        ),
        row=1, col=1
    )
    p2_1 = next((f['poly'] for f in model_bank if f['method']=='L2' and f['deg']==1), None)
    p2_N = next((f['poly'] for f in model_bank if f['method']=='L2' and f['deg']==deg_max), None)
    if p2_1 is not None:
        fig.add_trace(
            go.Scatter(x=xs, y=p2_1(xs), mode='lines', name='L2 deg=1', legendgroup='L2'),
            row=1, col=1
        )
    if p2_N is not None and (p2_1 is None or p2_N.order != 1):
        fig.add_trace(
            go.Scatter(x=xs, y=p2_N(xs), mode='lines', name=f'L2 deg={p2_N.order}', legendgroup='L2'),
            row=1, col=1
        )

    # Top-right panel: the same points (hidden from the legend) with the L_inf curves.
    fig.add_trace(
        go.Scatter(
            x=x, y=y, mode='markers', name='Точки (L_inf)',
            legendgroup='L_inf', marker=dict(size=9, symbol='circle-open'),
            opacity=0.95, error_x=error_x, error_y=error_y, showlegend=False
        ),
        row=1, col=2
    )
    pI_1 = next((f['poly'] for f in model_bank if f['method']=='L_inf' and f['deg']==1), None)
    pI_N = next((f['poly'] for f in model_bank if f['method']=='L_inf' and f['deg']==deg_max), None)
    if pI_1 is not None:
        fig.add_trace(
            go.Scatter(x=xs, y=pI_1(xs), mode='lines', name='L_inf deg=1', legendgroup='L_inf'),
            row=1, col=2
        )
    if pI_N is not None and (pI_1 is None or pI_N.order != 1):
        fig.add_trace(
            go.Scatter(x=xs, y=pI_N(xs), mode='lines', name=f'L_inf deg={pI_N.order}', legendgroup='L_inf'),
            row=1, col=2
        )

    # Bottom panels: residual bars for every degree, one offset group per model.
    for d in range(1, deg_max+1):
        r_l2 = next(f['resid'] for f in model_bank if f['method']=='L2' and f['deg']==d)
        fig.add_trace(
            go.Bar(
                x=labels, y=r_l2, name=f'L2 deg={d} (остатки)',
                legendgroup=f'L2_res_{d}', offsetgroup=f'L2_{d}'
            ),
            row=2, col=1
        )
        r_li = next(f['resid'] for f in model_bank if f['method']=='L_inf' and f['deg']==d)
        fig.add_trace(
            go.Bar(
                x=labels, y=r_li, name=f'L_inf deg={d} (остатки)',
                legendgroup=f'Linf_res_{d}', offsetgroup=f'Linf_{d}'
            ),
            row=2, col=2
        )

    fig.update_xaxes(title_text=f'{sensor} (X)', row=1, col=1, showgrid=True, gridcolor='rgba(0,0,0,0.15)')
    fig.update_yaxes(title_text=f'T{REF_IDX} (Y)', row=1, col=1, showgrid=True, gridcolor='rgba(0,0,0,0.15)')
    fig.update_xaxes(title_text=f'{sensor} (X)', row=1, col=2, showgrid=True, gridcolor='rgba(0,0,0,0.15)')
    fig.update_yaxes(title_text=f'T{REF_IDX} (Y)', row=1, col=2, showgrid=True, gridcolor='rgba(0,0,0,0.15)')
    fig.update_xaxes(title_text='Полочка', row=2, col=1, type='category', showgrid=True, gridcolor='rgba(0,0,0,0.12)', categoryorder='array', categoryarray=labels)
    fig.update_xaxes(title_text='Полочка', row=2, col=2, type='category', showgrid=True, gridcolor='rgba(0,0,0,0.12)', categoryorder='array', categoryarray=labels)
    fig.update_yaxes(title_text='°C', row=2, col=1, showgrid=True, gridcolor='rgba(0,0,0,0.12)')
    fig.update_yaxes(title_text='°C', row=2, col=2, showgrid=True, gridcolor='rgba(0,0,0,0.12)')

    fig.update_layout(
        height=740,
        width=1120,
        barmode='group',
        legend=dict(orientation='h', yanchor='bottom', y=1.05, xanchor='center', x=0.5),
        margin=dict(t=100, l=60, r=40, b=90)
    )

    fig.show()

# Persist the model summary; the relative path resolves against the current working directory.
calibration_models = pd.DataFrame(rows)
out_csv = 'calibration_models.csv'
calibration_models.to_csv(out_csv, index=False)
print(f"Сохранено: {out_csv} ({len(calibration_models)} строк)")

Сенсор T10: 5 точек; степени 1..3
L2 deg=1: Y = 0.9981481·X - 0.22939048
    coeffs (старшая→младшая): [0.9981481, -0.22939048] | RMSE=0.00533545, MAE=0.0050761, Max|err|=0.00715379
L_inf deg=1: Y = 0.9981469·X - 0.22897818
    coeffs (старшая→младшая): [0.9981469, -0.22897818] | RMSE=0.00531583, MAE=0.00500018, Max|err|=0.00705688
L2 deg=2: Y = -0.00019791819·X^2 + 1.0088016·X - 0.36597493
    coeffs (старшая→младшая): [-0.00019791819, 1.0088016, -0.36597493] | RMSE=0.0002506, MAE=0.000207032, Max|err|=0.000391996
L_inf deg=2: Y = -0.00020078568·X^2 + 1.0089673·X - 0.36826834
    coeffs (старшая→младшая): [-0.00020078568, 1.0089673, -0.36826834] | RMSE=0.000270321, MAE=0.000243199, Max|err|=0.000397362
L2 deg=3: Y = -5.8574927e-07·X^3 - 0.00015069247·X^2 + 1.0075655·X - 0.35547515
    coeffs (старшая→младшая): [-5.8574927e-07, -0.00015069247, 1.0075655, -0.35547515] | RMSE=0.000263388, MAE=0.000211801, Max|err|=0.000426298
L_inf deg=3: Y = -3.3470524e-07·X^3 - 0.00017566289·X^2 + 1.00

Сенсор T11: 5 точек; степени 1..3
L2 deg=1: Y = 0.9982978·X - 0.065558619
    coeffs (старшая→младшая): [0.9982978, -0.065558619] | RMSE=0.00399021, MAE=0.0033894, Max|err|=0.00659124
L_inf deg=1: Y = 0.99838551·X - 0.066918517
    coeffs (старшая→младшая): [0.99838551, -0.066918517] | RMSE=0.0040191, MAE=0.00356401, Max|err|=0.00558007
L2 deg=2: Y = -0.00014354106·X^2 + 1.0061495·X - 0.16768682
    coeffs (старшая→младшая): [-0.00014354106, 1.0061495, -0.16768682] | RMSE=0.00143886, MAE=0.000975662, Max|err|=0.00288491
L_inf deg=2: Y = -0.00013616863·X^2 + 1.0056069·X - 0.15865848
    coeffs (старшая→младшая): [-0.00013616863, 1.0056069, -0.15865848] | RMSE=0.00151843, MAE=0.00143149, Max|err|=0.00217482
L2 deg=3: Y = 4.8176093e-06·X^3 - 0.00053158534·X^2 + 1.0162929·X - 0.25371114
    coeffs (старшая→младшая): [4.8176093e-06, -0.00053158534, 1.0162929, -0.25371114] | RMSE=0.0011717, MAE=0.000933485, Max|err|=0.00191682
L_inf deg=3: Y = 6.301231e-06·X^3 - 0.00067088579·X^2 + 1.0204821

Сенсор T9: 5 точек; степени 1..3
L2 deg=1: Y = 0.99909934·X - 0.02008969
    coeffs (старшая→младшая): [0.99909934, -0.02008969] | RMSE=0.00579573, MAE=0.00482717, Max|err|=0.0106218
L_inf deg=1: Y = 0.99914131·X - 0.019213779
    coeffs (старшая→младшая): [0.99914131, -0.019213779] | RMSE=0.00583037, MAE=0.00465296, Max|err|=0.00861436
L2 deg=2: Y = -0.0001948214·X^2 + 1.0096536·X - 0.15592737
    coeffs (старшая→младшая): [-0.0001948214, 1.0096536, -0.15592737] | RMSE=0.00211251, MAE=0.00171214, Max|err|=0.00352041
L_inf deg=2: Y = -0.00023112839·X^2 + 1.0116316·X - 0.18191753
    coeffs (старшая→младшая): [-0.00023112839, 1.0116316, -0.18191753] | RMSE=0.0022808, MAE=0.00221459, Max|err|=0.0030298
L2 deg=3: Y = -2.751088e-06·X^3 + 2.5790822e-05·X^2 + 1.0039124·X - 0.10745339
    coeffs (старшая→младшая): [-2.751088e-06, 2.5790822e-05, 1.0039124, -0.10745339] | RMSE=0.00216371, MAE=0.00172238, Max|err|=0.00338897
L_inf deg=3: Y = -8.2747493e-07·X^3 - 0.00016954287·X^2 + 1.0101263·X -

Сохранено: calibration_models.csv (18 строк)


In [99]:
# === 13) HTML-отчёт ===
import os
from pathlib import Path
from datetime import datetime
import html
import numbers

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio

try:
    from scipy.optimize import linprog  # type: ignore
    _HAS_SCIPY = True
except Exception:
    _HAS_SCIPY = False

# Guard: the report needs the calibration points table to exist in the
# notebook namespace and to be non-empty.
assert "calibration_points_by_sensor" in globals() and not calibration_points_by_sensor.empty, "Нет калибровочных точек."

# Reference column name and the label shown for it in the report
# (falls back to the column name when REF_NAME is missing or falsy).
REF_COL = f"T{REF_IDX}"
raw_ref_name = globals().get("REF_NAME", REF_COL)
ref_display = str(raw_ref_name) if raw_ref_name else REF_COL

# Columns expected right after the reference: T{REF_IDX+1}..T{REF_IDX+N_FOLLOW}.
ordered_cols = [f"T{REF_IDX + i}" for i in range(1, N_FOLLOW + 1)]
# Sensors actually present in the points table, de-duplicated in first-seen order.
present_sensors = list(dict.fromkeys(calibration_points_by_sensor["sensor"]))

# Build the mapping "sensor id -> display name" from SENSOR_NAMES, which may be
# a list/tuple (positional, aligned with ordered_cols) or a dict.
raw_names = globals().get("SENSOR_NAMES", {}) or {}
name_map: dict[str, str] = {}
if isinstance(raw_names, (list, tuple)):
    # Names beyond the number of expected columns are ignored.
    for idx, nm in enumerate(raw_names, start=1):
        if idx - 1 < len(ordered_cols):
            name_map[ordered_cols[idx - 1]] = str(nm)
elif isinstance(raw_names, dict):
    for key, value in raw_names.items():
        sensor_key: str | None = None
        if isinstance(key, str) and key.startswith("T"):
            # Keys that start with "T" are taken as sensor ids verbatim.
            sensor_key = key
        else:
            # Otherwise try to read the key as a 1-based position in ordered_cols.
            try:
                numeric = int(key)
                if 1 <= numeric <= len(ordered_cols):
                    sensor_key = ordered_cols[numeric - 1]
            except Exception:
                sensor_key = None
        if sensor_key:
            name_map[sensor_key] = str(value)
        elif isinstance(key, str):
            # Unresolved string keys are kept under their own name.
            name_map[key] = str(value)

def sensor_sort_key(sensor: str):
    """Return a sort key that orders ``T<number>`` ids numerically.

    Ids of the form ``"T" + integer`` map to ``(0, number)``; anything else
    maps to ``(1, sensor)`` and therefore sorts after all numbered ids.
    """
    if not (isinstance(sensor, str) and sensor.startswith("T")):
        return (1, sensor)
    try:
        number = int(sensor[1:])
    except Exception:
        return (1, sensor)
    return (0, number)

# Report order: expected columns that are present first (in expected order),
# then any other present sensors sorted with sensor_sort_key.
follow_order = [col for col in ordered_cols if col in present_sensors]
follow_order += sorted([s for s in present_sensors if s not in follow_order], key=sensor_sort_key)

# True when a DataFrame named DATA exists in the notebook namespace.
has_data = "DATA" in globals() and isinstance(DATA, pd.DataFrame)
# Histogram bin width is taken from DEG_TOL; any missing, non-numeric,
# non-finite or non-positive value falls back to 1.0.
try:
    bin_width = float(globals().get("DEG_TOL", 1.0))
    if not np.isfinite(bin_width) or bin_width <= 0:
        bin_width = 1.0
except Exception:
    bin_width = 1.0

# Mutable counter shared by add_figure to number figures across the report.
figure_counter = {"value": 1}

def add_figure(fig_obj: go.Figure | None, caption: str, parts: list[str]):
    """Render a Plotly figure to an HTML fragment and append it with a caption.

    Args:
        fig_obj: Figure to embed; ``None`` is silently skipped.
        caption: Plain-text caption; it is HTML-escaped here.
        parts: List of HTML fragments that receives the new ``<div>``.

    The figure number comes from the shared ``figure_counter`` and is advanced
    only after the figure has been rendered successfully.
    """
    if fig_obj is None:
        return
    idx = figure_counter["value"]
    # plotly.js is embedded once, together with the first figure; later
    # figures reuse it. The documented values for ``include_plotlyjs`` are
    # booleans, 'cdn', 'directory', 'require' or a path ending in '.js', so a
    # boolean is passed instead of the undocumented string "inline".
    html_fragment = pio.to_html(fig_obj, include_plotlyjs=(idx == 1), full_html=False)
    figure_counter["value"] = idx + 1
    parts.append("<div class='figure'>" + html_fragment + f"<p class='figure-caption'>Fig. {idx}. " + html.escape(caption) + "</p></div>")

def friendly_sensor(sensor: str) -> str:
    """Return the display name for ``sensor`` from ``name_map``, or the id itself."""
    label = name_map[sensor] if sensor in name_map else sensor
    return str(label)

def ensure_suffix(label: str, sensor: str) -> str:
    """Append ``" (<sensor>)"`` to ``label`` unless the id already occurs in it."""
    text = str(label)
    if sensor in text:
        return text
    return f"{text} ({sensor})"

def fmt_float(value):
    """Format a table/report value as text.

    Missing values (as judged by ``pd.isna``) become an empty string, real
    numbers are rendered with six significant digits, and everything else is
    passed through ``str``.
    """
    if pd.isna(value):
        return ""
    return f"{value:.6g}" if isinstance(value, numbers.Real) else str(value)

# Accumulates the HTML fragments of the report body in output order.
parts: list[str] = []

def open_section(title: str, intro: str | None = None, opened: bool = True):
    """Start a collapsible ``<details>`` section in ``parts``.

    Args:
        title: Section heading; HTML-escaped and placed in ``<summary>``.
        intro: Optional introductory paragraph; HTML-escaped when given.
        opened: Whether the section is expanded by default.
    """
    state = "open" if opened else ""
    parts.append("<details " + state + " class='section'>")
    parts.append("<summary><span>" + html.escape(title) + "</span></summary>")
    if not intro:
        return
    parts.append("<p>" + html.escape(intro) + "</p>")

def close_section():
    """Close the ``<details>`` element started by ``open_section``."""
    parts.append("</details>")

def build_coverage_figure(sensor: str, subset: pd.DataFrame) -> go.Figure:
    """Build a bar chart of temperature-bin coverage for one sensor.

    When ``DATA`` holds readings for ``sensor``, the readings are histogrammed
    with ``bin_width``-wide bins and overlaid with the ``n_samples`` counts
    from ``subset`` (matched by ``bin_center``). Without raw readings only
    the ``subset`` counts are drawn.

    Args:
        sensor: Sensor column id.
        subset: Calibration points of this sensor.

    Returns:
        The assembled figure (always a figure, possibly without traces).
    """
    fig = go.Figure()
    if has_data and sensor in DATA.columns:
        readings = DATA[sensor].dropna().astype(float)
    else:
        readings = pd.Series(dtype=float)
    has_bins = {"bin_center", "n_samples"} <= set(subset.columns)

    if readings.empty:
        # No raw readings: show only what the calibration table reports.
        if has_bins:
            fig.add_bar(
                x=subset["bin_center"],
                y=subset["n_samples"],
                name="Использовано",
                marker=dict(color="#1f77b4"),
            )
        fig.update_layout(
            title=f"{friendly_sensor(sensor)} — распределение полочек",
            xaxis_title="Температура (°C)",
            yaxis_title="Количество точек",
            height=340,
            margin=dict(t=60, l=60, r=40, b=60),
        )
        return fig

    # Bin edges aligned to multiples of bin_width, with one spare bin on top.
    lowest = float(np.floor(readings.min() / bin_width) * bin_width)
    highest = float(np.ceil(readings.max() / bin_width) * bin_width + bin_width)
    requested_edges = np.arange(lowest, highest + bin_width * 0.5, bin_width)
    counts, edges = np.histogram(readings.to_numpy(), bins=requested_edges)
    centers = (edges[:-1] + edges[1:]) / 2

    if has_bins:
        # Match table bins to histogram bins through a rounded center/width key.
        used_by_bin = {
            round(float(center) / bin_width, 6): float(n_used)
            for center, n_used in zip(subset["bin_center"], subset["n_samples"])
        }
        used_counts = np.array(
            [used_by_bin.get(round(float(center) / bin_width, 6), 0.0) for center in centers]
        )
    else:
        used_counts = np.zeros_like(centers)
    unused_counts = np.maximum(counts - used_counts, 0)

    fig.add_bar(x=centers, y=counts, name="Все точки", marker=dict(color="#b0bec5"))
    fig.add_bar(x=centers, y=used_counts, name="Использовано", marker=dict(color="#1f77b4"))
    if unused_counts.any():
        fig.add_bar(x=centers, y=unused_counts, name="Пропущено", marker=dict(color="#ff7f0e"))
    fig.update_layout(
        title=f"{friendly_sensor(sensor)} — покрытие температурных корзин",
        xaxis_title="Температура (°C)",
        yaxis_title="Количество точек",
        barmode="overlay",
        height=360,
        margin=dict(t=70, l=70, r=40, b=70),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )
    return fig

def build_stability_figure(sensor: str, segments: pd.DataFrame) -> go.Figure | None:
    """Plot the sensor time series together with its stable segments.

    The upper panel shows the sensor (and, if available, the reference
    column) from ``DATA`` with the segments shaded; the lower panel draws one
    horizontal bar per segment, annotated with ``n=<length>`` when the
    ``length`` column exists.

    Args:
        sensor: Sensor column id.
        segments: Stable segments with ``start_date`` and ``end_date`` columns.

    Returns:
        The figure, or ``None`` when there are no segments with valid dates.
    """
    if segments is None or segments.empty:
        return None
    if {"start_date", "end_date"} - set(segments.columns):
        return None

    seg = segments.copy()
    for bound in ("start_date", "end_date"):
        seg[bound] = pd.to_datetime(seg[bound], errors="coerce")
    seg = seg.dropna(subset=["start_date", "end_date"])
    seg = seg.sort_values("start_date").reset_index(drop=True)
    if seg.empty:
        return None
    seg["segment"] = [f"seg {number}" for number in range(1, len(seg) + 1)]

    fig = make_subplots(rows=2, cols=1, shared_xaxes=True, row_heights=[0.7, 0.3], vertical_spacing=0.08)

    def _timeline(column):
        # Chronologically sorted copy of one DATA column with parsed dates.
        frame = DATA[["date", column]].copy()
        frame["date"] = pd.to_datetime(frame["date"], errors="coerce")
        return frame.dropna(subset=["date"]).sort_values("date")

    if has_data and sensor in DATA.columns and "date" in DATA.columns:
        ts = _timeline(sensor)
        fig.add_trace(
            go.Scatter(x=ts["date"], y=ts[sensor], mode="lines", name=friendly_sensor(sensor)),
            row=1,
            col=1,
        )
        if REF_COL in DATA.columns:
            ref_ts = _timeline(REF_COL)
            fig.add_trace(
                go.Scatter(
                    x=ref_ts["date"],
                    y=ref_ts[REF_COL],
                    mode="lines",
                    name=friendly_sensor(REF_COL),
                    line=dict(dash="dash"),
                ),
                row=1,
                col=1,
            )

    has_length = "length" in seg.columns
    for _, row in seg.iterrows():
        start = row["start_date"]
        end = row["end_date"]
        label = row["segment"]
        fig.add_vrect(x0=start, x1=end, fillcolor="rgba(31,119,180,0.08)", line_width=0, row=1, col=1)
        fig.add_trace(
            go.Scatter(
                x=[start, end],
                y=[label, label],
                mode="lines",
                line=dict(width=10, color="#1f77b4"),
                hoverinfo="text",
                hovertext=f"{label}: {start:%Y-%m-%d %H:%M:%S} → {end:%Y-%m-%d %H:%M:%S}",
                showlegend=False,
            ),
            row=2,
            col=1,
        )
        if has_length:
            fig.add_trace(
                go.Scatter(
                    x=[end],
                    y=[label],
                    mode="text",
                    text=[f"n={int(row['length'])}"],
                    textposition="middle right",
                    showlegend=False,
                ),
                row=2,
                col=1,
            )

    fig.update_yaxes(title_text="Температура (°C)", row=1, col=1)
    fig.update_yaxes(type="category", title_text="Сегменты", row=2, col=1)
    fig.update_xaxes(title_text="Время", row=2, col=1)
    fig.update_layout(
        title=f"{friendly_sensor(sensor)} — температура и устойчивые интервалы",
        height=max(380, 140 + 40 * len(seg)),
        margin=dict(t=70, l=70, r=50, b=60),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )
    return fig

def build_polyfit_figure(sensor: str, subset: pd.DataFrame) -> go.Figure | None:
    """Plot least-squares polynomial fits of reference (Y) against sensor (X).

    Fits degrees ``1..max(1, n - 2)`` to the ``(x_mean, y_mean)`` points.
    The upper panel shows the points with ``y_std`` error bars and the fitted
    curves; the lower panel shows the residuals per point.

    Args:
        sensor: Sensor column id.
        subset: Calibration points of this sensor.

    Returns:
        The figure, or ``None`` when there are fewer than two points or no
        fit succeeded.
    """
    if subset.empty:
        return None
    g = subset.sort_values("x_mean").reset_index(drop=True)
    x = g["x_mean"].astype(float).to_numpy()
    y = g["y_mean"].astype(float).to_numpy()
    if len(x) < 2:
        return None
    y_std = g["y_std"].astype(float).to_numpy() if "y_std" in g.columns else np.full_like(y, np.nan)
    # Weights are only usable when every sigma is finite: a single NaN/inf
    # would propagate into the least-squares system and ruin every fit, so in
    # that case the fit is done unweighted.
    weights = 1.0 / np.clip(y_std, 1e-9, np.inf) if np.isfinite(y_std).all() else None
    deg_max = max(1, len(x) - 2)
    fits = []
    for deg in range(1, deg_max + 1):
        try:
            coeffs = np.polyfit(x, y, deg=deg, w=weights)
        except Exception:
            # Best effort: a degree that cannot be fitted is simply not drawn.
            continue
        poly = np.poly1d(coeffs)
        fits.append({"deg": deg, "poly": poly, "resid": y - poly(x)})
    if not fits:
        return None
    xs = np.linspace(float(x.min()), float(x.max()), 200)
    fig = make_subplots(rows=2, cols=1, row_heights=[0.65, 0.35], vertical_spacing=0.12)
    fig.add_trace(
        go.Scatter(
            x=x,
            y=y,
            mode="markers",
            name="Точки",
            marker=dict(size=9),
            # Missing sigmas are drawn as zero-length error bars.
            error_y=dict(type="data", array=np.nan_to_num(y_std, nan=0.0)),
        ),
        row=1,
        col=1,
    )
    labels = [f"{bc:.1f}" for bc in g["bin_center"]] if "bin_center" in g.columns else [f"{val:.2f}" for val in x]
    positions = list(range(len(labels)))
    for item in fits:
        fig.add_trace(go.Scatter(x=xs, y=item["poly"](xs), mode="lines", name=f"deg={item['deg']}", showlegend=True), row=1, col=1)
        fig.add_trace(go.Bar(x=positions, y=item["resid"], name=f"deg={item['deg']}", opacity=0.6), row=2, col=1)
    fig.update_xaxes(title_text=f"{sensor} (X)", row=1, col=1)
    fig.update_yaxes(title_text=f"{REF_COL} (Y)", row=1, col=1)
    fig.update_xaxes(title_text="Полочка", tickmode="array", tickvals=positions, ticktext=labels, row=2, col=1)
    fig.update_yaxes(title_text="Y - Ŷ (°C)", row=2, col=1)
    fig.update_layout(
        title=f"{friendly_sensor(sensor)} — полиномиальные аппроксимации",
        height=660,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
        margin=dict(t=80, l=60, r=40, b=80),
    )
    return fig

def build_poly_from_row(row: pd.Series) -> np.poly1d | None:
    coeff_cols = [c for c in row.index if c.startswith("c_p")]
    if not coeff_cols:
        return None
    coeff_cols.sort(key=lambda name: int(name.split("p")[1]), reverse=True)
    coeffs = [float(row[c]) if not pd.isna(row[c]) else 0.0 for c in coeff_cols]
    return np.poly1d(coeffs)

def _vandermonde(x, deg):
    """Return the Vandermonde matrix of ``x`` with columns ``x**deg .. x**0``."""
    n_columns = deg + 1
    return np.vander(x, n_columns)

def fit_L2(x, y, deg, y_std=None):
    """Fit a polynomial of degree ``deg`` by (weighted) least squares.

    Args:
        x: Abscissas.
        y: Ordinates.
        deg: Polynomial degree.
        y_std: Optional per-point standard deviations of ``y``; when given
            and all finite, points are weighted by ``1 / max(y_std, 1e-12)``.

    Returns:
        ``(coeffs, poly, resid)``: coefficients from highest to lowest power,
        the corresponding ``np.poly1d`` and the residuals ``y - poly(x)``.
    """
    weights = None
    if y_std is not None:
        sigma = np.asarray(y_std, dtype=float)
        # NaN/inf sigmas (e.g. an all-NaN placeholder used when the std
        # column is missing) would poison the least-squares system, so the
        # fit falls back to unweighted instead of failing.
        if np.isfinite(sigma).all():
            weights = 1.0 / np.clip(sigma, 1e-12, np.inf)
    coeffs = np.polyfit(x, y, deg=deg, w=weights)
    poly = np.poly1d(coeffs)
    resid = y - poly(x)
    return coeffs, poly, resid

def fit_Linf(x, y, deg, y_std=None, max_iter_irls=25):
    """Fit a polynomial of degree ``deg`` in the minimax (L-infinity) sense.

    Minimises ``max_i |w_i * (y_i - p(x_i))|`` where ``w_i = 1 / max(y_std_i,
    1e-12)`` when ``y_std`` is given and all finite, and ``w_i = 1`` otherwise.

    With SciPy available the problem is solved exactly as a linear program
    (HiGHS). Without SciPy it is approximated by Lawson's iteratively
    reweighted least-squares scheme.

    Args:
        x: Abscissas.
        y: Ordinates.
        deg: Polynomial degree.
        y_std: Optional per-point standard deviations of ``y``.
        max_iter_irls: Number of reweighting iterations of the fallback.

    Returns:
        ``(coeffs, poly, resid)``: coefficients from highest to lowest power,
        the corresponding ``np.poly1d`` and the residuals ``y - poly(x)``.

    Raises:
        RuntimeError: If the linear program does not converge.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    base_w = None
    if y_std is not None:
        sigma = np.asarray(y_std, dtype=float)
        # Non-finite sigmas would put NaN into the LP / least-squares system;
        # in that case the fit is done without point weights.
        if np.isfinite(sigma).all():
            base_w = 1.0 / np.clip(sigma, 1e-12, np.inf)

    if _HAS_SCIPY:
        X = _vandermonde(x, deg)
        if base_w is not None:
            Xw = X * base_w[:, None]
            yw = y * base_w
        else:
            Xw, yw = X, y
        n, m = Xw.shape
        # Variables: m polynomial coefficients plus the bound t >= 0.
        # Constraints encode  -t <= Xw @ c - yw <= t ; the objective is t.
        neg_ones = -np.ones((n, 1))
        A_ub = np.vstack([np.hstack([Xw, neg_ones]), np.hstack([-Xw, neg_ones])])
        b_ub = np.concatenate([yw, -yw])
        c_vec = np.zeros(m + 1)
        c_vec[-1] = 1.0
        bounds = [(None, None)] * m + [(0, None)]
        result = linprog(c_vec, A_ub=A_ub, b_ub=b_ub, bounds=bounds, method="highs")
        if not result.success:
            raise RuntimeError(result.message)
        coeffs = result.x[:-1]
    else:
        # Lawson's algorithm: repeatedly solve a weighted least-squares
        # problem and multiply each point's weight by its current absolute
        # error, so that the points with the largest error gain influence.
        # (Weighting by 1/|error| would do the opposite and drift away from
        # the minimax solution.)
        unit = np.ones_like(y) if base_w is None else base_w
        lam = np.full(y.shape, 1.0 / max(y.size, 1))
        # np.polyfit applies ``w`` to the unsquared residuals, hence sqrt(lam).
        coeffs = np.polyfit(x, y, deg=deg, w=np.sqrt(lam) * unit)
        for _ in range(max_iter_irls):
            err = np.abs((y - np.polyval(coeffs, x)) * unit)
            total = float(np.dot(lam, err))
            if not np.isfinite(total) or total <= 0.0:
                break  # exact fit (or degenerate data): nothing to rebalance
            new_lam = lam * err / total
            if np.count_nonzero(new_lam) <= deg:
                break  # too few active points left for a well-posed fit
            lam = new_lam
            coeffs = np.polyfit(x, y, deg=deg, w=np.sqrt(lam) * unit)

    poly = np.poly1d(coeffs)
    resid = y - poly(x)
    return coeffs, poly, resid

def build_model_comparison_figure(sensor: str, subset: pd.DataFrame) -> go.Figure | None:
    """Compare L2 and L_inf polynomial fits for one sensor on a 2×2 grid.

    Top row: calibration points with the fitted curves (L2 on the left,
    L_inf on the right). Bottom row: residuals per point for each method.
    Degrees ``1..max(1, n - 2)`` are fitted with ``fit_L2`` and ``fit_Linf``.

    Args:
        sensor: Sensor column id.
        subset: Calibration points of this sensor.

    Returns:
        The figure, or ``None`` when fewer than two points are available.
    """
    if subset.empty:
        return None
    g = subset.sort_values("x_mean").reset_index(drop=True)
    x = g["x_mean"].astype(float).to_numpy()
    y = g["y_mean"].astype(float).to_numpy()
    if len(x) < 2:
        return None

    if "x_std" in g.columns:
        x_std = g["x_std"].astype(float).to_numpy()
    else:
        x_std = np.full_like(x, np.nan)
    if "y_std" in g.columns:
        y_std = g["y_std"].astype(float).to_numpy()
    else:
        y_std = np.full_like(y, np.nan)

    # Fit both methods for every degree, keeping L2/L_inf pairs adjacent.
    model_bank = []
    for deg in range(1, max(1, len(x) - 2) + 1):
        _, poly_l2, resid_l2 = fit_L2(x, y, deg, y_std)
        model_bank.append({"method": "L2", "deg": deg, "poly": poly_l2, "resid": resid_l2})
        _, poly_inf, resid_inf = fit_Linf(x, y, deg, y_std)
        model_bank.append({"method": "L_inf", "deg": deg, "poly": poly_inf, "resid": resid_inf})

    xs = np.linspace(float(x.min()), float(x.max()), 200)
    nice_name = friendly_sensor(sensor)
    fig = make_subplots(
        rows=2,
        cols=2,
        row_heights=[0.6, 0.4],
        vertical_spacing=0.12,
        horizontal_spacing=0.12,
        subplot_titles=(f"{nice_name}: L2", f"{nice_name}: L_inf", "Остатки L2", "Остатки L_inf"),
    )
    fig.add_trace(
        go.Scatter(
            x=x,
            y=y,
            mode="markers",
            name="Точки",
            marker=dict(size=9),
            error_x=dict(type="data", array=np.nan_to_num(x_std, nan=0.0)),
            error_y=dict(type="data", array=np.nan_to_num(y_std, nan=0.0)),
        ),
        row=1,
        col=1,
    )
    fig.add_trace(
        go.Scatter(x=x, y=y, mode="markers", name="Точки", marker=dict(size=9, symbol="circle-open"), showlegend=False),
        row=1,
        col=2,
    )

    def _column(item):
        # L2 models go to the left column, L_inf models to the right one.
        return 1 if item["method"] == "L2" else 2

    for item in model_bank:
        curve = go.Scatter(x=xs, y=item["poly"](xs), mode="lines", name=f"{item['method']} deg={item['deg']}")
        fig.add_trace(curve, row=1, col=_column(item))

    labels = [f"{bc:.1f}" for bc in g["bin_center"]] if "bin_center" in g.columns else [f"{val:.2f}" for val in x]
    positions = list(range(len(labels)))
    for item in model_bank:
        bars = go.Bar(x=positions, y=item["resid"], name=f"{item['method']} deg={item['deg']}")
        fig.add_trace(bars, row=2, col=_column(item))

    for col in (1, 2):
        fig.update_xaxes(title_text="Полочка", tickmode="array", tickvals=positions, ticktext=labels, row=2, col=col)
    for col in (1, 2):
        fig.update_yaxes(title_text=f"{REF_COL} (°C)", row=1, col=col)
    for col in (1, 2):
        fig.update_yaxes(title_text="Y - Ŷ (°C)", row=2, col=col)
    fig.update_layout(
        height=760,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
        margin=dict(t=100, l=60, r=40, b=90),
    )
    return fig

# One summary dict per sensor: spread of the calibration points and, when
# models are available, fit metrics for the highest fitted degree.
sensor_summaries: list[dict[str, float | str]] = []
for sensor in follow_order:
    subset = calibration_points_by_sensor[calibration_points_by_sensor['sensor'] == sensor]
    summary: dict[str, float | str] = {'sensor': sensor, 'label': friendly_sensor(sensor)}
    # Mean/max of the per-point standard deviations (x = sensor, y = reference);
    # NaN when the column or the points are missing.
    if not subset.empty:
        summary['std_sensor_mean'] = float(subset['x_std'].mean()) if 'x_std' in subset.columns else np.nan
        summary['std_sensor_max'] = float(subset['x_std'].max()) if 'x_std' in subset.columns else np.nan
        summary['std_ref_mean'] = float(subset['y_std'].mean()) if 'y_std' in subset.columns else np.nan
        summary['std_ref_max'] = float(subset['y_std'].max()) if 'y_std' in subset.columns else np.nan
    else:
        summary['std_sensor_mean'] = summary['std_sensor_max'] = np.nan
        summary['std_ref_mean'] = summary['std_ref_max'] = np.nan
    if 'calibration_models' in globals() and calibration_models is not None and not calibration_models.empty:
        cm = calibration_models[calibration_models['sensor'] == sensor]
        if not cm.empty:
            # Number of points: taken from the models table when recorded
            # there, otherwise counted from the points table.
            if 'n_points' in cm.columns and not cm['n_points'].isna().all():
                n_pts = int(cm['n_points'].max())
            else:
                n_pts = len(subset)
            # Degree reported in the summary: N - 2, but at least 1.
            deg_star = max(1, n_pts - 2)
            row_l2 = cm[(cm['method'] == 'L2') & (cm['degree'] == deg_star)].head(1)
            row_linf = cm[(cm['method'] == 'L_inf') & (cm['degree'] == deg_star)].head(1)
            poly_l2 = build_poly_from_row(row_l2.iloc[0]) if not row_l2.empty else None
            poly_linf = build_poly_from_row(row_linf.iloc[0]) if not row_linf.empty else None
            if not row_l2.empty:
                summary['rmse_L2'] = float(row_l2.iloc[0].get('rmse', np.nan))
                summary['mae_L2'] = float(row_l2.iloc[0].get('mae', np.nan))
            if not row_linf.empty:
                summary['rmse_L_inf'] = float(row_linf.iloc[0].get('rmse', np.nan))
                summary['mae_L_inf'] = float(row_linf.iloc[0].get('mae', np.nan))
            if poly_l2 is not None and poly_linf is not None:
                # Disagreement between the two models, evaluated at the
                # calibration points (or on a grid over DATA as a fallback).
                xs = subset['x_mean'].astype(float).to_numpy()
                if xs.size == 0 and has_data and sensor in DATA.columns:
                    xs = np.linspace(DATA[sensor].min(), DATA[sensor].max(), 200)
                if xs.size > 0:
                    diff = np.abs(poly_l2(xs) - poly_linf(xs))
                    summary['poly_delta_mean'] = float(np.mean(diff))
                    summary['poly_delta_max'] = float(np.max(diff))
    sensor_summaries.append(summary)

# Report title with the generation timestamp (local time, minute precision).
now_str = datetime.now().strftime('%Y-%m-%d %H:%M')
parts.append('<h1>Краткий отчёт калибровки</h1>')
parts.append(f"<p>Время формирования: {html.escape(now_str)}</p>")

# Section listing the reference and the calibrated sensors.
open_section('Список сенсоров', opened=True)
parts.append('<ul>')
# Use the REF_NAME label when one is configured, otherwise the name_map lookup;
# either way make sure the column id is visible in the label.
ref_label = ensure_suffix(friendly_sensor(REF_COL) if ref_display == REF_COL else ref_display, REF_COL)
parts.append(f"<li>Эталон: {html.escape(ref_label)}</li>")
for idx, sensor in enumerate(follow_order, start=1):
    friendly = ensure_suffix(friendly_sensor(sensor), sensor)
    parts.append(f"<li>Калибруемый: Датчик {idx}: {html.escape(friendly)}</li>")
parts.append('</ul>')
if follow_order:
    # One-line legend mapping the running numbers to sensor labels.
    mapping_descr = '; '.join([f"Датчик {i} = {ensure_suffix(friendly_sensor(s), s)}" for i, s in enumerate(follow_order, start=1)])
    parts.append(f"<p><i>Нумерация:</i> {html.escape(mapping_descr)}</p>")
close_section()

# Section with the calibration formula and error metrics per sensor, shown for
# the degree N-2 model of each method; a hint is printed when no models exist.
if 'calibration_models' in globals() and calibration_models is not None and not calibration_models.empty:
    open_section('Формулы калибровки (степень N−2) и метрики')
    for sensor in follow_order:
        cm = calibration_models[calibration_models['sensor'] == sensor]
        if cm.empty:
            continue
        # Number of points: from the models table when recorded there,
        # otherwise counted from the points table.
        if 'n_points' in cm.columns and not cm['n_points'].isna().all():
            n_pts = int(cm['n_points'].max())
        else:
            n_pts = len(calibration_points_by_sensor[calibration_points_by_sensor['sensor'] == sensor])
        deg_star = max(1, n_pts - 2)
        parts.append(f"<h3>{html.escape(friendly_sensor(sensor))}</h3>")
        for method in ('L2', 'L_inf'):
            row = cm[(cm['method'] == method) & (cm['degree'] == deg_star)].head(1)
            if row.empty:
                continue
            rec = row.iloc[0]
            # Missing metric fields are rendered as "nan".
            parts.append(
                f"<p><b>{html.escape(method)}:</b> "
                f"{html.escape(str(rec.get('formula', '')))} | "
                f"RMSE={rec.get('rmse', float('nan')):.6g}, "
                f"MAE={rec.get('mae', float('nan')):.6g}, "
                f"Max|err|={rec.get('maxerr', float('nan')):.6g}</p>"
            )
    close_section()
else:
    parts.append('<p><b>Формулы калибровки:</b> выполните ячейку построения моделей, чтобы получить коэффициенты.</p>')

# Section with one HTML table of calibration points per sensor.
open_section('Калибровочные точки X–Y', 'Статистика по усреднённым температурам и разбросам внутри корзин.')
# Preferred column order; columns absent from the data are skipped.
base_columns = ['bin_center', 'bin_low', 'bin_high', 'x_mean', 'x_std', 'y_mean', 'y_std', 'n_samples', 'start_date', 'end_date', 'source_file']
for sensor in follow_order:
    subset = calibration_points_by_sensor[calibration_points_by_sensor['sensor'] == sensor]
    if subset.empty:
        continue
    cols = [col for col in base_columns if col in subset.columns]
    table = subset.loc[:, cols].copy()
    # Render dates as text with second precision; unparseable values become missing.
    for col in ['start_date', 'end_date']:
        if col in table.columns:
            table[col] = pd.to_datetime(table[col], errors='coerce').dt.strftime('%Y-%m-%d %H:%M:%S')
    parts.append(f"<h3>{html.escape(friendly_sensor(sensor))}</h3>")
    parts.append(table.to_html(index=False, formatters={c: fmt_float for c in table.columns}, na_rep=''))
close_section()

# Section with time series and stable segments; emitted only when the
# STABLE_BY_SENSOR table exists and is non-empty.
if 'STABLE_BY_SENSOR' in globals() and STABLE_BY_SENSOR is not None and not STABLE_BY_SENSOR.empty:
    open_section('Стабильные интервалы и временные ряды')
    for sensor in follow_order:
        fig = build_stability_figure(sensor, STABLE_BY_SENSOR[STABLE_BY_SENSOR['sensor'] == sensor])
        # add_figure skips sensors for which no figure could be built (None).
        add_figure(fig, f"{friendly_sensor(sensor)}: временной ряд и устойчивые сегменты", parts)
    close_section()

# Section with one bullet per sensor summarising spreads and fit errors;
# values that are missing are shown as a dash.
open_section('Выводы по точности датчиков')
parts.append('<ul>')
for summary in sensor_summaries:
    parts.append('<li><b>' + html.escape(summary['label']) + '</b>: '
                 'σ датчика ≈ ' + (fmt_float(summary.get('std_sensor_mean')) or '—') + ' °C (max ' + (fmt_float(summary.get('std_sensor_max')) or '—') + ' °C); '
                 'σ эталона ≈ ' + (fmt_float(summary.get('std_ref_mean')) or '—') + ' °C (max ' + (fmt_float(summary.get('std_ref_max')) or '—') + ' °C); '
                 'RMSE L2 = ' + (fmt_float(summary.get('rmse_L2')) or '—') + ' °C, RMSE L_inf = ' + (fmt_float(summary.get('rmse_L_inf')) or '—') + ' °C; '
                 'Δ(L2,L_inf) ≈ ' + (fmt_float(summary.get('poly_delta_mean')) or '—') + ' °C (max ' + (fmt_float(summary.get('poly_delta_max')) or '—') + ' °C).</li>')
parts.append('</ul>')
close_section()

# Section: least-squares polynomial fits and residuals, one figure per sensor.
open_section('Графики полиномиальных аппроксимаций')
for sensor in follow_order:
    fig = build_polyfit_figure(sensor, calibration_points_by_sensor[calibration_points_by_sensor['sensor'] == sensor])
    add_figure(fig, f"{friendly_sensor(sensor)}: полиномиальные аппроксимации и остатки", parts)
close_section()

# Section: side-by-side comparison of the L2 and L_inf models per sensor.
open_section('L2 vs L_inf: сравнение моделей и остатки')
for sensor in follow_order:
    fig = build_model_comparison_figure(sensor, calibration_points_by_sensor[calibration_points_by_sensor['sensor'] == sensor])
    add_figure(fig, f"{friendly_sensor(sensor)}: сравнение моделей L2 и L_inf", parts)
close_section()

# Appendix: raw X–Y scatter with error bars (top) and the per-point standard
# deviations of X and Y as bars (bottom), one figure per sensor.
open_section('Приложение: базовые графики X–Y')
for sensor in follow_order:
    subset = calibration_points_by_sensor[calibration_points_by_sensor['sensor'] == sensor]
    if subset.empty:
        continue
    fig = make_subplots(rows=2, cols=1, row_heights=[0.65, 0.35], vertical_spacing=0.12)
    x_vals = subset['x_mean'].astype(float).to_numpy()
    y_vals = subset['y_mean'].astype(float).to_numpy()
    # Missing std columns are replaced by zeros.
    x_err = subset['x_std'].astype(float).to_numpy() if 'x_std' in subset.columns else np.full_like(x_vals, 0.0)
    y_err = subset['y_std'].astype(float).to_numpy() if 'y_std' in subset.columns else np.full_like(y_vals, 0.0)
    # Tick labels of the lower panel: bin centers, or running indices as a fallback.
    labels = [f"{bc:.1f}" for bc in subset['bin_center']] if 'bin_center' in subset.columns else [str(i) for i in range(len(subset))]
    fig.add_trace(go.Scatter(x=x_vals, y=y_vals, mode='markers', name='Точки', marker=dict(size=9), error_x=dict(type='data', array=x_err), error_y=dict(type='data', array=y_err)), row=1, col=1)
    fig.add_trace(go.Bar(x=list(range(len(labels))), y=x_err, name='std X'), row=2, col=1)
    fig.add_trace(go.Bar(x=list(range(len(labels))), y=y_err, name='std Y'), row=2, col=1)
    fig.update_xaxes(title_text=f"{sensor} (X)", row=1, col=1)
    fig.update_yaxes(title_text=f"{REF_COL} (Y)", row=1, col=1)
    fig.update_xaxes(title_text='Центр полочки (°C)', tickmode='array', tickvals=list(range(len(labels))), ticktext=labels, row=2, col=1)
    fig.update_yaxes(title_text='Std (°C)', row=2, col=1)
    fig.update_layout(title=f"{friendly_sensor(sensor)} — X–Y и ошибки", height=620, legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='center', x=0.5), margin=dict(t=80, l=60, r=40, b=70))
    add_figure(fig, f"{friendly_sensor(sensor)}: базовые графики X–Y и дисперсии", parts)
close_section()

# Collapsed section with the theory behind the report. Formulas are written
# for MathJax: `$$...$$` for display math and `\(...\)` for inline math (the
# default MathJax delimiters). The model direction matches the fitting code:
# the reference temperature is approximated by a polynomial of the sensor
# temperature, with weights derived from the reference standard deviation.
open_section('Теоретическая справка и формулы', opened=False)
parts.append('<p><b>Структура процесса.</b> Сырые логи агрегируются, эталон фильтруется скользящими окнами (STD_THR, DIFF_THR), устойчивые сегменты длиной не менее MIN_LEN группируются по корзинам DEG_TOL, после чего усреднённые пары (датчик, эталон) используются для построения моделей.</p>')
parts.append(r'<p><b>Взвешенная МНК.</b> Используются веса $$w_i = \frac{1}{\max(\sigma_{ref,i}, \varepsilon)}$$ и минимизируется функционал $$\sum_i w_i^2 (T_{ref,i} - p^{(d)}(T_{sens,i}))^2.$$</p>')
parts.append(r'<p><b>Минимакс модель.</b> Полином \(p^{(d)}\) строится как решение $$\min_{p \in \Pi_d} \max_i |T_{ref,i} - p(T_{sens,i})|,$$ реализованное через линейное программирование HiGHS или IRLS-приближение.</p>')
parts.append('<p><b>Интерпретация.</b> Полученные коэффициенты переводят температуру датчика в шкалу эталона. Показатели σ и Δ(L2,L_inf) позволяют контролировать качество исходных данных и чувствительность модели к выбросам.</p>')
close_section()

# Per-sensor metric columns pulled out of the summaries; entries are None for
# sensors where the metric was never computed.
values_sensor = [s.get('std_sensor_mean') for s in sensor_summaries]
values_ref = [s.get('std_ref_mean') for s in sensor_summaries]
values_rmse_l2 = [s.get('rmse_L2') for s in sensor_summaries]
values_rmse_linf = [s.get('rmse_L_inf') for s in sensor_summaries]
values_delta = [s.get('poly_delta_max') for s in sensor_summaries]

def _nanmean_safe(values):
    """Return the mean of ``values`` ignoring ``None``/NaN, or NaN if none are valid."""
    arr = np.array([np.nan if v is None else float(v) for v in values], dtype=float)
    if arr.size == 0 or np.isnan(arr).all():
        return np.nan
    return float(np.nanmean(arr))

def _nanmax_safe(values):
    """Return the maximum of ``values`` ignoring ``None``/NaN, or NaN if none are valid."""
    arr = np.array([np.nan if v is None else float(v) for v in values], dtype=float)
    if arr.size == 0 or np.isnan(arr).all():
        return np.nan
    return float(np.nanmax(arr))

# Final section: metrics aggregated over all sensors (mean of the spreads and
# RMSEs, maximum of the L2/L_inf disagreement); missing values show as a dash.
open_section('Итоговая оценка качества данных')
mean_sensor_sigma = _nanmean_safe(values_sensor)
mean_ref_sigma = _nanmean_safe(values_ref)
mean_rmse_l2 = _nanmean_safe(values_rmse_l2)
mean_rmse_linf = _nanmean_safe(values_rmse_linf)
max_delta = _nanmax_safe(values_delta)
parts.append('<p>Среднее σ датчиков: ' + (fmt_float(mean_sensor_sigma) or '—') + ' °C; σ эталона: ' + (fmt_float(mean_ref_sigma) or '—') + ' °C.</p>')
parts.append('<p>Средний RMSE: L2 = ' + (fmt_float(mean_rmse_l2) or '—') + ' °C, L_inf = ' + (fmt_float(mean_rmse_linf) or '—') + ' °C.</p>')
parts.append('<p>Максимальное расхождение между L2 и L_inf: ' + (fmt_float(max_delta) or '—') + ' °C.</p>')
parts.append('<p>Случайная погрешность итоговой калибровки оценивается как ' + (fmt_float(mean_rmse_l2) or '—') + ' °C (L2) и ' + (fmt_float(mean_rmse_linf) or '—') + ' °C (L_inf). Качество исходных данных признаётся удовлетворительным, если σ эталона остаётся ниже MAX_REF_RANGE.</p>')
close_section()

# Stylesheet embedded into the report.
css = """
body{font-family:Segoe UI,Arial,sans-serif;line-height:1.35}
table{border-collapse:collapse;margin-bottom:18px}
td,th{border:1px solid #ddd;padding:4px 6px}
h1,h2{margin-top:1em}
details.section{margin:18px 0;padding:8px 12px;border:1px solid #d0d7e2;border-radius:6px;background:#f7faff}
details.section>summary{cursor:pointer;font-weight:600;padding:4px 0}
details.section[open]{background:#eef4ff}
.figure{margin:16px 0}
.figure-caption{font-size:0.9em;color:#444;margin-top:4px}
.plotly-figure,.js-plotly-plot{margin:12px 0;}
"""

# Full document: head with styles and the MathJax loader, then the body
# assembled from the accumulated fragments.
html_lines = [
    "<html>",
    "<head>",
    "<meta charset='utf-8'>",
    "<title>Отчёт калибровки</title>",
    "<style>" + css + "</style>",
    "<script src='https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js' defer></script>",
    "</head>",
    "<body>",
    "".join(parts),
    "</body>",
    "</html>"
]
# The separator must be the escape sequence "\n": a literal line break inside
# a single-quoted string is a syntax error.
html_report = "\n".join(html_lines)

# Output directory: CALIBRATION_REPORT_DIR when it points to an existing path,
# otherwise the current working directory.
report_dir = Path(os.getenv('CALIBRATION_REPORT_DIR', Path.cwd()))
if not report_dir.exists():
    report_dir = Path.cwd()
report_dir.mkdir(parents=True, exist_ok=True)
report_path = report_dir / 'calibration_report.html'
report_path.write_text(html_report, encoding='utf-8')

print('HTML-отчёт сохранён:', report_path)


HTML-отчёт сохранён: c:\Users\Alexander\Documents\GitHub\sensor_calibration\calibration_report.html
