In [None]:
import re
import pandas as pd

# ───────────────────────── Парсер ──────────────────────────
def parse_smart_log(path_or_str, top_n=15):
    """Parse a hyper-parameter search log into a DataFrame of steps.

    The parser looks for three kinds of lines:

    * a step header ``Шаг N: k1 + k2 = (v1, v2)`` - starts a new record and
      stores every hyper-parameter under its own key;
    * ``Результаты (DEV):`` - the first following ``MEAN = x`` becomes ``dev``;
    * ``Результаты (TEST):`` - the first following ``MEAN = x`` becomes
      ``test`` and the first following ``GAP = x`` becomes ``gap``.

    Look-ahead for MEAN/GAP stops at the next step header, so a step with a
    missing value never borrows the value of the following step.  When no GAP
    line is found, ``gap`` is computed as ``round(test - dev, 4)``.

    Args:
        path_or_str: Log text, or a path (``str`` / ``os.PathLike``) to a
            UTF-8 text file.  A string is treated as text when it contains a
            newline; a single-line string is treated as a path when such a
            file exists, otherwise as text when it contains ``Шаг``.
        top_n: Number of best rows (by ``test``) to keep; ``None`` keeps all.

    Returns:
        pandas.DataFrame sorted by ``test`` descending with a fresh index.
        Only steps that have both ``dev`` and ``test`` are included.  The
        frame is empty (no columns) when nothing was parsed.

    Raises:
        OSError: if the argument is taken as a path and cannot be opened.
    """
    # Local imports keep this function self-contained within its cell.
    import ast
    import os

    # ── decide whether we received a path or the log text itself ──────────
    if isinstance(path_or_str, os.PathLike):
        read_from_file = True
    elif '\n' in path_or_str:
        read_from_file = False
    elif os.path.isfile(path_or_str):
        # an existing file wins even if its name happens to contain «Шаг»
        read_from_file = True
    else:
        read_from_file = 'Шаг' not in path_or_str

    if read_from_file:
        with open(path_or_str, encoding='utf-8') as f:
            lines = [ln.rstrip("\n") for ln in f]
    else:
        lines = path_or_str.splitlines()

    # A well-formed decimal: "12", "0.58", "5." or ".5".  Unlike "[0-9.]+"
    # it cannot swallow a sentence-ending dot and then crash float().
    number = r"(?:\d+\.?\d*|\.\d+)"
    step_re = re.compile(
        r"Шаг\s+(\d+):\s*([^=]+?)=\s*\((.*?)\)"
    )
    mean_re = re.compile(rf"MEAN\s*=\s*({number})")
    gap_re = re.compile(rf"GAP\s*=\s*([+-]?{number})")

    rows, current = [], {}

    def first_value(pattern, start):
        """First number matched by `pattern` from line `start` on, or None.

        The scan ends at the next step header so values of a later step are
        never attributed to the current one.
        """
        for j in range(start, len(lines)):
            if step_re.search(lines[j]):
                return None
            hit = pattern.search(lines[j])
            if hit:
                return float(hit.group(1))
        return None

    def flush(record):
        """Store `record` if it has both metrics (0.0 is a valid metric)."""
        if 'dev' in record and 'test' in record:
            record.setdefault('gap', round(record['test'] - record['dev'], 4))
            rows.append(record)

    for i, line in enumerate(lines):
        # ── 1. step header «Шаг N: …» ─────────────────────────────────────
        m = step_re.search(line)
        if m:
            flush(current)
            current = {'step': int(m.group(1))}

            keys = [k.strip() for k in m.group(2).split('+')]
            # split on commas, but keep quoted strings in one piece
            raw_vals = re.findall(r"'[^']*'|[^,]+", m.group(3))
            vals = [v.strip().strip("'") for v in raw_vals]
            for k, v in zip(keys, vals):
                try:
                    # literal_eval only accepts Python literals, so a value
                    # such as max stays the string "max" instead of becoming
                    # the builtin, and no code from the log is executed.
                    current[k] = ast.literal_eval(v)
                except (ValueError, TypeError, SyntaxError,
                        MemoryError, RecursionError):
                    current[k] = v      # not a literal: keep the raw text

        # ── 2. «Результаты (DEV):» ────────────────────────────────────────
        if "Результаты (DEV):" in line:
            value = first_value(mean_re, i + 1)
            if value is not None:
                current['dev'] = value

        # ── 3. «Результаты (TEST):» + GAP ─────────────────────────────────
        if "Результаты (TEST):" in line:
            value = first_value(mean_re, i + 1)
            if value is not None:
                current['test'] = value
            value = first_value(gap_re, i + 1)
            if value is not None:
                current['gap'] = value

    # do not forget the last record
    flush(current)

    df = pd.DataFrame(rows)
    if not df.empty:
        df = df.sort_values('test', ascending=False)
        if top_n is not None:
            df = df.head(top_n)
        df = df.reset_index(drop=True)
    return df


In [None]:
# Parse every experiment log and show its best rows.
# One data-driven loop replaces the former copy-pasted cells: the paths,
# the top_n passed to the parser and the number of displayed rows are the
# same as before, in the same order.
from IPython.display import display

pd.set_option("display.max_columns", None)
pd.set_option("display.width", 160)

# Folder that holds all the log files (absolute local path - edit here when
# running on another machine).
LOG_DIR = "C:/Users/Alexandr/Desktop/sampling"

# Each entry: (path relative to LOG_DIR, top_n for the parser, rows to display)
LOG_SPECS = (
    [(f"{stem}.txt", 25, 15) for stem in (10, 20, 30, 40, 50)]
    + [(f"{stem}.txt", 25, 25) for stem in (60, 70, 80, 90, 100)]
    + [(f"bi/{stem}.txt", 25, 25) for stem in (10, 20, 30, 40, 50)]
    + [(name, 25, 25) for name in ("5862_адам лучший.txt", "sgd_2.txt")]
    + [(name, 50, 50) for name in (
        "last/biformer.txt",
        "last/BiForm_wtb.txt",
        "smoothing.txt",
        "last/mambas.txt",
        "last/bigated.txt",
        "last/bigraph.txt",
        "last/bigatedgraph.txt",
        "last/smothing/phi.txt",
        "last/smothing/qwen.txt",
        "last/BiGraphFormerWithProb.txt",
        "last/BiGatedGraphFormerWithProb.txt",
    )]
)

for rel_path, top_n, n_show in LOG_SPECS:
    # a heading makes it clear which table belongs to which log
    print(f"\n=== {rel_path} ===")
    df = parse_smart_log(f"{LOG_DIR}/{rel_path}", top_n)
    display(df.head(n_show))

In [None]:
# ======================================================================
# Check the synthetic MELD-S corpus for "broken" embeddings
# ======================================================================

# ---------- 1. Imports and basic settings ----------
import os, logging, traceback
import torch, torchaudio
import pandas as pd
from tqdm.auto import tqdm
from IPython.display import display   # used in section 7; keeps this cell self-contained

# --- if the project lives in another directory, add it to sys.path ---
# import sys; sys.path.append(r"C:\Prgrm\ESWA_2025")

from data_loading.feature_extractor import (
    PretrainedAudioEmbeddingExtractor,
    PretrainedTextEmbeddingExtractor,
)

# ---------- 2. Paths from your config.toml ----------
synthetic_path = r"E:/MELD_S"
synth_csv_path = os.path.join(synthetic_path, "meld_s_train_labels.csv")
synth_wav_dir  = os.path.join(synthetic_path, "wavs")

# ---------- 3. Build the extractors exactly as in the main project ----------
# NOTE(review): the single-sample inference cell of this notebook constructs
# the same extractors with a `config=` object instead of these keyword
# arguments - confirm which signature the project class actually accepts.
AUDIO_MODEL     = "audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim"
AUDIO_CKPT      = "best_audio_model_2.pt"          # path relative to the working directory
TEXT_MODEL      = "jinaai/jina-embeddings-v3"
TEXT_CKPT       = "best_text_model.pth"
DEVICE          = "cuda" if torch.cuda.is_available() else "cpu"
SAMPLE_RATE     = 16000

audio_feat = PretrainedAudioEmbeddingExtractor(
    model_name=AUDIO_MODEL,
    checkpoint=AUDIO_CKPT,
    device=DEVICE,
)

text_feat = PretrainedTextEmbeddingExtractor(
    model_name=TEXT_MODEL,
    checkpoint=TEXT_CKPT,
    device=DEVICE,
)

# ---------- 4. Probe the actual embedding sizes ----------
with torch.no_grad():
    dummy_wav = torch.zeros(1, SAMPLE_RATE)            # one second of silence
    _, a_emb = audio_feat.extract(dummy_wav[0], SAMPLE_RATE)
    AUDIO_DIM = a_emb[0].shape[-1]

    _, t_emb = text_feat.extract("hello world")
    TEXT_DIM  = t_emb[0].shape[-1]

# number of logits produced by each classifier
NUM_EMOTIONS = 7          # ["anger", "disgust", ...] - as in the config
PRED_DIM = NUM_EMOTIONS

EXPECTED_ALL = AUDIO_DIM + TEXT_DIM + 2 * PRED_DIM
print(f"AUDIO_DIM = {AUDIO_DIM},  TEXT_DIM = {TEXT_DIM},  "
      f"TOTAL EXPECTED = {EXPECTED_ALL}")

# ---------- 5. Read the CSV of the synthetic corpus ----------
df = pd.read_csv(synth_csv_path)
print(f"Всего строк в CSV: {len(df)}")

bad_rows, good_cnt = [], 0

# ---------- 6. Walk over the records ----------
# Only forward passes are needed, so autograd is disabled for the whole loop
# (same as for the probe in section 4).
with torch.no_grad():
    for i, row in tqdm(df.iterrows(), total=len(df)):
        video_name = row["video_name"]
        wav_path   = os.path.join(synth_wav_dir, f"{video_name}.wav")
        txt        = row.get("text", "")
        # An empty CSV cell is read as NaN (a float); without this
        # normalisation len(txt) below would raise outside the try block
        # and abort the whole scan.
        if not isinstance(txt, str):
            txt = "" if pd.isna(txt) else str(txt)

        reason = None
        try:
            # 6.1 Does the wav file exist?
            if not os.path.exists(wav_path):
                reason = "file_missing"

            # 6.2 Audio embedding
            if reason is None:
                wf, sr = torchaudio.load(wav_path)
                if sr != SAMPLE_RATE:
                    wf = torchaudio.transforms.Resample(sr, SAMPLE_RATE)(wf)
                a_pred, a_emb = audio_feat.extract(wf[0], SAMPLE_RATE)   # first channel only
                a_emb = a_emb[0]
                if a_emb.shape[-1] != AUDIO_DIM:
                    reason = f"audio_dim_{a_emb.shape[-1]}"

            # 6.3 Text embedding
            if reason is None:
                t_pred, t_emb = text_feat.extract(txt)
                t_emb = t_emb[0]
                if t_emb.shape[-1] != TEXT_DIM:
                    reason = f"text_dim_{t_emb.shape[-1]}"

            # 6.4 Check the full concatenation
            if reason is None:
                full_vec = torch.cat([a_emb, t_emb, a_pred[0], t_pred[0]], dim=-1)
                if full_vec.shape[-1] != EXPECTED_ALL:
                    reason = f"concat_dim_{full_vec.shape[-1]}"

        except Exception as e:
            # Deliberate best-effort: record the failure and keep scanning.
            reason = "exception_" + e.__class__.__name__
            logging.error(f"{video_name}: {traceback.format_exc(limit=2)}")

        # 6.5 Store the outcome
        if reason:
            bad_rows.append({
                "idx": i,
                "video_name": video_name,
                "reason": reason,
                "wav_path": wav_path,
                "text_len": len(txt),
            })
        else:
            good_cnt += 1

# ---------- 7. Summary ----------
print(f"\n✅ GOOD : {good_cnt}")
print(f"❌ BAD  : {len(bad_rows)}")

# Explicit columns so that an empty result still yields a CSV with a header.
bad_df = pd.DataFrame(
    bad_rows,
    columns=["idx", "video_name", "reason", "wav_path", "text_len"],
)
display(bad_df)

# ---------- 8. (Optional) save the list of bad files ----------
out_csv = os.path.join(synthetic_path, "bad_synth_meld.csv")
bad_df.to_csv(out_csv, index=False)
print(f"\nСписок «битых» примеров сохранён в: {out_csv}")


In [None]:
import torch
from models.models import BiFormer

# Build the model with the parameters from config.toml.
# The device falls back to CPU when CUDA is unavailable, as the other cells
# of this notebook do.
model = BiFormer(
    audio_dim=256,
    text_dim=1024,
    seg_len=95,
    hidden_dim=256,
    hidden_dim_gated=256,
    num_transformer_heads=8,
    num_graph_heads=2,
    positional_encoding=False,
    dropout=0.15,
    mode='mean',
    device="cuda" if torch.cuda.is_available() else "cpu",
    tr_layer_number=5,
    out_features=256,
    num_classes=7
)

# Count parameters: walk the parameter list once and reuse it for both totals
model_params = list(model.parameters())
total_params = sum(p.numel() for p in model_params)
trainable_params = sum(p.numel() for p in model_params if p.requires_grad)

print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")


In [None]:
from transformers import AutoModel, Wav2Vec2Model, WhisperModel
from transformers import AutoConfig
from pathlib import Path
import torch
import os

def count_parameters(model):
    """Return ``(total, trainable)`` element counts over ``model.parameters()``.

    A parameter contributes ``numel()`` elements to the total, and also to the
    trainable count when its ``requires_grad`` flag is truthy.
    """
    sizes_and_flags = [(w.numel(), w.requires_grad) for w in model.parameters()]
    total_params = sum(size for size, _ in sizes_and_flags)
    trainable_params = sum(size for size, needs_grad in sizes_and_flags if needs_grad)
    return total_params, trainable_params

def get_model_size(model_path):
    """Return the on-disk size, in MiB, of all regular files under `model_path`.

    Symbolic links are skipped: a link occupies no payload space itself, and a
    tree that links to files stored elsewhere inside the same directory would
    otherwise have those files counted twice.

    Args:
        model_path: Directory to scan recursively (``str`` or ``Path``).

    Returns:
        float: total size in MiB (bytes / 1024**2).
    """
    total_size = 0
    for file in Path(model_path).rglob("*"):
        if file.is_symlink():
            continue  # the link target is counted where it really lives
        if file.is_file():
            total_size += file.stat().st_size
    return total_size / (1024 ** 2)  # MB

# Models to inspect: display name -> (Hugging Face repo id, loader class)
models_info = {
    "Jina Embeddings V3": ("jinaai/jina-embeddings-v3", AutoModel),
    "Wav2Vec2 Large Robust": ("audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim", Wav2Vec2Model),
    "Whisper Base": ("openai/whisper-base", WhisperModel)
}

cache_dir = "./hf_models"  # folder the models are downloaded into

for name, (model_name, model_class) in models_info.items():
    print(f"\n📦 Загрузка: {name}")
    # NOTE(review): trust_remote_code=True runs Python code shipped with the
    # repository; keep it only for repos you trust.
    model = model_class.from_pretrained(model_name, cache_dir=cache_dir, trust_remote_code=True)

    total, trainable = count_parameters(model)
    # The Hub cache stores a repo under "models--<org>--<name>"; the former
    # "<org>-<name>" folder never exists, so the size was always 0.00 MB.
    repo_dir = Path(cache_dir) / ("models--" + model_name.replace("/", "--"))
    size_mb = get_model_size(repo_dir)

    print(f"🔹 {name}")
    print(f"   • Total parameters:     {total:,}")
    print(f"   • Trainable parameters: {trainable:,}")
    print(f"   • Disk size:            {size_mb:.2f} MB")


In [None]:
import torch
import os
from data_loading.pretrained_extractors import get_model_mamba, Mamba

# Checkpoint files of the two classifiers
AUDIO_PATH = "best_audio_model_2.pt"
TEXT_PATH = "best_text_model.pth"

# ===== Audio classifier =====
# Hyper-parameters handed to the model factory
audio_params = dict(
    input_size=1024,
    d_model=256,
    num_layers=2,
    num_classes=7,
    dropout=0.2,
)
audio_model = get_model_mamba(audio_params)
audio_state = torch.load(AUDIO_PATH, map_location="cpu")   # the file is the state dict itself
audio_model.load_state_dict(audio_state)
audio_params_count = 0
for weight in audio_model.parameters():
    audio_params_count += weight.numel()

# ===== Text classifier =====
# Here the state dict sits under the "model_state_dict" key of the checkpoint
ckpt = torch.load(TEXT_PATH, map_location="cpu")
text_model = Mamba(
    num_layers=2,
    d_input=1024,
    d_model=512,
    num_classes=7,
    model_name="jinaai/jina-embeddings-v3",
    max_tokens=128,
    pooling=None,
)
text_model.load_state_dict(ckpt["model_state_dict"])
text_params_count = 0
for weight in text_model.parameters():
    text_params_count += weight.numel()

# ===== Disk size =====
# Checkpoint file sizes in MiB
audio_size_mb, text_size_mb = (
    os.path.getsize(ckpt_path) / 1024**2 for ckpt_path in (AUDIO_PATH, TEXT_PATH)
)

# ===== Print summary =====
print(
    f"🎙 Audio classifier: {audio_params_count:,} params | {audio_size_mb:.2f} MB",
    f"📝 Text classifier: {text_params_count:,} params | {text_size_mb:.2f} MB",
    sep="\n",
)


In [None]:
import torch
from models.models import BiFormer
from data_loading.feature_extractor import PretrainedAudioEmbeddingExtractor, PretrainedTextEmbeddingExtractor
import torchaudio

# === 1. Settings
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
text = "Why do all you’re coffee mugs have numbers on the bottom?"
audio_path = "E:/MELD/wavs/test/dia0_utt0.wav"

# === 2. Load the audio
waveform, sr = torchaudio.load(audio_path)
# Take the first channel so the extractor always gets a 1-D (T,) signal;
# squeeze(0) would leave a multi-channel file 2-D. This matches the wf[0]
# convention used by the corpus-check cell of this notebook.
waveform = waveform[0]

# === 3. Audio embedding extractor
# The config is an ad-hoc object whose attributes carry the settings.
audio_extractor = PretrainedAudioEmbeddingExtractor(config=type('cfg', (), {
    "audio_model_name": "audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim",
    "emb_device": device,
    "audio_pooling": "mean",
    "emb_normalize": True,
    "max_audio_frames": 0,
    "audio_classifier_checkpoint": "best_audio_model_2.pt"
})())

# === 4. Text embedding extractor
text_extractor = PretrainedTextEmbeddingExtractor(config=type('cfg', (), {
    "text_model_name": "jinaai/jina-embeddings-v3",
    "emb_device": device,
    "text_pooling": "mean",
    "emb_normalize": True,
    "max_tokens": 128,
    "text_classifier_checkpoint": "best_text_model.pth"
})())

# === 5. Extract the embeddings (forward passes only - no autograd needed)
with torch.no_grad():
    _, audio_emb = audio_extractor.extract(waveform, sample_rate=sr)
    _, text_emb = text_extractor.extract(text)

# === 6. Load BiFormer
model = BiFormer(
    audio_dim=256,
    text_dim=1024,
    seg_len=95,
    hidden_dim=256,
    hidden_dim_gated=256,
    num_transformer_heads=8,
    num_graph_heads=2,
    positional_encoding=False,
    dropout=0.15,
    mode='mean',
    device=device,
    tr_layer_number=5,
    out_features=256,
    num_classes=7
).to(device)

model.load_state_dict(torch.load("checkpoints/best_model_dev_0_5895_epoch_8.pt", map_location=device))
model.eval()

# === 7. Prepare the inputs and run inference
# NOTE(review): the original comments give the shapes after unsqueeze as
# (1, 256) and (1, 1024); the corpus-check cell indexes the extractor output
# with [0], which suggests it may already carry a leading dimension - confirm
# the shapes BiFormer expects.
with torch.no_grad():
    audio_emb = audio_emb.unsqueeze(0).to(device)
    text_emb = text_emb.unsqueeze(0).to(device)
    logits = model(audio_emb, text_emb)
pred_class = torch.argmax(logits, dim=1).item()

# === 8. Output
print(f"Predicted emotion class: {pred_class}")
