In [46]:
import torch
import pandas as pd
import pickle
from tqdm.auto import tqdm
from pathlib import Path
from transformers import AutoTokenizer, AutoModel
import time
# === Настройки ===
EMOTION_LABELS = ["Neutral", "Anger", "Disgust", "Fear", "Happiness", "Sadness", "Surprise", "Other"]
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MODEL_NAME = "jinaai/jina-embeddings-v3"
MODEL_DIR = Path("logs_rerun_best/20250627-111539")
MAX_LEN = 192
BATCH_SIZE = 48
SAVE_DIR = Path("inference_outputs")
SAVE_DIR.mkdir(parents=True, exist_ok=True)

# === CSV файлы ===
CSV_PATHS = [
    "AFEW/Qwen2.5-VL-32B-Instruct/train_segment_with_text_1.csv",
    "AffWild2/Qwen2.5-VL-32B-Instruct/train_segment_with_text_1.csv",
    "AFEW/Qwen2.5-VL-32B-Instruct/dev_segment_with_text_1.csv",
    "AffWild2/Qwen2.5-VL-32B-Instruct/dev_segment_with_text_1.csv",
    "C-EXPR-DB/Qwen2.5-VL-32B-Instruct/test_segment_with_text_1.csv",
    "C-EXPR-DB/Qwen2.5-VL-32B-Instruct/test_segment_with_text_2.csv",
    
    
]

# === Найдём лучшую модель ===
best_model_path = max(MODEL_DIR.glob("best_*.pth"), key=lambda p: float(p.stem.split("_")[1]))
print(f"🏆 Используем модель: {best_model_path}")


🏆 Используем модель: logs_rerun_best\20250627-111539\best_0.4232.pth


In [47]:
# === Класс модели ===
class EmbeddingClassifier(torch.nn.Module):
    def __init__(self, base_model, embedding_dim=1024, num_classes=8):
        super().__init__()
        self.base = base_model
        self.classifier = torch.nn.Linear(embedding_dim, num_classes)

    def forward(self, input_ids, attention_mask):
        out = self.base(input_ids=input_ids, attention_mask=attention_mask)
        return out.last_hidden_state[:, 0], self.classifier(out.last_hidden_state[:, 0])

# === Загрузка модели и токенизатора ===
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
base_model = AutoModel.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = EmbeddingClassifier(base_model, embedding_dim=1024, num_classes=len(EMOTION_LABELS)).to(DEVICE)
model.load_state_dict(torch.load(best_model_path, map_location=DEVICE))
model.eval().float()

flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn i

EmbeddingClassifier(
  (base): XLMRobertaLoRA(
    (roberta): XLMRobertaModel(
      (embeddings): XLMRobertaEmbeddings(
        (word_embeddings): ParametrizedEmbedding(
          250002, 1024, padding_idx=1
          (parametrizations): ModuleDict(
            (weight): ParametrizationList(
              (0): LoRAParametrization()
            )
          )
        )
        (token_type_embeddings): ParametrizedEmbedding(
          1, 1024
          (parametrizations): ModuleDict(
            (weight): ParametrizationList(
              (0): LoRAParametrization()
            )
          )
        )
      )
      (emb_drop): Dropout(p=0.1, inplace=False)
      (emb_ln): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (encoder): XLMRobertaEncoder(
        (layers): ModuleList(
          (0-23): 24 x Block(
            (mixer): MHA(
              (rotary_emb): RotaryEmbedding()
              (Wqkv): ParametrizedLinearResidual(
                in_features=1024, out_features=3

In [48]:
# === Инференс и сохранение ===
for csv_path in CSV_PATHS:
    t0 = time.time()
    df = pd.read_csv(csv_path)
    texts = df["text"].tolist()
    source_files = df["video_name"].tolist()
    preds = []
    probs_all = []
    features_all = []

    print(f"\n📄 Файл: {csv_path} ({len(texts)} записей)")

    with torch.inference_mode():
        for i in tqdm(range(0, len(texts), BATCH_SIZE), desc="🔮 Инференс"):
            batch = texts[i:i + BATCH_SIZE]
            enc = tokenizer(batch, padding=True, truncation=True, max_length=MAX_LEN, return_tensors="pt")
            input_ids = enc["input_ids"].to(DEVICE)
            attention_mask = enc["attention_mask"].to(DEVICE)
            feats, logits = model(input_ids=input_ids, attention_mask=attention_mask)
            probs = torch.softmax(logits, dim=1).cpu()
            batch_preds = probs.argmax(dim=1).tolist()
            preds.extend(batch_preds)
            probs_all.extend(probs.tolist())
            features_all.extend(feats.cpu().tolist())

    df["predicted_label"] = preds
    df["predicted_emotion"] = [EMOTION_LABELS[i] for i in preds]
    df["predicted_prob"] = [max(p) for p in probs_all]

    # CSV save
    rel_path = Path(csv_path).with_suffix("").relative_to(Path(csv_path).anchor)
    save_csv_path = SAVE_DIR / rel_path.parent / (rel_path.name + "_pred.csv")
    save_csv_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(save_csv_path, index=False)
    print(f"✅ Сохранено: {save_csv_path} — ⏱️ {time.time()-t0:.1f}s")

    # Pickle save
    records = []
    for text, src, pred, prob, prob_vec, features in zip(texts, source_files, preds, [max(p) for p in probs_all], probs_all, features_all):
        records.append({
            "video_name": src,
            "text": text,
            "predicted_label": pred,
            "predicted_emotion": EMOTION_LABELS[pred],
            "predicted_prob": prob,
            "probs": prob_vec,
            "features": features
        })

    pkl_path = save_csv_path.with_suffix(".pkl")
    with open(pkl_path, "wb") as f:
        pickle.dump(records, f)
    print(f"📦 Сохранил фичи в: {pkl_path}")


📄 Файл: AFEW/Qwen2.5-VL-32B-Instruct/train_segment_with_text_1.csv (855 записей)


🔮 Инференс:   0%|          | 0/18 [00:00<?, ?it/s]

✅ Сохранено: inference_outputs\AFEW\Qwen2.5-VL-32B-Instruct\train_segment_with_text_1_pred.csv — ⏱️ 298.0s
📦 Сохранил фичи в: inference_outputs\AFEW\Qwen2.5-VL-32B-Instruct\train_segment_with_text_1_pred.pkl

📄 Файл: AffWild2/Qwen2.5-VL-32B-Instruct/train_segment_with_text_1.csv (15158 записей)


🔮 Инференс:   0%|          | 0/316 [00:00<?, ?it/s]

✅ Сохранено: inference_outputs\AffWild2\Qwen2.5-VL-32B-Instruct\train_segment_with_text_1_pred.csv — ⏱️ 5468.7s
📦 Сохранил фичи в: inference_outputs\AffWild2\Qwen2.5-VL-32B-Instruct\train_segment_with_text_1_pred.pkl

📄 Файл: AFEW/Qwen2.5-VL-32B-Instruct/dev_segment_with_text_1.csv (411 записей)


🔮 Инференс:   0%|          | 0/9 [00:00<?, ?it/s]

✅ Сохранено: inference_outputs\AFEW\Qwen2.5-VL-32B-Instruct\dev_segment_with_text_1_pred.csv — ⏱️ 132.8s
📦 Сохранил фичи в: inference_outputs\AFEW\Qwen2.5-VL-32B-Instruct\dev_segment_with_text_1_pred.pkl

📄 Файл: AffWild2/Qwen2.5-VL-32B-Instruct/dev_segment_with_text_1.csv (6450 записей)


🔮 Инференс:   0%|          | 0/135 [00:00<?, ?it/s]

✅ Сохранено: inference_outputs\AffWild2\Qwen2.5-VL-32B-Instruct\dev_segment_with_text_1_pred.csv — ⏱️ 2308.4s
📦 Сохранил фичи в: inference_outputs\AffWild2\Qwen2.5-VL-32B-Instruct\dev_segment_with_text_1_pred.pkl

📄 Файл: C-EXPR-DB/Qwen2.5-VL-32B-Instruct/test_segment_with_text_1.csv (449 записей)


🔮 Инференс:   0%|          | 0/10 [00:00<?, ?it/s]

✅ Сохранено: inference_outputs\C-EXPR-DB\Qwen2.5-VL-32B-Instruct\test_segment_with_text_1_pred.csv — ⏱️ 149.8s
📦 Сохранил фичи в: inference_outputs\C-EXPR-DB\Qwen2.5-VL-32B-Instruct\test_segment_with_text_1_pred.pkl

📄 Файл: C-EXPR-DB/Qwen2.5-VL-32B-Instruct/test_segment_with_text_2.csv (449 записей)


🔮 Инференс:   0%|          | 0/10 [00:00<?, ?it/s]

✅ Сохранено: inference_outputs\C-EXPR-DB\Qwen2.5-VL-32B-Instruct\test_segment_with_text_2_pred.csv — ⏱️ 149.7s
📦 Сохранил фичи в: inference_outputs\C-EXPR-DB\Qwen2.5-VL-32B-Instruct\test_segment_with_text_2_pred.pkl


In [50]:
with open("inference_outputs/AFEW/Qwen2.5-VL-32B-Instruct/train_segment_with_text_1_pred.pkl", "rb") as f:
    records = pickle.load(f)

print(f"🔍 Загружено {len(records)} записей")
print("Пример:")
print(records[:2])

🔍 Загружено 855 записей
Пример:
[{'video_name': '000046280___Angry_0000.avi', 'text': "The scene is dimly lit, creating a somber and intense atmosphere that amplifies the emotional weight of the moment. The individual's facial expression conveys a complex mix of emotions, primarily centered around fear and tension. Their eyes are wide open, with pupils dilated, suggesting heightened alertness or anxiety. The eyebrows are slightly furrowed, adding to the sense of concern or unease. The mouth is partially open, indicating a state of surprise or shock, possibly reacting to an unexpected event or threat. There is minimal movement of the head or body, which suggests a frozen or startled reaction rather than active engagement. The overall stillness and intensity of their expression, combined with the dark background, reinforce a strong sense of fear and vulnerability. This emotional state appears consistent throughout the frames, with no significant transition observed, maintaining a tense a

In [27]:
with open("inference_outputs/AffWild2/Qwen2.5-VL-7B-Instruct/train_segment_with_text_1_pred.pkl", "rb") as f:
    records = pickle.load(f)

print(f"🔍 Загружено {len(records)} записей")
print("Пример:")
print(records[:2])

🔍 Загружено 15158 записей
Пример:
[{'video_name': '105___7552_7672_0126.avi', 'text': 'The individual appears seated on a blue couch against a plain wall, wearing a red striped shirt, suggesting a casual setting. Their facial expression shifts from a wide smile to a more neutral look, indicating a transition from joy to a calmer state. The eyes widen slightly at one point, hinting at surprise or heightened interest, but quickly return to a neutral gaze. The overall body posture remains relaxed throughout, with minimal movement, reinforcing the sense of a calm and composed demeanor. The scene lacks any significant environmental elements that might influence the emotional state, focusing solely on the subtle changes in facial expressions and gaze.', 'predicted_label': 4, 'predicted_emotion': 'Happiness', 'predicted_prob': 0.3524705767631531, 'probs': [0.18866893649101257, 0.01651657558977604, 0.25110942125320435, 0.007982643321156502, 0.3524705767631531, 0.022828616201877594, 0.030429013