In [49]:
import torch
import pandas as pd
import pickle
from tqdm.auto import tqdm
from pathlib import Path
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import time
# === Settings ===
# Class names; a predicted class index is used as a position in this list.
EMOTION_LABELS = ["Neutral", "Anger", "Disgust", "Fear", "Happiness", "Sadness", "Surprise", "Other"]

# Prefer the GPU when torch can see one, otherwise run on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Hub id of the base model / tokenizer, and the folder holding fine-tuned checkpoints.
MODEL_NAME = "j-hartmann/emotion-english-distilroberta-base"
MODEL_DIR = Path("trial108/")

# Tokenizer truncation length and number of texts per forward pass.
MAX_LEN = 192
BATCH_SIZE = 16

# Root folder for every file this notebook writes; created up front.
SAVE_DIR = Path("inference_roberta_outputs_4")
SAVE_DIR.mkdir(parents=True, exist_ok=True)

# === CSV files ===
# Each entry is (dataset folder, split, file index); the comprehension below expands
# them into "<dataset>/<_TEXT_SUBDIR>/<split>_segment_with_text_<index>.csv".
_TEXT_SUBDIR = "Qwen2.5-VL-32B-Instruct"
_CSV_SPECS = (
    ("AFEW", "train", 1),
    ("AffWild2", "train", 1),
    ("AFEW", "dev", 1),
    ("AffWild2", "dev", 1),
    ("C-EXPR-DB", "test", 1),
    ("C-EXPR-DB", "test", 2),
)
CSV_PATHS = [
    f"{dataset}/{_TEXT_SUBDIR}/{split}_segment_with_text_{index}.csv"
    for dataset, split, index in _CSV_SPECS
]

# === Pick the best checkpoint ===
# Checkpoints are named "best_<score>.pth"; the one with the highest numeric score wins.
# Sorting first makes the choice independent of the filesystem's listing order.
checkpoint_candidates = sorted(MODEL_DIR.glob("best_*.pth"))
if not checkpoint_candidates:
    # Without this guard max() fails with an opaque "arg is an empty sequence" error.
    raise FileNotFoundError(f"No 'best_*.pth' checkpoint found in {MODEL_DIR.resolve()}")
best_model_path = max(checkpoint_candidates, key=lambda p: float(p.stem.split("_")[1]))
print(f"🏆 Используем модель: {best_model_path}")


🏆 Используем модель: trial108\best_0.4361.pth


In [50]:
# === Load the tokenizer and the model ===
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# The base checkpoint ships a 7-way classifier head; ignore_mismatched_sizes lets
# transformers create a freshly initialised head with len(EMOTION_LABELS) outputs
# instead of raising on the shape mismatch.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=len(EMOTION_LABELS), ignore_mismatched_sizes=True
)
# Replace the weights (including the re-initialised head) with the fine-tuned checkpoint.
# weights_only=True limits unpickling to tensors and plain containers, so a tampered
# .pth file cannot execute arbitrary code; a plain state_dict loads unchanged in this mode.
state_dict = torch.load(best_model_path, map_location=DEVICE, weights_only=True)
model.load_state_dict(state_dict)
model = model.to(DEVICE).eval()

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at j-hartmann/emotion-english-distilroberta-base and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([8, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint and torch.Size([8]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [51]:
# === Inference and saving ===
# For every input CSV: classify each text, then write
#   * <name>_pred.csv - the input rows plus predicted label / emotion / top probability
#   * <name>_pred.pkl - one dict per row with full probabilities, logits and features
# under SAVE_DIR, mirroring the input's relative folder layout.
for csv_path in CSV_PATHS:
    t0 = time.time()
    df = pd.read_csv(csv_path)
    # pandas reads empty cells as NaN (a float), which the tokenizer cannot encode;
    # coerce every entry to str so a single missing text does not abort the whole file.
    texts = df["text"].fillna("").astype(str).tolist()
    source_files = df["video_name"].tolist()
    preds = []
    probs_all = []
    logits_all = []
    features_all = []

    print(f"\n📄 Файл: {csv_path} ({len(texts)} записей)")

    with torch.inference_mode():
        for i in tqdm(range(0, len(texts), BATCH_SIZE), desc="🔮 Инференс"):
            batch = texts[i:i + BATCH_SIZE]
            enc = tokenizer(batch, padding=True, truncation=True, max_length=MAX_LEN, return_tensors="pt")
            input_ids = enc["input_ids"].to(DEVICE)
            attention_mask = enc["attention_mask"].to(DEVICE)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask, output_hidden_states=True)
            logits = outputs.logits
            # Feature vector = last hidden layer at the first token position.
            cls_embeddings = outputs.hidden_states[-1][:, 0].cpu()

            probs = torch.softmax(logits, dim=1).cpu()
            batch_preds = probs.argmax(dim=1).tolist()

            preds.extend(batch_preds)
            probs_all.extend(probs.tolist())
            logits_all.extend(logits.cpu().tolist())
            features_all.extend(cls_embeddings.tolist())

    # Probability of the winning class, computed once and reused for both outputs.
    top_probs = [max(p) for p in probs_all]

    df["predicted_label"] = preds
    df["predicted_emotion"] = [EMOTION_LABELS[i] for i in preds]
    df["predicted_prob"] = top_probs

    # CSV save
    # Drop the extension and any drive/root anchor so the path can be re-rooted under SAVE_DIR.
    rel_path = Path(csv_path).with_suffix("").relative_to(Path(csv_path).anchor)
    save_csv_path = SAVE_DIR / rel_path.parent / (rel_path.name + "_pred.csv")
    save_csv_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(save_csv_path, index=False)
    print(f"✅ Сохранено: {save_csv_path} — ⏱️ {time.time()-t0:.1f}s")

    # Pickle save
    records = []
    for text, src, pred, prob, prob_vec, logit_vec, features in zip(
        texts, source_files, preds, top_probs, probs_all, logits_all, features_all
    ):
        records.append({
            "video_name": src,
            "text": text,
            "predicted_label": pred,
            "predicted_emotion": EMOTION_LABELS[pred],
            "predicted_prob": prob,
            "probs": prob_vec,
            "logits": logit_vec,
            "features": features
        })

    pkl_path = save_csv_path.with_suffix(".pkl")
    with open(pkl_path, "wb") as f:
        pickle.dump(records, f)
    print(f"📦 Сохранил фичи в: {pkl_path}")



📄 Файл: AFEW/Qwen2.5-VL-32B-Instruct/train_segment_with_text_1.csv (855 записей)


🔮 Инференс:   0%|          | 0/54 [00:00<?, ?it/s]

✅ Сохранено: inference_roberta_outputs_4\AFEW\Qwen2.5-VL-32B-Instruct\train_segment_with_text_1_pred.csv — ⏱️ 1.0s
📦 Сохранил фичи в: inference_roberta_outputs_4\AFEW\Qwen2.5-VL-32B-Instruct\train_segment_with_text_1_pred.pkl

📄 Файл: AffWild2/Qwen2.5-VL-32B-Instruct/train_segment_with_text_1.csv (15158 записей)


🔮 Инференс:   0%|          | 0/948 [00:00<?, ?it/s]

✅ Сохранено: inference_roberta_outputs_4\AffWild2\Qwen2.5-VL-32B-Instruct\train_segment_with_text_1_pred.csv — ⏱️ 11.7s
📦 Сохранил фичи в: inference_roberta_outputs_4\AffWild2\Qwen2.5-VL-32B-Instruct\train_segment_with_text_1_pred.pkl

📄 Файл: AFEW/Qwen2.5-VL-32B-Instruct/dev_segment_with_text_1.csv (411 записей)


🔮 Инференс:   0%|          | 0/26 [00:00<?, ?it/s]

✅ Сохранено: inference_roberta_outputs_4\AFEW\Qwen2.5-VL-32B-Instruct\dev_segment_with_text_1_pred.csv — ⏱️ 0.3s
📦 Сохранил фичи в: inference_roberta_outputs_4\AFEW\Qwen2.5-VL-32B-Instruct\dev_segment_with_text_1_pred.pkl

📄 Файл: AffWild2/Qwen2.5-VL-32B-Instruct/dev_segment_with_text_1.csv (6450 записей)


🔮 Инференс:   0%|          | 0/404 [00:00<?, ?it/s]

✅ Сохранено: inference_roberta_outputs_4\AffWild2\Qwen2.5-VL-32B-Instruct\dev_segment_with_text_1_pred.csv — ⏱️ 5.0s
📦 Сохранил фичи в: inference_roberta_outputs_4\AffWild2\Qwen2.5-VL-32B-Instruct\dev_segment_with_text_1_pred.pkl

📄 Файл: C-EXPR-DB/Qwen2.5-VL-32B-Instruct/test_segment_with_text_1.csv (449 записей)


🔮 Инференс:   0%|          | 0/29 [00:00<?, ?it/s]

✅ Сохранено: inference_roberta_outputs_4\C-EXPR-DB\Qwen2.5-VL-32B-Instruct\test_segment_with_text_1_pred.csv — ⏱️ 0.4s
📦 Сохранил фичи в: inference_roberta_outputs_4\C-EXPR-DB\Qwen2.5-VL-32B-Instruct\test_segment_with_text_1_pred.pkl

📄 Файл: C-EXPR-DB/Qwen2.5-VL-32B-Instruct/test_segment_with_text_2.csv (449 записей)


🔮 Инференс:   0%|          | 0/29 [00:00<?, ?it/s]

✅ Сохранено: inference_roberta_outputs_4\C-EXPR-DB\Qwen2.5-VL-32B-Instruct\test_segment_with_text_2_pred.csv — ⏱️ 0.4s
📦 Сохранил фичи в: inference_roberta_outputs_4\C-EXPR-DB\Qwen2.5-VL-32B-Instruct\test_segment_with_text_2_pred.pkl


In [52]:
import pandas as pd
from sklearn.metrics import recall_score, f1_score
from pathlib import Path

# === Settings ===
# Dev-split prediction files located under SAVE_DIR.
PRED_FILES = [
    f"{SAVE_DIR}/AFEW/Qwen2.5-VL-32B-Instruct/dev_segment_with_text_1_pred.csv",
    f"{SAVE_DIR}/AffWild2/Qwen2.5-VL-32B-Instruct/dev_segment_with_text_1_pred.csv",
]
# Derived from EMOTION_LABELS rather than retyped: the ground-truth index is an arg-max
# over these columns and must use the same order as the indices in `predicted_label`.
LABELS = list(EMOTION_LABELS)

# === Metric computation ===
def calc_metrics(y_true, y_pred):
    """Score predictions against ground truth with four averaged metrics.

    Args:
        y_true: ground-truth class labels, one per sample.
        y_pred: predicted class labels, aligned with ``y_true``.

    Returns:
        dict with keys, in this order:
            ``UAR``        - macro-averaged recall,
            ``WAR``        - support-weighted recall,
            ``macroF1``    - macro-averaged F1,
            ``weightedF1`` - support-weighted F1.

    ``zero_division=0`` makes an undefined per-class score count as 0.
    NOTE(review): no ``labels=`` is passed, so the class set is inferred from
    the values present in the inputs - confirm that is the intended averaging.
    """
    def _score(scorer, averaging):
        # Same call for every metric; only the scorer and the averaging mode vary.
        return scorer(y_true, y_pred, average=averaging, zero_division=0)

    return {
        "UAR": _score(recall_score, "macro"),
        "WAR": _score(recall_score, "weighted"),
        "macroF1": _score(f1_score, "macro"),
        "weightedF1": _score(f1_score, "weighted"),
    }

all_metrics = []

for path in PRED_FILES:
    df = pd.read_csv(path)
    # Ground-truth class = position of the largest value among the per-emotion columns.
    # NOTE(review): assumes every row has a meaningful maximum (e.g. one-hot labels);
    # an all-zero row would silently resolve to index 0 - confirm against the data.
    y_true = df[LABELS].values.argmax(1)
    y_pred = df["predicted_label"].values
    metrics = calc_metrics(y_true, y_pred)
    # The files share one basename, so print the whole path to tell the datasets apart.
    print(f"📄 {path}")
    for k, v in metrics.items():
        print(f"  {k}: {v:.4f}")

    # Combined score: sum and mean of UAR and macro-F1.
    uar_f1_sum = metrics["UAR"] + metrics["macroF1"]
    print (f"sum:{uar_f1_sum:.4f}, average: {uar_f1_sum/2:.4f}")
    all_metrics.append(metrics)

# === Mean metrics ===
# Unweighted mean over files: every dataset counts equally regardless of its size.
print("\n📊 Средние метрики:")
for k in all_metrics[0].keys():
    avg = sum(d[k] for d in all_metrics) / len(all_metrics)
    print(f"  {k}: {avg:.4f}")


📄 dev_segment_with_text_1_pred.csv
  UAR: 0.3204
  WAR: 0.3747
  macroF1: 0.3230
  weightedF1: 0.3826
sum:0.6434, average: 0.3217
📄 dev_segment_with_text_1_pred.csv
  UAR: 0.4468
  WAR: 0.4760
  macroF1: 0.3631
  weightedF1: 0.4890
sum:0.8099, average: 0.4049

📊 Средние метрики:
  UAR: 0.3836
  WAR: 0.4253
  macroF1: 0.3431
  weightedF1: 0.4358


In [53]:
# Sanity check: reload one of the pickles written by the inference cell.
# The path is built from SAVE_DIR so this cell follows the configured output folder
# instead of silently reading results left over from a previous setting.
# NOTE: pickle.load can execute code embedded in the file - only open pickles produced by this notebook.
sample_pkl_path = SAVE_DIR / "AFEW" / "Qwen2.5-VL-32B-Instruct" / "train_segment_with_text_1_pred.pkl"
with open(sample_pkl_path, "rb") as f:
    records = pickle.load(f)

print(f"🔍 Загружено {len(records)} записей")
print("Пример:")
print(records[:1])

🔍 Загружено 855 записей
Пример:
[{'video_name': '000046280___Angry_0000.avi', 'text': "The scene is dimly lit, creating a somber and intense atmosphere that amplifies the emotional weight of the moment. The individual's facial expression conveys a complex mix of emotions, primarily centered around fear and tension. Their eyes are wide open, with pupils dilated, suggesting heightened alertness or anxiety. The eyebrows are slightly furrowed, adding to the sense of concern or unease. The mouth is partially open, indicating a state of surprise or shock, possibly reacting to an unexpected event or threat. There is minimal movement of the head or body, which suggests a frozen or startled reaction rather than active engagement. The overall stillness and intensity of their expression, combined with the dark background, reinforce a strong sense of fear and vulnerability. This emotional state appears consistent throughout the frames, with no significant transition observed, maintaining a tense a

In [27]:
# Peek at the first two records of another prediction pickle.
# NOTE(review): this path is not under SAVE_DIR and no cell shown here writes it -
# presumably it comes from a different run; confirm the file is still wanted.
# NOTE: pickle.load can execute code embedded in the file - only open trusted pickles.
other_run_pkl = "inference_outputs/AffWild2/Qwen2.5-VL-7B-Instruct/train_segment_with_text_1_pred.pkl"
with open(other_run_pkl, "rb") as pkl_file:
    records = pickle.load(pkl_file)

n_records = len(records)
print(f"🔍 Загружено {n_records} записей")
print("Пример:")
print(records[:2])

🔍 Загружено 15158 записей
Пример:
[{'video_name': '105___7552_7672_0126.avi', 'text': 'The individual appears seated on a blue couch against a plain wall, wearing a red striped shirt, suggesting a casual setting. Their facial expression shifts from a wide smile to a more neutral look, indicating a transition from joy to a calmer state. The eyes widen slightly at one point, hinting at surprise or heightened interest, but quickly return to a neutral gaze. The overall body posture remains relaxed throughout, with minimal movement, reinforcing the sense of a calm and composed demeanor. The scene lacks any significant environmental elements that might influence the emotional state, focusing solely on the subtle changes in facial expressions and gaze.', 'predicted_label': 4, 'predicted_emotion': 'Happiness', 'predicted_prob': 0.3524705767631531, 'probs': [0.18866893649101257, 0.01651657558977604, 0.25110942125320435, 0.007982643321156502, 0.3524705767631531, 0.022828616201877594, 0.030429013