# 双模态多特征融合实验 (Face版)

**视觉侧**: CLIP (高层抽象) + Face (面部图像CNN)

**文本侧**: BERT

**融合策略**:
1. Concat - 简单拼接 (基线)
2. Weighted Sum - 可学习权重加权
3. Gated - 门控融合
4. Hierarchical - 层次融合
5. Bilinear - 双线性交互
6. Cross-Attention - 跨模态注意力
7. Tensor - 张量融合

## 1. 配置

In [None]:
# ==================== GPU selection ====================
import os

# Environment switches set before TensorFlow is imported further down:
#   CUDA_VISIBLE_DEVICES      - index of the GPU to expose (edit to pick another card)
#   TF_FORCE_GPU_ALLOW_GROWTH - allocate GPU memory on demand
os.environ.update({
    "CUDA_VISIBLE_DEVICES": "0",
    "TF_FORCE_GPU_ALLOW_GROWTH": "true",
})

In [None]:
# ==================== Feature configuration ====================
FOLD = 0  # change this to switch folds (0, 1, 2)

# One row per feature: (key, name suffix, file stem, per-sample shape, modality).
_FEATURE_TABLE = (
    ('clip', 'CLIP', 'visual_clip', (16, 512), 'visual'),
    ('face', 'Face', 'visual_face', (16, 224, 224, 3), 'visual'),  # raw images
    ('bert', 'BERT', 'textual_bert', (82, 768), 'text'),
)

# Expanded per-feature settings, keyed by feature name (insertion order = table order).
FEATURES_CONFIG = {
    feat_key: {
        'name': f'{FOLD}{suffix}',
        'file': f'./data2025/fold{FOLD}_{stem}.pkl',
        'shape': shape,
        'modality': modality,
    }
    for feat_key, suffix, stem, shape, modality in _FEATURE_TABLE
}

LABEL_FILE = f'./data2025/fold{FOLD}_labels.pkl'

# ==================== Experiment configuration ====================
NUM_RUNS = 5

# ==================== Output location ====================
# The directory name embeds every feature name, joined by a double underscore.
feature_names = '__'.join(cfg['name'] for cfg in FEATURES_CONFIG.values())
SAVE_DIR = f'./data2025/resm/{feature_names}/fusion_tri'

# Echo the configuration so the notebook output records what was run.
print(f"Fold: {FOLD}")
print("特征配置:")
for key, cfg in FEATURES_CONFIG.items():
    print(f"  {key}: {cfg['shape']} ({cfg['modality']})")
print(f"重复训练次数: {NUM_RUNS}")
print(f"保存路径: {SAVE_DIR}")

In [None]:
import gc
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import Model, backend as K
from keras.layers import (Input, Dense, Conv1D, Conv2D, LSTM, Concatenate, Add, Multiply,
                          Attention, GlobalAveragePooling1D, GlobalAveragePooling2D, 
                          Dropout, BatchNormalization, Lambda, Layer, Flatten,
                          TimeDistributed, MaxPooling2D, Reshape)
from keras.regularizers import l2
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
import matplotlib.pyplot as plt
import seaborn as sns
from utilz import load_features

# Report the TensorFlow version and the number of GPUs TensorFlow can see.
print(f"TensorFlow: {tf.__version__}")
print(f"GPU: {len(tf.config.list_physical_devices('GPU'))} available")

## 2. 数据加载

In [None]:
# Load every configured feature set (train/test splits) into memory.
feature_data = {}
for key, cfg in FEATURES_CONFIG.items():
    data = load_features(cfg['file'])
    train_data, test_data = (np.asarray(data[split]) for split in ('train', 'test'))

    # Face frames only: rescale to [0, 1] when the training pixels exceed 1.
    # The decision is made on the training split and applied to both splits.
    if key == 'face' and train_data.max() > 1:
        train_data = train_data.astype('float32') / 255.0
        test_data = test_data.astype('float32') / 255.0

    feature_data[key] = {'train': train_data, 'test': test_data, 'shape': cfg['shape']}
    print(f"{key}: train={feature_data[key]['train'].shape}, test={feature_data[key]['test'].shape}")

# Labels
label = load_features(LABEL_FILE)
y_train, y_test = np.asarray(label['train']), np.asarray(label['test'])

# Convenience aliases for each feature/split pair
X_clip_train, X_clip_test = feature_data['clip']['train'], feature_data['clip']['test']
X_face_train, X_face_test = feature_data['face']['train'], feature_data['face']['test']
X_bert_train, X_bert_test = feature_data['bert']['train'], feature_data['bert']['test']

print(f"\n训练集: {len(y_train)}, 测试集: {len(y_test)}")
print(f"Face数据范围: [{X_face_train.min():.2f}, {X_face_train.max():.2f}]")

# Class information
class_labels = ['Bored', 'Happy', 'Interested', 'Tired', 'Confused']
num_classes = len(class_labels)

# Balanced class weights.
# compute_class_weight returns one weight per entry of `classes`, in that order,
# so each weight is keyed by the class id it belongs to rather than by its
# position: positional keys would be wrong whenever a class id is missing from
# the training labels.
# assumes the labels are integer class ids — TODO confirm against the label file
present_classes = np.unique(y_train)
class_weights = compute_class_weight('balanced', classes=present_classes, y=y_train)
class_weight_dict = {
    int(class_id): float(weight)
    for class_id, weight in zip(present_classes, class_weights)
}
print(f"类别权重: {class_weight_dict}")

## 3. 编码器定义

In [None]:
# Per-sample input shapes of the three encoders, read from the feature config.
CLIP_SHAPE, FACE_SHAPE, BERT_SHAPE = (
    FEATURES_CONFIG[feature]['shape'] for feature in ('clip', 'face', 'bert')
)

def build_clip_encoder(input_shape=None):
    """Build the CLIP branch: three Conv1D/BatchNorm/ReLU stages followed by an LSTM.

    Args:
        input_shape: Per-sample input shape; ``CLIP_SHAPE`` is used when ``None``.

    Returns:
        ``(input_tensor, feature_tensor)``. The feature is the output of a
        64-unit LSTM, i.e. ``(batch, 64)``.
    """
    shape = CLIP_SHAPE if input_shape is None else input_shape
    inp = Input(shape, name='clip_input')

    # Kernel sizes 3 -> 1 -> 3, all with 64 filters, stride 1 and 'same' padding.
    h = inp
    for kernel_size in (3, 1, 3):
        h = Conv1D(64, kernel_size, strides=1, padding='same',
                   kernel_regularizer=l2(0.001))(h)
        h = BatchNormalization()(h)
        h = tf.keras.layers.ReLU()(h)

    # Summarise the sequence into one vector.
    out = LSTM(64, activation='relu', dropout=0.2, recurrent_dropout=0.2)(h)

    return inp, out


def build_face_encoder(input_shape=None):
    """Build the face branch: a small per-frame CNN followed by an LSTM.

    Every layer of the CNN is wrapped in ``TimeDistributed`` so it is applied to
    each frame separately.

    Args:
        input_shape: Per-sample input shape; ``FACE_SHAPE`` is used when ``None``.

    Returns:
        ``(input_tensor, feature_tensor)``. The feature is the output of a
        64-unit LSTM, i.e. ``(batch, 64)``.
    """
    shape = FACE_SHAPE if input_shape is None else input_shape
    inp = Input(shape, name='face_input')

    # (filters, max-pool afterwards?) for each stride-2 convolution stage.
    # With 224x224 frames the spatial size goes 112 -> 56 -> 28 -> 14 -> 7.
    h = inp
    for filters, pool_after in ((32, True), (64, True), (128, False)):
        h = TimeDistributed(
            Conv2D(filters, 3, strides=2, padding='same', kernel_regularizer=l2(0.001))
        )(h)
        h = TimeDistributed(BatchNormalization())(h)
        h = TimeDistributed(tf.keras.layers.ReLU())(h)
        if pool_after:
            h = TimeDistributed(MaxPooling2D(2))(h)

    # Global average pooling leaves one 128-d vector per frame.
    h = TimeDistributed(GlobalAveragePooling2D())(h)

    # The LSTM models the frame sequence.
    out = LSTM(64, activation='relu', dropout=0.2, recurrent_dropout=0.2)(h)

    return inp, out


def build_bert_encoder(input_shape=None):
    """Build the BERT branch: two Conv1D projections, attention, then pooling.

    Args:
        input_shape: Per-sample input shape; ``BERT_SHAPE`` is used when ``None``.

    Returns:
        ``(input_tensor, feature_tensor)``. The feature concatenates the pooled
        query (64) and the pooled attention output (64), i.e. ``(batch, 128)``.
    """
    shape = BERT_SHAPE if input_shape is None else input_shape
    inp = Input(shape, name='bert_input')

    # Two independent convolutions of the same input act as query and value.
    query = Conv1D(64, 3, strides=1, kernel_regularizer=l2(0.001))(inp)
    value = Conv1D(64, 3, strides=1, kernel_regularizer=l2(0.001))(inp)
    attended = Attention()([query, value])

    pooled_query = GlobalAveragePooling1D()(query)
    pooled_attended = GlobalAveragePooling1D()(attended)

    out = Concatenate()([pooled_query, pooled_attended])

    return inp, out


# Summary of the encoder input shapes and output widths.
print("\n".join((
    "编码器定义完成",
    f"  CLIP:  {CLIP_SHAPE} -> 64维",
    f"  Face:  {FACE_SHAPE} -> 64维 (CNN+LSTM)",
    f"  BERT:  {BERT_SHAPE} -> 128维",
)))

## 4. 融合策略定义

In [None]:
# ==================== Fusion strategy 1: Concat ====================
def fusion_concat(clip_feat, face_feat, bert_feat):
    """Concatenate the three branch features along the last axis (baseline).

    The output width is the sum of the three input widths (64 + 64 + 128 = 256
    with the encoders defined in this notebook).
    """
    branches = [clip_feat, face_feat, bert_feat]
    merge = Concatenate(name='fusion_concat')
    return merge(branches)


# ==================== Fusion strategy 2: Weighted Sum ====================
def fusion_weighted_sum(clip_feat, face_feat, bert_feat):
    """Fuse three branches as a softmax-weighted sum of 128-d projections.

    Each branch is projected to 128 dimensions. A 3-way softmax computed from
    the concatenated projections yields one scalar weight per branch for every
    sample, and the weighted projections are added.

    Returns:
        Tensor of shape ``(batch, 128)``.
    """
    projections = [
        Dense(128, kernel_regularizer=l2(0.001))(feat)
        for feat in (clip_feat, face_feat, bert_feat)
    ]

    stacked = Concatenate()(projections)
    weights = Dense(3, activation='softmax', kernel_regularizer=l2(0.001))(stacked)

    # Slice out one weight column per branch, keeping a trailing axis of size 1
    # so it broadcasts against the 128-d projection.
    selectors = (
        lambda x: tf.expand_dims(x[:, 0], -1),
        lambda x: tf.expand_dims(x[:, 1], -1),
        lambda x: tf.expand_dims(x[:, 2], -1),
    )
    branch_weights = [Lambda(select)(weights) for select in selectors]

    weighted = [
        Multiply()([proj, weight])
        for proj, weight in zip(projections, branch_weights)
    ]
    fused = Add(name='fusion_weighted_sum')(weighted)

    return fused


# ==================== Fusion strategy 3: Gated ====================
def fusion_gated(clip_feat, face_feat, bert_feat):
    """Fuse three branches with one element-wise sigmoid gate per branch.

    Each branch is projected to 128 dimensions; every gate is a 128-d sigmoid
    layer fed with the concatenation of all three projections. The gated
    projections are summed.

    Returns:
        Tensor of shape ``(batch, 128)``.
    """
    projections = [
        Dense(128, kernel_regularizer=l2(0.001))(feat)
        for feat in (clip_feat, face_feat, bert_feat)
    ]

    joint = Concatenate()(projections)

    # One independent gate per branch, all conditioned on the same joint vector.
    gates = [
        Dense(128, activation='sigmoid', kernel_regularizer=l2(0.001))(joint)
        for _ in projections
    ]

    gated = [Multiply()([proj, gate]) for proj, gate in zip(projections, gates)]

    fused = Add(name='fusion_gated')(gated)

    return fused


# ==================== Fusion strategy 4: Hierarchical ====================
def fusion_hierarchical(clip_feat, face_feat, bert_feat):
    """Fuse in two stages: first inside the visual modality, then across modalities.

    Stage 1 gates the concatenated 64-d projections of CLIP and Face with a
    128-d sigmoid gate computed from the raw visual features.
    Stage 2 blends a 128-d visual projection and a 128-d text projection as
    ``g * visual + (1 - g) * text`` with a sigmoid gate ``g``.

    Returns:
        Tensor of shape ``(batch, 128)``.
    """
    # ---- Stage 1: intra-visual fusion (CLIP + Face) ----
    visual_raw = Concatenate()([clip_feat, face_feat])
    visual_gate = Dense(128, activation='sigmoid', kernel_regularizer=l2(0.001))(visual_raw)

    clip_proj = Dense(64, kernel_regularizer=l2(0.001))(clip_feat)
    face_proj = Dense(64, kernel_regularizer=l2(0.001))(face_feat)
    visual_proj = Concatenate()([clip_proj, face_proj])

    visual_fused = Multiply()([visual_gate, visual_proj])

    # ---- Stage 2: cross-modal fusion (visual + text) ----
    joint = Concatenate()([visual_fused, bert_feat])
    cross_gate = Dense(128, activation='sigmoid', kernel_regularizer=l2(0.001))(joint)

    visual_final = Dense(128, kernel_regularizer=l2(0.001))(visual_fused)
    text_final = Dense(128, kernel_regularizer=l2(0.001))(bert_feat)

    visual_term = Multiply()([cross_gate, visual_final])
    # The text side receives the complementary gate (1 - g).
    text_term = Multiply()([Lambda(lambda g: 1 - g)(cross_gate), text_final])

    fused = Add(name='fusion_hierarchical')([visual_term, text_term])

    return fused


# ==================== Fusion strategy 5: Bilinear ====================
def fusion_bilinear(clip_feat, face_feat, bert_feat):
    """Fuse via pairwise outer products plus a linear shortcut.

    Each branch is projected to 32 dimensions. Two 32x32 outer products are
    formed (CLIP x Face and CLIP x BERT), flattened, concatenated and
    compressed to 128 dimensions with a ReLU layer. A 128-d linear projection
    of the raw concatenated features is added to that result.

    Returns:
        Tensor of shape ``(batch, 128)``.
    """
    clip_proj = Dense(32, kernel_regularizer=l2(0.001))(clip_feat)
    face_proj = Dense(32, kernel_regularizer=l2(0.001))(face_feat)
    bert_proj = Dense(32, kernel_regularizer=l2(0.001))(bert_feat)

    def flat_outer(column, row):
        """Broadcast-multiply (batch, n, 1) by (batch, 1, m) and flatten."""
        product = Lambda(lambda pair: pair[0] * pair[1])([column, row])
        return Flatten()(product)

    # CLIP x Face interaction
    clip_col = Lambda(lambda x: tf.expand_dims(x, 2))(clip_proj)
    face_row = Lambda(lambda x: tf.expand_dims(x, 1))(face_proj)
    visual_pair = flat_outer(clip_col, face_row)

    # CLIP x BERT interaction (the Face projection does not take part in this term)
    bert_row = Lambda(lambda x: tf.expand_dims(x, 1))(bert_proj)
    cross_pair = flat_outer(clip_col, bert_row)

    interactions = Concatenate()([visual_pair, cross_pair])
    interactions_128 = Dense(128, activation='relu', kernel_regularizer=l2(0.001))(interactions)

    # Linear shortcut from the un-projected branch features.
    raw_concat = Concatenate()([clip_feat, face_feat, bert_feat])
    shortcut = Dense(128, kernel_regularizer=l2(0.001))(raw_concat)

    fused = Add(name='fusion_bilinear')([interactions_128, shortcut])

    return fused


# ==================== Fusion strategy 6: Cross-Attention ====================
class MultiHeadCrossAttention(Layer):
    """Scaled dot-product cross-attention with learned Q/K/V projections.

    Despite the class name, the computation below uses a single attention head:
    no head splitting is performed.

    Args:
        units: Width of the query/key/value projections and of the output.
    """

    def __init__(self, units=64, **kwargs):
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create the three L2-regularised projection layers."""
        self.wq, self.wk, self.wv = (
            Dense(self.units, kernel_regularizer=l2(0.001)) for _ in range(3)
        )
        super().build(input_shape)

    def call(self, query, key_value):
        """Attend from ``query`` over ``key_value``.

        Keys and values are both projected from ``key_value``; the softmax is
        taken over the last axis of the score matrix.
        """
        q = self.wq(query)
        k = self.wk(key_value)
        v = self.wv(key_value)

        # Scale the scores by sqrt(units) before the softmax.
        scale = tf.math.sqrt(tf.cast(self.units, tf.float32))
        logits = tf.matmul(q, k, transpose_b=True) / scale
        attention = tf.nn.softmax(logits, axis=-1)
        return tf.matmul(attention, v)

    def get_config(self):
        """Return the base layer config extended with ``units``."""
        base = super().get_config()
        base.update({'units': self.units})
        return base


def fusion_cross_attention(clip_feat, face_feat, bert_feat):
    """Fuse branches with text-to-visual and visual-to-text cross-attention.

    Every branch vector is treated as a one-token sequence. The text token
    attends over the two visual tokens (layer ``cross_attn_t2v``), and each
    visual token attends over the text token (shared layer ``cross_attn_v2t``).
    The three 64-d results are concatenated.

    NOTE(review): in the visual-to-text direction the key/value sequence holds a
    single token, so the softmax over keys is always 1. ``clip_out`` and
    ``face_out`` are therefore both the value projection of the BERT feature and
    do not depend on the visual query. Confirm whether this is intended.

    Returns:
        Tensor of shape ``(batch, 192)``.
    """
    def as_token(vector):
        """(batch, d) -> (batch, 1, d)."""
        return Lambda(lambda x: tf.expand_dims(x, 1))(vector)

    def drop_token_axis(sequence):
        """(batch, 1, d) -> (batch, d)."""
        return Lambda(lambda x: tf.squeeze(x, 1))(sequence)

    clip_seq = as_token(clip_feat)
    face_seq = as_token(face_feat)
    bert_seq = as_token(bert_feat)

    # Two-token visual sequence: [CLIP, Face]
    vis_seq = Concatenate(axis=1)([clip_seq, face_seq])

    # Text attends over the visual tokens.
    text_to_visual = MultiHeadCrossAttention(64, name='cross_attn_t2v')
    bert_out = drop_token_axis(text_to_visual(bert_seq, vis_seq))

    # Each visual token attends over the text token, sharing one attention layer.
    visual_to_text = MultiHeadCrossAttention(64, name='cross_attn_v2t')
    clip_attended = visual_to_text(clip_seq, bert_seq)
    face_attended = visual_to_text(face_seq, bert_seq)
    clip_out = drop_token_axis(clip_attended)
    face_out = drop_token_axis(face_attended)

    fused = Concatenate(name='fusion_cross_attention')([clip_out, face_out, bert_out])
    return fused


# ==================== Fusion strategy 7: Tensor ====================
def fusion_tensor(clip_feat, face_feat, bert_feat):
    """Fuse via two chained outer products of bias-augmented projections.

    Each branch is projected to 16 dimensions and extended with a constant 1
    (17 dimensions). The CLIP x Face outer product (17x17) is flattened and
    compressed back to 17 dimensions, then combined with the BERT vector in a
    second 17x17 outer product, which is flattened and mapped to 128
    dimensions with a ReLU layer.

    Returns:
        Tensor of shape ``(batch, 128)``.
    """
    projected = [
        Dense(16, kernel_regularizer=l2(0.001))(feat)
        for feat in (clip_feat, face_feat, bert_feat)
    ]

    # Append a constant 1 to each projection.
    clip_aug, face_aug, bert_aug = [
        Lambda(lambda x: tf.concat([x, tf.ones_like(x[:, :1])], axis=1))(proj)
        for proj in projected
    ]

    # First outer product: CLIP x Face
    clip_col = Lambda(lambda x: tf.expand_dims(x, 2))(clip_aug)
    face_row = Lambda(lambda x: tf.expand_dims(x, 1))(face_aug)
    visual_outer = Lambda(lambda pair: pair[0] * pair[1])([clip_col, face_row])
    visual_flat = Flatten()(visual_outer)

    # Compress to 17 dimensions, then take the outer product with BERT.
    visual_17 = Dense(17, kernel_regularizer=l2(0.001))(visual_flat)
    visual_col = Lambda(lambda x: tf.expand_dims(x, 2))(visual_17)
    bert_row = Lambda(lambda x: tf.expand_dims(x, 1))(bert_aug)
    full_outer = Lambda(lambda pair: pair[0] * pair[1])([visual_col, bert_row])
    full_flat = Flatten()(full_outer)

    fused = Dense(128, activation='relu', kernel_regularizer=l2(0.001),
                  name='fusion_tensor')(full_flat)

    return fused


# Summary of the fusion strategies and their output widths.
print("\n".join((
    "融合策略定义完成！",
    "  1. Concat: 简单拼接 -> 256维",
    "  2. Weighted Sum: 加权求和 -> 128维",
    "  3. Gated: 三路门控 -> 128维",
    "  4. Hierarchical: 层次融合 -> 128维",
    "  5. Bilinear: 双线性交互 -> 128维",
    "  6. Cross-Attention: 跨模态注意力 -> 192维",
    "  7. Tensor: 张量融合 -> 128维",
)))

## 5. 模型构建

In [None]:
def build_model(fusion_method='concat', learning_rate=0.0005):
    """Build and compile the three-feature (CLIP + Face + BERT) classifier.

    Args:
        fusion_method: One of ``'concat'``, ``'weighted_sum'``, ``'gated'``,
            ``'hierarchical'``, ``'bilinear'``, ``'cross_attention'``,
            ``'tensor'``.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled model taking ``[clip, face, bert]`` inputs and producing
        softmax probabilities over ``num_classes`` classes.

    Raises:
        KeyError: If ``fusion_method`` is not a known strategy name.
    """
    # Reset Keras global state before building a new graph.
    K.clear_session()
    gc.collect()

    # Branch encoders
    clip_inp, clip_feat = build_clip_encoder()
    face_inp, face_feat = build_face_encoder()
    bert_inp, bert_feat = build_bert_encoder()

    # Strategy name -> function building the fusion sub-graph
    fusion_builders = {
        'concat': fusion_concat,
        'weighted_sum': fusion_weighted_sum,
        'gated': fusion_gated,
        'hierarchical': fusion_hierarchical,
        'bilinear': fusion_bilinear,
        'cross_attention': fusion_cross_attention,
        'tensor': fusion_tensor,
    }
    build_fusion = fusion_builders[fusion_method]
    fused = build_fusion(clip_feat, face_feat, bert_feat)

    # Classification head: 128 -> BatchNorm -> Dropout -> 64 -> Dropout -> softmax
    h = Dense(128, activation='relu', kernel_regularizer=l2(0.001))(fused)
    h = BatchNormalization()(h)
    h = Dropout(0.5)(h)
    h = Dense(64, activation='relu', kernel_regularizer=l2(0.001))(h)
    h = Dropout(0.3)(h)
    output = Dense(num_classes, activation='softmax', name='output')(h)

    model = Model(inputs=[clip_inp, face_inp, bert_inp], outputs=output)

    optimizer = Adam(learning_rate=learning_rate)
    loss = keras.losses.SparseCategoricalCrossentropy()
    model.compile(optimizer=optimizer, loss=loss, metrics=['acc'])

    return model


print("模型构建函数完成")

## 6. 训练与评估

In [None]:
def train_single_run(fusion_method, run_id):
    """Train one freshly built model and evaluate it on the test split.

    NOTE(review): the test split is also passed as ``validation_data``, so early
    stopping (with ``restore_best_weights``) and the learning-rate schedule are
    tuned on the same data that is reported as the test result. The reported
    accuracy is therefore optimistic; consider a separate validation split.

    Args:
        fusion_method: Fusion strategy name forwarded to ``build_model``.
        run_id: Identifier stored in the result (not used for seeding).

    Returns:
        dict with keys ``run_id``, ``test_acc``, ``f1_macro``, ``train_acc``
        (highest training accuracy over all epochs), ``best_epoch`` (1-based
        epoch with the highest validation accuracy), ``params``,
        ``confusion_matrix``, ``history`` and ``model``.
    """
    K.clear_session()
    gc.collect()

    model = build_model(fusion_method=fusion_method, learning_rate=0.0005)

    train_inputs = [X_clip_train, X_face_train, X_bert_train]
    test_inputs = [X_clip_test, X_face_test, X_bert_test]

    callbacks = [
        EarlyStopping(monitor='val_acc', patience=10, restore_best_weights=True, mode='max', verbose=0),
        ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=5, min_lr=1e-7, mode='max', verbose=0)
    ]

    history = model.fit(
        x=train_inputs,
        y=y_train,
        batch_size=8,  # face frames are large, so keep the batch small
        epochs=50,
        validation_data=(test_inputs, y_test),
        callbacks=callbacks,
        class_weight=class_weight_dict,
        verbose=0
    )

    # Evaluation
    pred = model.predict(test_inputs, verbose=0)
    pred_labels = pred.argmax(axis=1)

    # Flatten the ground truth so a column-vector label array cannot broadcast
    # against the 1-D predictions in the comparison below.
    y_true = np.asarray(y_test).ravel()
    test_acc = np.mean(pred_labels == y_true)

    # Pass the full label set explicitly: without it, a class that is absent
    # from both the test labels and the predictions makes classification_report
    # reject `target_names` and shrinks the confusion matrix.
    label_ids = list(range(len(class_labels)))
    report = classification_report(y_true, pred_labels, labels=label_ids,
                                   target_names=class_labels,
                                   digits=4, output_dict=True, zero_division=0)
    f1_macro = report['macro avg']['f1-score']
    cm = confusion_matrix(y_true, pred_labels, labels=label_ids)

    # The metric may be recorded as 'acc' or 'accuracy'; accept either.
    acc_key = 'acc' if 'acc' in history.history else 'accuracy'
    val_acc_key = 'val_acc' if 'val_acc' in history.history else 'val_accuracy'
    best_train_acc = max(history.history[acc_key])
    val_curve = history.history[val_acc_key]
    best_epoch = val_curve.index(max(val_curve)) + 1

    result = {
        'run_id': run_id,
        'test_acc': test_acc,
        'f1_macro': f1_macro,
        'train_acc': best_train_acc,
        'best_epoch': best_epoch,
        'params': model.count_params(),
        'confusion_matrix': cm,
        'history': history.history,
        'model': model
    }

    return result


def train_and_evaluate(fusion_method, num_runs=5):
    """Train ``fusion_method`` several times, keep the best run and save its model.

    The best run is the first one with the highest test accuracy. Its model is
    saved to ``{SAVE_DIR}/model_{fusion_method}.tf``.

    NOTE(review): the "best" run is selected on test accuracy, so the
    ``max_acc`` / ``test_acc`` figures are an optimistic estimate; the
    mean and standard deviation over runs are the less biased numbers.

    Args:
        fusion_method: Fusion strategy name forwarded to ``train_single_run``.
        num_runs: Number of independent training runs (must be >= 1).

    Returns:
        dict with per-run accuracy/F1 lists, their mean/std/max/min, and the
        metrics, confusion matrix and history of the best run.

    Raises:
        ValueError: If ``num_runs`` is smaller than 1.
    """
    # Fail fast: with no runs there is no best model and nothing to aggregate.
    if num_runs < 1:
        raise ValueError(f"num_runs must be >= 1, got {num_runs}")

    print(f"\n{'='*60}")
    print(f"融合策略: {fusion_method.upper()} (重复 {num_runs} 次)")
    print(f"{'='*60}")

    os.makedirs(SAVE_DIR, exist_ok=True)

    all_runs = []
    best_model = None
    best_acc = -1
    best_idx = -1

    for run_id in range(1, num_runs + 1):
        print(f"  Run {run_id}/{num_runs}...", end=" ")
        result = train_single_run(fusion_method, run_id)
        print(f"ACC: {result['test_acc']:.4f}, F1: {result['f1_macro']:.4f}")

        # Detach the model from the stored record; only the best one is kept alive.
        model = result.pop('model')
        if result['test_acc'] > best_acc:
            best_model = model  # rebinding drops the previous best model
            best_acc = result['test_acc']
            best_idx = run_id - 1
        del model

        all_runs.append(result)

        K.clear_session()
        gc.collect()

    accs = [r['test_acc'] for r in all_runs]
    f1s = [r['f1_macro'] for r in all_runs]

    mean_acc, std_acc = np.mean(accs), np.std(accs)
    max_acc, min_acc = np.max(accs), np.min(accs)
    mean_f1, std_f1 = np.mean(f1s), np.std(f1s)

    best_run = all_runs[best_idx]

    model_path = f'{SAVE_DIR}/model_{fusion_method}.tf'
    best_model.save(model_path)

    print(f"\n  统计: ACC = {mean_acc:.4f} ± {std_acc:.4f} (max={max_acc:.4f}, min={min_acc:.4f})")
    print(f"        F1  = {mean_f1:.4f} ± {std_f1:.4f}")
    print(f"  最佳: Run {best_run['run_id']}, ACC = {best_run['test_acc']:.4f}")

    # Release the saved model before the next strategy is trained.
    del best_model
    K.clear_session()
    gc.collect()

    results = {
        'method': fusion_method,
        'num_runs': num_runs,
        'all_accs': accs,
        'all_f1s': f1s,
        'mean_acc': mean_acc,
        'std_acc': std_acc,
        'max_acc': max_acc,
        'min_acc': min_acc,
        'mean_f1': mean_f1,
        'std_f1': std_f1,
        'best_run': best_run,
        # Flattened copies of the best run's fields for convenient access
        'test_acc': best_run['test_acc'],
        'f1_macro': best_run['f1_macro'],
        'train_acc': best_run['train_acc'],
        'best_epoch': best_run['best_epoch'],
        'params': best_run['params'],
        'confusion_matrix': best_run['confusion_matrix'],
        'history': best_run['history']
    }

    return results


print("训练评估函数完成")

## 7. 运行实验

In [None]:
# Strategies to evaluate, in execution order.
fusion_methods = [
    'concat',
    'weighted_sum',
    'gated',
    'hierarchical',
    'bilinear',
    'cross_attention',
    'tensor',
]

# Banner describing the experiment that is about to run.
print("\n".join((
    "="*60,
    "双模态多特征融合实验 (CLIP + Face + BERT)",
    "="*60,
    f"Fold: {FOLD}",
    f"特征: CLIP {CLIP_SHAPE} + Face {FACE_SHAPE} + BERT {BERT_SHAPE}",
    f"融合策略: {fusion_methods}",
    f"重复次数: {NUM_RUNS}",
    f"保存路径: {SAVE_DIR}",
    "="*60,
)))

In [None]:
# 运行所有实验
# Run every fusion strategy and collect its aggregated results by name.
# The dict is filled one strategy at a time, so results gathered so far stay
# available if the cell is interrupted.
all_results = {}

for method in fusion_methods:
    results = train_and_evaluate(method, num_runs=NUM_RUNS)
    all_results[method] = results

print("\n" + "="*60)
print("所有实验完成！")
print("="*60)

## 8. 结果汇总

In [None]:
# 结果表格
results_df = pd.DataFrame({
    'Method': [r['method'].upper() for r in all_results.values()],
    'Best ACC': [f"{r['max_acc']:.4f}" for r in all_results.values()],
    'Mean±Std': [f"{r['mean_acc']:.4f}±{r['std_acc']:.4f}" for r in all_results.values()],
    'Best F1': [f"{r['f1_macro']:.4f}" for r in all_results.values()],
    'F1 Mean±Std': [f"{r['mean_f1']:.4f}±{r['std_f1']:.4f}" for r in all_results.values()],
    'Params': [f"{r['params']:,}" for r in all_results.values()],
})

results_df['best_acc_float'] = [r['max_acc'] for r in all_results.values()]
results_df = results_df.sort_values('best_acc_float', ascending=False).drop('best_acc_float', axis=1)

print("\n" + "="*90)
print(f"融合策略对比结果 (CLIP + Face + BERT, Fold {FOLD})")
print("="*90)
print(results_df.to_string(index=False))
print("="*90)

csv_path = f'{SAVE_DIR}/results_summary.csv'
results_df.to_csv(csv_path, index=False)
print(f"\n结果已保存至: {csv_path}")

In [None]:
# 可视化
fig, axes = plt.subplots(1, 2, figsize=(16, 5))

sorted_results = sorted(all_results.values(), key=lambda x: x['max_acc'], reverse=True)
methods = [r['method'].upper() for r in sorted_results]
max_accs = [r['max_acc'] for r in sorted_results]
mean_accs = [r['mean_acc'] for r in sorted_results]
std_accs = [r['std_acc'] for r in sorted_results]
max_f1s = [r['f1_macro'] for r in sorted_results]
mean_f1s = [r['mean_f1'] for r in sorted_results]
std_f1s = [r['std_f1'] for r in sorted_results]

colors = ['#2ecc71', '#3498db', '#e74c3c', '#f39c12', '#9b59b6', '#1abc9c', '#e67e22']
x = np.arange(len(methods))

bars1 = axes[0].bar(x, max_accs, color=colors[:len(methods)], alpha=0.8, label='Best')
axes[0].errorbar(x, mean_accs, yerr=std_accs, fmt='o', color='black', capsize=5, capthick=2, label='Mean±Std')
axes[0].set_ylabel('Test Accuracy', fontsize=12)
axes[0].set_title(f'Test Accuracy (CLIP+Face+BERT, Fold {FOLD})', fontsize=14, fontweight='bold')
axes[0].set_ylim([0, 1])
axes[0].set_xticks(x)
axes[0].set_xticklabels(methods, rotation=45, ha='right')
axes[0].legend()
for bar, acc in zip(bars1, max_accs):
    axes[0].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, f'{acc:.4f}', ha='center', fontweight='bold', fontsize=8)

bars2 = axes[1].bar(x, max_f1s, color=colors[:len(methods)], alpha=0.8, label='Best')
axes[1].errorbar(x, mean_f1s, yerr=std_f1s, fmt='o', color='black', capsize=5, capthick=2, label='Mean±Std')
axes[1].set_ylabel('F1 Score (Macro)', fontsize=12)
axes[1].set_title(f'F1 Score (CLIP+Face+BERT, Fold {FOLD})', fontsize=14, fontweight='bold')
axes[1].set_ylim([0, 1])
axes[1].set_xticks(x)
axes[1].set_xticklabels(methods, rotation=45, ha='right')
axes[1].legend()
for bar, f1 in zip(bars2, max_f1s):
    axes[1].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, f'{f1:.4f}', ha='center', fontweight='bold', fontsize=8)

plt.tight_layout()
plt.savefig(f'{SAVE_DIR}/accuracy_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# 混淆矩阵
fig, axes = plt.subplots(2, 4, figsize=(20, 10))
axes = axes.flatten()

for idx, (method, results) in enumerate(all_results.items()):
    if idx >= 7:
        break
    ax = axes[idx]
    cm = results['confusion_matrix']
    cm_norm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    
    sns.heatmap(cm_norm, annot=True, fmt='.2f', cmap='Blues',
                xticklabels=class_labels, yticklabels=class_labels,
                ax=ax, vmin=0, vmax=1)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    ax.set_title(f"{method.upper()} (ACC: {results['test_acc']:.4f})", fontweight='bold')

if len(all_results) < 8:
    axes[7].axis('off')

plt.tight_layout()
plt.savefig(f'{SAVE_DIR}/confusion_matrices.png', dpi=300, bbox_inches='tight')
plt.show()

## 9. 最终总结

In [None]:
# Final report: configuration, ranking by best accuracy, and the winning strategy.
sorted_results = sorted(all_results.items(), key=lambda item: item[1]['max_acc'], reverse=True)
best_method, best_result = sorted_results[0]

_thick_rule = "="*80
_thin_rule = "-" * 80

print("\n" + _thick_rule)
print("双模态多特征融合实验 (Face版) - 最终总结")
print(_thick_rule)

print("\n【实验配置】")
for _line in (
    f"  Fold: {FOLD}",
    f"  视觉特征: CLIP {CLIP_SHAPE} + Face {FACE_SHAPE}",
    f"  文本特征: BERT {BERT_SHAPE}",
    f"  重复次数: {NUM_RUNS}",
):
    print(_line)

print("\n【融合策略排名】")
print(_thin_rule)
print(f"{'排名':<4}{'策略':<18}{'Best ACC':<12}{'Mean±Std':<18}{'Best F1':<10}{'参数量':<12}")
print(_thin_rule)
for rank, (method, r) in enumerate(sorted_results, 1):
    mean_std = f"{r['mean_acc']:.4f}±{r['std_acc']:.4f}"
    row = (
        f"{rank:<4}{method.upper():<18}{r['max_acc']:<12.4f}"
        f"{mean_std:<18}{r['f1_macro']:<10.4f}{r['params']:<12,}"
    )
    print(row)
print(_thin_rule)

print(f"\n【最佳融合策略】: {best_method.upper()}")
print(f"  Best ACC: {best_result['max_acc']:.4f}")
print(f"  Mean ACC: {best_result['mean_acc']:.4f} ± {best_result['std_acc']:.4f}")
print(f"  Best F1:  {best_result['f1_macro']:.4f}")

print(f"\n【保存路径】: {SAVE_DIR}")
print(_thick_rule)
print("实验完成！")
print(_thick_rule)

gc.collect()