In [1]:
import torch
import torch.nn as nn
from torch.optim import Adam
import sys
import os
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.metrics import classification_report
sys.path.append(os.path.join(os.getcwd(), 'data'))

from data_split import prepare_data

In [2]:
# 定义BERT模型用于处理文本信息
class BERTModel(nn.Module):
    def __init__(self, embed_size, hidden_size, num_layers, num_heads, vocab_size=30522, dropout=0.1, num_classes=3):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.positional_encoding = nn.Parameter(torch.randn(1, 512, embed_size))  
        self.encoder_layers = nn.TransformerEncoderLayer(
            d_model=embed_size,
            nhead=num_heads,
            dim_feedforward=hidden_size,
            dropout=dropout,
            batch_first=True  
        )
        
        self.encoder = nn.TransformerEncoder(
            self.encoder_layers,
            num_layers=num_layers
        )
        self.fc = nn.Linear(embed_size, num_classes)

    def forward(self, x, attention_mask):
        emb = self.embedding(x) + self.positional_encoding[:, :x.size(1), :]
        attention_mask = attention_mask.bool()
        output = self.encoder(emb, src_key_padding_mask=attention_mask)
        cls_token_output = output[:, 0, :]  
        logits = self.fc(cls_token_output)  
        return logits

In [3]:
# 定义AlexNet模型用于处理图像信息
class AlexNetModel(nn.Module):
    def __init__(self, num_classes=3):
        super(AlexNetModel, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

In [4]:
# 多模态模型
class MultiModalModel(nn.Module):
    def __init__(self, text_model, img_model, fusion_dim = 6, num_classes = 3):
        super(MultiModalModel, self).__init__()
        self.text_model = text_model
        self.img_model = img_model
        self.fc_fusion = nn.Linear(fusion_dim, num_classes)

    def forward(self, img, text, attention_mask):
        img_features = self.img_model(img)  
        img_features = img_features.view(img_features.size(0), -1)  
        
        text_features = self.text_model(text, attention_mask)
        
        combined_features = torch.cat([img_features, text_features], dim=-1)
        
        output = self.fc_fusion(combined_features)
        return output

In [5]:
#消融实验：仅使用图像信息
class ImageModel(nn.Module):
    def __init__(self, text_model, img_model, fusion_dim=3, num_classes=3):
        super(ImageModel, self).__init__()
        self.text_model = text_model
        self.img_model = img_model
        self.fc_fusion = nn.Linear(fusion_dim, num_classes)  

    def forward(self, img, text, attention_mask):
        img_features = self.img_model(img)  
        img_features = img_features.view(img_features.size(0), -1)  
        
        output = self.fc_fusion(img_features)
        return output

In [6]:
#消融实验：仅使用文本信息
class TextModel(nn.Module):
    def __init__(self, text_model, img_model, fusion_dim=3, num_classes=3):
        super(TextModel, self).__init__()
        self.text_model = text_model
        self.img_model = img_model
        self.fc_fusion = nn.Linear(fusion_dim, num_classes)
        
    def forward(self, img, text, attention_mask):
        text_features = self.text_model(text, attention_mask)

        output = self.fc_fusion(text_features)
        return output

In [7]:
# 准备数据集
train_loader, val_loader, test_loader = prepare_data()

In [8]:
# 设置训练参数
args = {
    'lr': 1e-5,                # 学习率
    'batch_size': 64,          # 批量大小
    'epochs': 20,              # 训练轮次
    'embed_size': 256,         # 嵌入维度
    'hidden_size': 64,         # 隐藏层维度
    'num_layers': 2,           # Transformer 层数
    'num_heads': 4,            # Attention heads 数量
}

In [9]:
# 检查是否有可用的 GPU
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")  # 打印当前使用的 GPU 名称
else:
    device = torch.device("cpu")
    print("CUDA is not available. Using CPU.")

CUDA is available. Using GPU: NVIDIA RTX A4000


In [10]:
# 分别初始化BERT模型和ALEXNET模型
text_model = BERTModel(vocab_size=30522, embed_size=args['embed_size'], hidden_size=args['hidden_size'], num_layers=args['num_layers'], num_heads=args['num_heads'])
print("Text model initialized successfully.")

img_model = AlexNetModel(num_classes=3)
print("Image model initialized successfully.")

Text model initialized successfully.
Image model initialized successfully.


In [11]:
# 多模态模型初始化
model = MultiModalModel(text_model=text_model, img_model=img_model).to(device)
only_text_model = TextModel(text_model=text_model, img_model=img_model).to(device)
only_image_model = ImageModel(text_model=text_model, img_model=img_model).to(device)
print("Models initialized successfully.")

Models initialized successfully.


In [12]:
# 加权损失函数
class_counts = [1910, 954, 336]  # 训练集中的每个类别的样本数量
total_samples = sum(class_counts) 
class_weights = [total_samples / count for count in class_counts]
class_weights = torch.tensor(class_weights).float().to(device)
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = Adam(model.parameters(), lr=args['lr'])

In [13]:
# 多模态模型
best_val_f1 = 0.0  
best_train_f1 = 0.0 

for epoch in range(args['epochs']):
    print(f"Starting Epoch {epoch+1}/{args['epochs']}...")
    
    # 训练
    model.train()
    running_loss = 0.0
    all_train_labels = []
    all_train_preds = []

    for batch_idx, batch in enumerate(train_loader):
        text_inputs, attention_masks, img_inputs, labels = batch
        
        text_inputs = text_inputs.to(device)
        attention_masks = attention_masks.to(device)
        img_inputs = img_inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        
        outputs = model(img_inputs, text_inputs, attention_masks)  
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)

        all_train_labels.extend(labels.cpu().numpy())
        all_train_preds.extend(predicted.cpu().numpy())

        if (batch_idx + 1) % 10 == 0:
            print(f"Epoch {epoch+1}/{args['epochs']}, Batch {batch_idx+1}/{len(train_loader)}: "
                  f"Loss: {loss.item():.4f}")

    print(f"Epoch {epoch+1}/{args['epochs']} - Average Training Loss: {running_loss / len(train_loader):.4f}")
    train_f1 = f1_score(all_train_labels, all_train_preds, average='weighted')  # 计算加权的 F1-score
    train_precision = precision_score(all_train_labels, all_train_preds, average='weighted')
    train_recall = recall_score(all_train_labels, all_train_preds, average='weighted')
    print(f"Epoch {epoch+1}/{args['epochs']} - Training F1-score: {train_f1:.4f}")
    print(f"Epoch {epoch+1}/{args['epochs']} - Training Precision: {train_precision:.4f}")
    print(f"Epoch {epoch+1}/{args['epochs']} - Training Recall: {train_recall:.4f}")
    
    # 验证
    only_text_model.eval()
    all_val_labels = []
    all_val_preds = []
    val_loss = 0.0
    with torch.no_grad():
        for batch in val_loader:
            text_inputs, attention_masks, img_inputs, labels = batch
            text_inputs = text_inputs.to(device)
            attention_masks = attention_masks.to(device)
            img_inputs = img_inputs.to(device)
            labels = labels.to(device)

            outputs = model(img_inputs, text_inputs, attention_masks) # 使用仅文本模型

            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            all_val_labels.extend(labels.cpu().numpy())
            all_val_preds.extend(predicted.cpu().numpy())

        print(f"Epoch {epoch+1}/{args['epochs']} - Validation Loss: {val_loss / len(val_loader):.4f}")
        val_f1 = f1_score(all_val_labels, all_val_preds, average='weighted')  # 计算加权的 F1-score
        val_precision = precision_score(all_val_labels, all_val_preds, average='weighted')
        val_recall = recall_score(all_val_labels, all_val_preds, average='weighted')
        print(f"Epoch {epoch+1}/{args['epochs']} - Validation F1-score: {val_f1:.4f}")
        print(f"Epoch {epoch+1}/{args['epochs']} - Validation Precision: {val_precision:.4f}")
        print(f"Epoch {epoch+1}/{args['epochs']} - Validation Recall: {val_recall:.4f}")
    
    # 储存最佳模型
    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        torch.save(model.state_dict(), 'val_best_model.pth')
        print(f"Saved best model with validation F1-score: {best_val_f1:.4f}")



Starting Epoch 1/20...
Epoch 1/20, Batch 10/100: Loss: 1.0732
Epoch 1/20, Batch 20/100: Loss: 1.1234
Epoch 1/20, Batch 30/100: Loss: 1.0794
Epoch 1/20, Batch 40/100: Loss: 1.1009
Epoch 1/20, Batch 50/100: Loss: 1.1363
Epoch 1/20, Batch 60/100: Loss: 1.1204
Epoch 1/20, Batch 70/100: Loss: 1.0947
Epoch 1/20, Batch 80/100: Loss: 1.1254
Epoch 1/20, Batch 90/100: Loss: 1.0848
Epoch 1/20, Batch 100/100: Loss: 1.0696
Epoch 1/20 - Average Training Loss: 1.1060
Epoch 1/20 - Training F1-score: 0.3486
Epoch 1/20 - Training Precision: 0.4679
Epoch 1/20 - Training Recall: 0.3294
Epoch 1/20 - Validation Loss: 1.0957
Epoch 1/20 - Validation F1-score: 0.1850
Epoch 1/20 - Validation Precision: 0.5024
Epoch 1/20 - Validation Recall: 0.3162


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved best model with validation F1-score: 0.1850
Starting Epoch 2/20...
Epoch 2/20, Batch 10/100: Loss: 1.1230
Epoch 2/20, Batch 20/100: Loss: 1.1331
Epoch 2/20, Batch 30/100: Loss: 1.0940
Epoch 2/20, Batch 40/100: Loss: 1.0842
Epoch 2/20, Batch 50/100: Loss: 1.0714
Epoch 2/20, Batch 60/100: Loss: 1.1064
Epoch 2/20, Batch 70/100: Loss: 1.0796
Epoch 2/20, Batch 80/100: Loss: 1.0824
Epoch 2/20, Batch 90/100: Loss: 1.0820
Epoch 2/20, Batch 100/100: Loss: 1.0655
Epoch 2/20 - Average Training Loss: 1.0997
Epoch 2/20 - Training F1-score: 0.4128
Epoch 2/20 - Training Precision: 0.4810
Epoch 2/20 - Training Recall: 0.3900
Epoch 2/20 - Validation Loss: 1.0946
Epoch 2/20 - Validation F1-score: 0.5038
Epoch 2/20 - Validation Precision: 0.5211
Epoch 2/20 - Validation Recall: 0.5913
Saved best model with validation F1-score: 0.5038
Starting Epoch 3/20...
Epoch 3/20, Batch 10/100: Loss: 1.0821
Epoch 3/20, Batch 20/100: Loss: 1.0981
Epoch 3/20, Batch 30/100: Loss: 1.0974
Epoch 3/20, Batch 40/100: Lo

In [14]:
# 加载最佳模型进行测试
model.load_state_dict(torch.load('val_best_model.pth'))
model.eval()

with torch.no_grad():
    predictions = []
    for batch in test_loader:
        text_inputs, attention_masks, img_inputs, _ = batch
        text_inputs = text_inputs.to(device)
        attention_masks = attention_masks.to(device)
        img_inputs = img_inputs.to(device)

        outputs = model(img_inputs, text_inputs, attention_masks)

        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.tolist())
        
with open('val_predictions.txt', 'w') as f:
    for pred in predictions:
        f.write(str(pred) + '\n')

  model.load_state_dict(torch.load('val_best_model.pth'))


In [15]:
# 消融实验：只使用文本信息

best_val_f1 = 0.0  
best_train_f1 = 0.0  

for epoch in range(args['epochs']):
    print(f"Starting Epoch {epoch+1}/{args['epochs']}...")
    
    # 训练
    only_text_model.train()
    running_loss = 0.0
    all_train_labels = []
    all_train_preds = []

    for batch_idx, batch in enumerate(train_loader):
        text_inputs, attention_masks, img_inputs, labels = batch
        
        text_inputs = text_inputs.to(device)
        attention_masks = attention_masks.to(device)
        img_inputs = img_inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        
        outputs = only_text_model(img_inputs, text_inputs, attention_masks)  
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)

        all_train_labels.extend(labels.cpu().numpy())
        all_train_preds.extend(predicted.cpu().numpy())

        if (batch_idx + 1) % 10 == 0:
            print(f"Epoch {epoch+1}/{args['epochs']}, Batch {batch_idx+1}/{len(train_loader)}: "
                  f"Loss: {loss.item():.4f}")

    print(f"Epoch {epoch+1}/{args['epochs']} - Average Training Loss: {running_loss / len(train_loader):.4f}")
    train_f1 = f1_score(all_train_labels, all_train_preds, average='weighted')  
    train_precision = precision_score(all_train_labels, all_train_preds, average='weighted')
    train_recall = recall_score(all_train_labels, all_train_preds, average='weighted')
    print(f"Epoch {epoch+1}/{args['epochs']} - Training F1-score: {train_f1:.4f}")
    print(f"Epoch {epoch+1}/{args['epochs']} - Training Precision: {train_precision:.4f}")
    print(f"Epoch {epoch+1}/{args['epochs']} - Training Recall: {train_recall:.4f}")

    # 验证
    only_text_model.eval()
    all_val_labels = []
    all_val_preds = []
    val_loss = 0.0
    with torch.no_grad():
        for batch in val_loader:
            text_inputs, attention_masks, img_inputs, labels = batch
            text_inputs = text_inputs.to(device)
            attention_masks = attention_masks.to(device)
            img_inputs = img_inputs.to(device)
            labels = labels.to(device)

            outputs = only_text_model(img_inputs, text_inputs, attention_masks)  

            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            all_val_labels.extend(labels.cpu().numpy())
            all_val_preds.extend(predicted.cpu().numpy())

        print(f"Epoch {epoch+1}/{args['epochs']} - Validation Loss: {val_loss / len(val_loader):.4f}")
        val_f1 = f1_score(all_val_labels, all_val_preds, average='weighted') 
        val_precision = precision_score(all_val_labels, all_val_preds, average='weighted')
        val_recall = recall_score(all_val_labels, all_val_preds, average='weighted')
        print(f"Epoch {epoch+1}/{args['epochs']} - Validation F1-score: {val_f1:.4f}")
        print(f"Epoch {epoch+1}/{args['epochs']} - Validation Precision: {val_precision:.4f}")
        print(f"Epoch {epoch+1}/{args['epochs']} - Validation Recall: {val_recall:.4f}")

Starting Epoch 1/20...
Epoch 1/20, Batch 10/100: Loss: 1.1764
Epoch 1/20, Batch 20/100: Loss: 1.1222
Epoch 1/20, Batch 30/100: Loss: 1.1050
Epoch 1/20, Batch 40/100: Loss: 1.1192
Epoch 1/20, Batch 50/100: Loss: 1.0263
Epoch 1/20, Batch 60/100: Loss: 1.0687
Epoch 1/20, Batch 70/100: Loss: 1.1469
Epoch 1/20, Batch 80/100: Loss: 1.0742
Epoch 1/20, Batch 90/100: Loss: 1.1030
Epoch 1/20, Batch 100/100: Loss: 1.0788
Epoch 1/20 - Average Training Loss: 1.1122
Epoch 1/20 - Training F1-score: 0.4619
Epoch 1/20 - Training Precision: 0.4430
Epoch 1/20 - Training Recall: 0.4988
Epoch 1/20 - Validation Loss: 1.0977
Epoch 1/20 - Validation F1-score: 0.4470
Epoch 1/20 - Validation Precision: 0.3570
Epoch 1/20 - Validation Recall: 0.5975
Starting Epoch 2/20...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 2/20, Batch 10/100: Loss: 1.0956
Epoch 2/20, Batch 20/100: Loss: 1.0930
Epoch 2/20, Batch 30/100: Loss: 1.0686
Epoch 2/20, Batch 40/100: Loss: 1.0600
Epoch 2/20, Batch 50/100: Loss: 1.0895
Epoch 2/20, Batch 60/100: Loss: 1.0730
Epoch 2/20, Batch 70/100: Loss: 1.1106
Epoch 2/20, Batch 80/100: Loss: 1.0827
Epoch 2/20, Batch 90/100: Loss: 1.0694
Epoch 2/20, Batch 100/100: Loss: 1.0787
Epoch 2/20 - Average Training Loss: 1.0994
Epoch 2/20 - Training F1-score: 0.4551
Epoch 2/20 - Training Precision: 0.4542
Epoch 2/20 - Training Recall: 0.4559
Epoch 2/20 - Validation Loss: 1.0977
Epoch 2/20 - Validation F1-score: 0.4470
Epoch 2/20 - Validation Precision: 0.3570
Epoch 2/20 - Validation Recall: 0.5975
Starting Epoch 3/20...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 3/20, Batch 10/100: Loss: 1.1025
Epoch 3/20, Batch 20/100: Loss: 1.1096
Epoch 3/20, Batch 30/100: Loss: 1.0914
Epoch 3/20, Batch 40/100: Loss: 1.1209
Epoch 3/20, Batch 50/100: Loss: 1.1091
Epoch 3/20, Batch 60/100: Loss: 1.1084
Epoch 3/20, Batch 70/100: Loss: 1.1302
Epoch 3/20, Batch 80/100: Loss: 1.0910
Epoch 3/20, Batch 90/100: Loss: 1.1325
Epoch 3/20, Batch 100/100: Loss: 1.0920
Epoch 3/20 - Average Training Loss: 1.1001
Epoch 3/20 - Training F1-score: 0.4470
Epoch 3/20 - Training Precision: 0.4553
Epoch 3/20 - Training Recall: 0.4444
Epoch 3/20 - Validation Loss: 1.0968
Epoch 3/20 - Validation F1-score: 0.1374
Epoch 3/20 - Validation Precision: 0.0893
Epoch 3/20 - Validation Recall: 0.2988
Starting Epoch 4/20...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 4/20, Batch 10/100: Loss: 1.0871
Epoch 4/20, Batch 20/100: Loss: 1.1005
Epoch 4/20, Batch 30/100: Loss: 1.1292
Epoch 4/20, Batch 40/100: Loss: 1.1029
Epoch 4/20, Batch 50/100: Loss: 1.1242
Epoch 4/20, Batch 60/100: Loss: 1.0931
Epoch 4/20, Batch 70/100: Loss: 1.0923
Epoch 4/20, Batch 80/100: Loss: 1.1100
Epoch 4/20, Batch 90/100: Loss: 1.0907
Epoch 4/20, Batch 100/100: Loss: 1.0686
Epoch 4/20 - Average Training Loss: 1.1016
Epoch 4/20 - Training F1-score: 0.4069
Epoch 4/20 - Training Precision: 0.4433
Epoch 4/20 - Training Recall: 0.3981
Epoch 4/20 - Validation Loss: 1.0974
Epoch 4/20 - Validation F1-score: 0.4470
Epoch 4/20 - Validation Precision: 0.3570
Epoch 4/20 - Validation Recall: 0.5975
Starting Epoch 5/20...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 5/20, Batch 10/100: Loss: 1.1000
Epoch 5/20, Batch 20/100: Loss: 1.0962
Epoch 5/20, Batch 30/100: Loss: 1.0967
Epoch 5/20, Batch 40/100: Loss: 1.0936
Epoch 5/20, Batch 50/100: Loss: 1.1018
Epoch 5/20, Batch 60/100: Loss: 1.0920
Epoch 5/20, Batch 70/100: Loss: 1.0814
Epoch 5/20, Batch 80/100: Loss: 1.1109
Epoch 5/20, Batch 90/100: Loss: 1.0975
Epoch 5/20, Batch 100/100: Loss: 1.1109
Epoch 5/20 - Average Training Loss: 1.0985
Epoch 5/20 - Training F1-score: 0.4425
Epoch 5/20 - Training Precision: 0.4724
Epoch 5/20 - Training Recall: 0.4228
Epoch 5/20 - Validation Loss: 1.0970
Epoch 5/20 - Validation F1-score: 0.1374
Epoch 5/20 - Validation Precision: 0.0893
Epoch 5/20 - Validation Recall: 0.2988
Starting Epoch 6/20...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 6/20, Batch 10/100: Loss: 1.0958
Epoch 6/20, Batch 20/100: Loss: 1.0985
Epoch 6/20, Batch 30/100: Loss: 1.0715
Epoch 6/20, Batch 40/100: Loss: 1.0813
Epoch 6/20, Batch 50/100: Loss: 1.0985
Epoch 6/20, Batch 60/100: Loss: 1.1314
Epoch 6/20, Batch 70/100: Loss: 1.0353
Epoch 6/20, Batch 80/100: Loss: 1.1344
Epoch 6/20, Batch 90/100: Loss: 1.1069
Epoch 6/20, Batch 100/100: Loss: 1.1252
Epoch 6/20 - Average Training Loss: 1.1013
Epoch 6/20 - Training F1-score: 0.4012
Epoch 6/20 - Training Precision: 0.4538
Epoch 6/20 - Training Recall: 0.3984
Epoch 6/20 - Validation Loss: 1.0966
Epoch 6/20 - Validation F1-score: 0.1374
Epoch 6/20 - Validation Precision: 0.0893
Epoch 6/20 - Validation Recall: 0.2988
Starting Epoch 7/20...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 7/20, Batch 10/100: Loss: 1.1084
Epoch 7/20, Batch 20/100: Loss: 1.0900
Epoch 7/20, Batch 30/100: Loss: 1.1025
Epoch 7/20, Batch 40/100: Loss: 1.1059
Epoch 7/20, Batch 50/100: Loss: 1.1326
Epoch 7/20, Batch 60/100: Loss: 1.0794
Epoch 7/20, Batch 70/100: Loss: 1.1015
Epoch 7/20, Batch 80/100: Loss: 1.0846
Epoch 7/20, Batch 90/100: Loss: 1.1082
Epoch 7/20, Batch 100/100: Loss: 1.1143
Epoch 7/20 - Average Training Loss: 1.0985
Epoch 7/20 - Training F1-score: 0.4657
Epoch 7/20 - Training Precision: 0.4699
Epoch 7/20 - Training Recall: 0.4719
Epoch 7/20 - Validation Loss: 1.0967
Epoch 7/20 - Validation F1-score: 0.4470
Epoch 7/20 - Validation Precision: 0.3570
Epoch 7/20 - Validation Recall: 0.5975
Starting Epoch 8/20...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 8/20, Batch 10/100: Loss: 1.1099
Epoch 8/20, Batch 20/100: Loss: 1.0940
Epoch 8/20, Batch 30/100: Loss: 1.1243
Epoch 8/20, Batch 40/100: Loss: 1.1095
Epoch 8/20, Batch 50/100: Loss: 1.0832
Epoch 8/20, Batch 60/100: Loss: 1.1142
Epoch 8/20, Batch 70/100: Loss: 1.0911
Epoch 8/20, Batch 80/100: Loss: 1.0848
Epoch 8/20, Batch 90/100: Loss: 1.1300
Epoch 8/20, Batch 100/100: Loss: 1.0756
Epoch 8/20 - Average Training Loss: 1.0998
Epoch 8/20 - Training F1-score: 0.4348
Epoch 8/20 - Training Precision: 0.4469
Epoch 8/20 - Training Recall: 0.4319
Epoch 8/20 - Validation Loss: 1.0966
Epoch 8/20 - Validation F1-score: 0.4470
Epoch 8/20 - Validation Precision: 0.3570
Epoch 8/20 - Validation Recall: 0.5975
Starting Epoch 9/20...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 9/20, Batch 10/100: Loss: 1.1322
Epoch 9/20, Batch 20/100: Loss: 1.0904
Epoch 9/20, Batch 30/100: Loss: 1.0851
Epoch 9/20, Batch 40/100: Loss: 1.1091
Epoch 9/20, Batch 50/100: Loss: 1.1020
Epoch 9/20, Batch 60/100: Loss: 1.0779
Epoch 9/20, Batch 70/100: Loss: 1.1223
Epoch 9/20, Batch 80/100: Loss: 1.0653
Epoch 9/20, Batch 90/100: Loss: 1.0696
Epoch 9/20, Batch 100/100: Loss: 1.0968
Epoch 9/20 - Average Training Loss: 1.1002
Epoch 9/20 - Training F1-score: 0.4506
Epoch 9/20 - Training Precision: 0.4584
Epoch 9/20 - Training Recall: 0.4547
Epoch 9/20 - Validation Loss: 1.0966
Epoch 9/20 - Validation F1-score: 0.4683
Epoch 9/20 - Validation Precision: 0.5183
Epoch 9/20 - Validation Recall: 0.6012
Starting Epoch 10/20...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 10/20, Batch 10/100: Loss: 1.0987
Epoch 10/20, Batch 20/100: Loss: 1.0824
Epoch 10/20, Batch 30/100: Loss: 1.0992
Epoch 10/20, Batch 40/100: Loss: 1.0829
Epoch 10/20, Batch 50/100: Loss: 1.0922
Epoch 10/20, Batch 60/100: Loss: 1.1067
Epoch 10/20, Batch 70/100: Loss: 1.0729
Epoch 10/20, Batch 80/100: Loss: 1.0916
Epoch 10/20, Batch 90/100: Loss: 1.0658
Epoch 10/20, Batch 100/100: Loss: 1.0577
Epoch 10/20 - Average Training Loss: 1.0978
Epoch 10/20 - Training F1-score: 0.4435
Epoch 10/20 - Training Precision: 0.4686
Epoch 10/20 - Training Recall: 0.4363
Epoch 10/20 - Validation Loss: 1.0966
Epoch 10/20 - Validation F1-score: 0.4846
Epoch 10/20 - Validation Precision: 0.4530
Epoch 10/20 - Validation Recall: 0.5525
Starting Epoch 11/20...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 11/20, Batch 10/100: Loss: 1.0975
Epoch 11/20, Batch 20/100: Loss: 1.1027
Epoch 11/20, Batch 30/100: Loss: 1.1185
Epoch 11/20, Batch 40/100: Loss: 1.1191
Epoch 11/20, Batch 50/100: Loss: 1.1005
Epoch 11/20, Batch 60/100: Loss: 1.0895
Epoch 11/20, Batch 70/100: Loss: 1.1152
Epoch 11/20, Batch 80/100: Loss: 1.0993
Epoch 11/20, Batch 90/100: Loss: 1.0685
Epoch 11/20, Batch 100/100: Loss: 1.1099
Epoch 11/20 - Average Training Loss: 1.0977
Epoch 11/20 - Training F1-score: 0.4675
Epoch 11/20 - Training Precision: 0.4588
Epoch 11/20 - Training Recall: 0.4778
Epoch 11/20 - Validation Loss: 1.0965
Epoch 11/20 - Validation F1-score: 0.4682
Epoch 11/20 - Validation Precision: 0.4952
Epoch 11/20 - Validation Recall: 0.5975
Starting Epoch 12/20...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 12/20, Batch 10/100: Loss: 1.1331
Epoch 12/20, Batch 20/100: Loss: 1.0960
Epoch 12/20, Batch 30/100: Loss: 1.1139
Epoch 12/20, Batch 40/100: Loss: 1.1117
Epoch 12/20, Batch 50/100: Loss: 1.1054
Epoch 12/20, Batch 60/100: Loss: 1.1030
Epoch 12/20, Batch 70/100: Loss: 1.0786
Epoch 12/20, Batch 80/100: Loss: 1.0961
Epoch 12/20, Batch 90/100: Loss: 1.1245
Epoch 12/20, Batch 100/100: Loss: 1.1236
Epoch 12/20 - Average Training Loss: 1.0983
Epoch 12/20 - Training F1-score: 0.4432
Epoch 12/20 - Training Precision: 0.4647
Epoch 12/20 - Training Recall: 0.4406
Epoch 12/20 - Validation Loss: 1.0965
Epoch 12/20 - Validation F1-score: 0.4846
Epoch 12/20 - Validation Precision: 0.4530
Epoch 12/20 - Validation Recall: 0.5525
Starting Epoch 13/20...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 13/20, Batch 10/100: Loss: 1.1093
Epoch 13/20, Batch 20/100: Loss: 1.0945
Epoch 13/20, Batch 30/100: Loss: 1.1038
Epoch 13/20, Batch 40/100: Loss: 1.0939
Epoch 13/20, Batch 50/100: Loss: 1.1045
Epoch 13/20, Batch 60/100: Loss: 1.0837
Epoch 13/20, Batch 70/100: Loss: 1.0885
Epoch 13/20, Batch 80/100: Loss: 1.0858
Epoch 13/20, Batch 90/100: Loss: 1.1195
Epoch 13/20, Batch 100/100: Loss: 1.1042
Epoch 13/20 - Average Training Loss: 1.1012
Epoch 13/20 - Training F1-score: 0.4079
Epoch 13/20 - Training Precision: 0.4472
Epoch 13/20 - Training Recall: 0.3862
Epoch 13/20 - Validation Loss: 1.0966
Epoch 13/20 - Validation F1-score: 0.1374
Epoch 13/20 - Validation Precision: 0.0893
Epoch 13/20 - Validation Recall: 0.2988
Starting Epoch 14/20...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 14/20, Batch 10/100: Loss: 1.0689
Epoch 14/20, Batch 20/100: Loss: 1.0797
Epoch 14/20, Batch 30/100: Loss: 1.0823
Epoch 14/20, Batch 40/100: Loss: 1.1067
Epoch 14/20, Batch 50/100: Loss: 1.1026
Epoch 14/20, Batch 60/100: Loss: 1.0950
Epoch 14/20, Batch 70/100: Loss: 1.0943
Epoch 14/20, Batch 80/100: Loss: 1.1094
Epoch 14/20, Batch 90/100: Loss: 1.0874
Epoch 14/20, Batch 100/100: Loss: 1.1022
Epoch 14/20 - Average Training Loss: 1.1004
Epoch 14/20 - Training F1-score: 0.4544
Epoch 14/20 - Training Precision: 0.4446
Epoch 14/20 - Training Recall: 0.4694
Epoch 14/20 - Validation Loss: 1.0972
Epoch 14/20 - Validation F1-score: 0.4470
Epoch 14/20 - Validation Precision: 0.3570
Epoch 14/20 - Validation Recall: 0.5975
Starting Epoch 15/20...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 15/20, Batch 10/100: Loss: 1.1187
Epoch 15/20, Batch 20/100: Loss: 1.0976
Epoch 15/20, Batch 30/100: Loss: 1.0883
Epoch 15/20, Batch 40/100: Loss: 1.1058
Epoch 15/20, Batch 50/100: Loss: 1.0829
Epoch 15/20, Batch 60/100: Loss: 1.0956
Epoch 15/20, Batch 70/100: Loss: 1.0749
Epoch 15/20, Batch 80/100: Loss: 1.1150
Epoch 15/20, Batch 90/100: Loss: 1.0657
Epoch 15/20, Batch 100/100: Loss: 1.1232
Epoch 15/20 - Average Training Loss: 1.0983
Epoch 15/20 - Training F1-score: 0.4548
Epoch 15/20 - Training Precision: 0.4591
Epoch 15/20 - Training Recall: 0.4547
Epoch 15/20 - Validation Loss: 1.0971
Epoch 15/20 - Validation F1-score: 0.1374
Epoch 15/20 - Validation Precision: 0.0893
Epoch 15/20 - Validation Recall: 0.2988
Starting Epoch 16/20...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 16/20, Batch 10/100: Loss: 1.0654
Epoch 16/20, Batch 20/100: Loss: 1.0909
Epoch 16/20, Batch 30/100: Loss: 1.0988
Epoch 16/20, Batch 40/100: Loss: 1.1122
Epoch 16/20, Batch 50/100: Loss: 1.1006
Epoch 16/20, Batch 60/100: Loss: 1.0714
Epoch 16/20, Batch 70/100: Loss: 1.1000
Epoch 16/20, Batch 80/100: Loss: 1.1045
Epoch 16/20, Batch 90/100: Loss: 1.1242
Epoch 16/20, Batch 100/100: Loss: 1.1014
Epoch 16/20 - Average Training Loss: 1.0996
Epoch 16/20 - Training F1-score: 0.4490
Epoch 16/20 - Training Precision: 0.4459
Epoch 16/20 - Training Recall: 0.4553
Epoch 16/20 - Validation Loss: 1.0967
Epoch 16/20 - Validation F1-score: 0.4911
Epoch 16/20 - Validation Precision: 0.4636
Epoch 16/20 - Validation Recall: 0.5225
Starting Epoch 17/20...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 17/20, Batch 10/100: Loss: 1.1030
Epoch 17/20, Batch 20/100: Loss: 1.1093
Epoch 17/20, Batch 30/100: Loss: 1.0893
Epoch 17/20, Batch 40/100: Loss: 1.1111
Epoch 17/20, Batch 50/100: Loss: 1.1373
Epoch 17/20, Batch 60/100: Loss: 1.1097
Epoch 17/20, Batch 70/100: Loss: 1.1000
Epoch 17/20, Batch 80/100: Loss: 1.1150
Epoch 17/20, Batch 90/100: Loss: 1.1001
Epoch 17/20, Batch 100/100: Loss: 1.0803
Epoch 17/20 - Average Training Loss: 1.1003
Epoch 17/20 - Training F1-score: 0.4195
Epoch 17/20 - Training Precision: 0.4331
Epoch 17/20 - Training Recall: 0.4156
Epoch 17/20 - Validation Loss: 1.0967
Epoch 17/20 - Validation F1-score: 0.4846
Epoch 17/20 - Validation Precision: 0.4530
Epoch 17/20 - Validation Recall: 0.5525
Starting Epoch 18/20...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 18/20, Batch 10/100: Loss: 1.1062
Epoch 18/20, Batch 20/100: Loss: 1.0873
Epoch 18/20, Batch 30/100: Loss: 1.0988
Epoch 18/20, Batch 40/100: Loss: 1.0977
Epoch 18/20, Batch 50/100: Loss: 1.1393
Epoch 18/20, Batch 60/100: Loss: 1.0988
Epoch 18/20, Batch 70/100: Loss: 1.1113
Epoch 18/20, Batch 80/100: Loss: 1.1267
Epoch 18/20, Batch 90/100: Loss: 1.0953
Epoch 18/20, Batch 100/100: Loss: 1.1142
Epoch 18/20 - Average Training Loss: 1.1020
Epoch 18/20 - Training F1-score: 0.4103
Epoch 18/20 - Training Precision: 0.4416
Epoch 18/20 - Training Recall: 0.3981
Epoch 18/20 - Validation Loss: 1.0969
Epoch 18/20 - Validation F1-score: 0.4470
Epoch 18/20 - Validation Precision: 0.3570
Epoch 18/20 - Validation Recall: 0.5975
Starting Epoch 19/20...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 19/20, Batch 10/100: Loss: 1.1136
Epoch 19/20, Batch 20/100: Loss: 1.0654
Epoch 19/20, Batch 30/100: Loss: 1.1007
Epoch 19/20, Batch 40/100: Loss: 1.0918
Epoch 19/20, Batch 50/100: Loss: 1.1090
Epoch 19/20, Batch 60/100: Loss: 1.0737
Epoch 19/20, Batch 70/100: Loss: 1.0928
Epoch 19/20, Batch 80/100: Loss: 1.1117
Epoch 19/20, Batch 90/100: Loss: 1.1226
Epoch 19/20, Batch 100/100: Loss: 1.1289
Epoch 19/20 - Average Training Loss: 1.1009
Epoch 19/20 - Training F1-score: 0.4363
Epoch 19/20 - Training Precision: 0.4497
Epoch 19/20 - Training Recall: 0.4266
Epoch 19/20 - Validation Loss: 1.0964
Epoch 19/20 - Validation F1-score: 0.1374
Epoch 19/20 - Validation Precision: 0.0893
Epoch 19/20 - Validation Recall: 0.2988
Starting Epoch 20/20...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 20/20, Batch 10/100: Loss: 1.0897
Epoch 20/20, Batch 20/100: Loss: 1.1347
Epoch 20/20, Batch 30/100: Loss: 1.0940
Epoch 20/20, Batch 40/100: Loss: 1.1420
Epoch 20/20, Batch 50/100: Loss: 1.1012
Epoch 20/20, Batch 60/100: Loss: 1.1115
Epoch 20/20, Batch 70/100: Loss: 1.1109
Epoch 20/20, Batch 80/100: Loss: 1.0850
Epoch 20/20, Batch 90/100: Loss: 1.0754
Epoch 20/20, Batch 100/100: Loss: 1.0975
Epoch 20/20 - Average Training Loss: 1.0973
Epoch 20/20 - Training F1-score: 0.4675
Epoch 20/20 - Training Precision: 0.4643
Epoch 20/20 - Training Recall: 0.4719
Epoch 20/20 - Validation Loss: 1.0964
Epoch 20/20 - Validation F1-score: 0.1374
Epoch 20/20 - Validation Precision: 0.0893
Epoch 20/20 - Validation Recall: 0.2988


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [14]:
# 消融实验：只使用图像信息

best_val_f1 = 0.0  
best_train_f1 = 0.0  

for epoch in range(args['epochs']):
    print(f"Starting Epoch {epoch+1}/{args['epochs']}...")
    
    # 训练
    only_image_model.train()
    running_loss = 0.0
    all_train_labels = []
    all_train_preds = []

    for batch_idx, batch in enumerate(train_loader):
        text_inputs, attention_masks, img_inputs, labels = batch
        
        text_inputs = text_inputs.to(device)
        attention_masks = attention_masks.to(device)
        img_inputs = img_inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        
        outputs = only_image_model(img_inputs, text_inputs, attention_masks)  
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)

        all_train_labels.extend(labels.cpu().numpy())
        all_train_preds.extend(predicted.cpu().numpy())

        if (batch_idx + 1) % 10 == 0:
            print(f"Epoch {epoch+1}/{args['epochs']}, Batch {batch_idx+1}/{len(train_loader)}: "
                  f"Loss: {loss.item():.4f}")

    print(f"Epoch {epoch+1}/{args['epochs']} - Average Training Loss: {running_loss / len(train_loader):.4f}")
    train_f1 = f1_score(all_train_labels, all_train_preds, average='weighted')  
    train_precision = precision_score(all_train_labels, all_train_preds, average='weighted')
    train_recall = recall_score(all_train_labels, all_train_preds, average='weighted')
    print(f"Epoch {epoch+1}/{args['epochs']} - Training F1-score: {train_f1:.4f}")
    print(f"Epoch {epoch+1}/{args['epochs']} - Training Precision: {train_precision:.4f}")
    print(f"Epoch {epoch+1}/{args['epochs']} - Training Recall: {train_recall:.4f}")

    # 验证
    only_image_model.eval()
    all_val_labels = []
    all_val_preds = []
    val_loss = 0.0
    with torch.no_grad():
        for batch in val_loader:
            text_inputs, attention_masks, img_inputs, labels = batch
            text_inputs = text_inputs.to(device)
            attention_masks = attention_masks.to(device)
            img_inputs = img_inputs.to(device)
            labels = labels.to(device)

            outputs = only_image_model(img_inputs, text_inputs, attention_masks)  

            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            all_val_labels.extend(labels.cpu().numpy())
            all_val_preds.extend(predicted.cpu().numpy())

        print(f"Epoch {epoch+1}/{args['epochs']} - Validation Loss: {val_loss / len(val_loader):.4f}")
        val_f1 = f1_score(all_val_labels, all_val_preds, average='weighted') 
        val_precision = precision_score(all_val_labels, all_val_preds, average='weighted')
        val_recall = recall_score(all_val_labels, all_val_preds, average='weighted')
        print(f"Epoch {epoch+1}/{args['epochs']} - Validation F1-score: {val_f1:.4f}")
        print(f"Epoch {epoch+1}/{args['epochs']} - Validation Precision: {val_precision:.4f}")
        print(f"Epoch {epoch+1}/{args['epochs']} - Validation Recall: {val_recall:.4f}")

Starting Epoch 1/20...
Epoch 1/20, Batch 10/100: Loss: 1.1278
Epoch 1/20, Batch 20/100: Loss: 1.1090
Epoch 1/20, Batch 30/100: Loss: 1.1091
Epoch 1/20, Batch 40/100: Loss: 1.0709
Epoch 1/20, Batch 50/100: Loss: 1.1118
Epoch 1/20, Batch 60/100: Loss: 1.0884
Epoch 1/20, Batch 70/100: Loss: 1.1151
Epoch 1/20, Batch 80/100: Loss: 1.0965
Epoch 1/20, Batch 90/100: Loss: 1.0649
Epoch 1/20, Batch 100/100: Loss: 1.0690
Epoch 1/20 - Average Training Loss: 1.1034
Epoch 1/20 - Training F1-score: 0.4750
Epoch 1/20 - Training Precision: 0.4530
Epoch 1/20 - Training Recall: 0.4997


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 1/20 - Validation Loss: 1.0993
Epoch 1/20 - Validation F1-score: 0.4718
Epoch 1/20 - Validation Precision: 0.4512
Epoch 1/20 - Validation Recall: 0.4950
Starting Epoch 2/20...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 2/20, Batch 10/100: Loss: 1.0732
Epoch 2/20, Batch 20/100: Loss: 1.1283
Epoch 2/20, Batch 30/100: Loss: 1.0652
Epoch 2/20, Batch 40/100: Loss: 1.0635
Epoch 2/20, Batch 50/100: Loss: 1.1126
Epoch 2/20, Batch 60/100: Loss: 1.0760
Epoch 2/20, Batch 70/100: Loss: 1.0703
Epoch 2/20, Batch 80/100: Loss: 1.0805
Epoch 2/20, Batch 90/100: Loss: 1.1170
Epoch 2/20, Batch 100/100: Loss: 1.0873
Epoch 2/20 - Average Training Loss: 1.0991
Epoch 2/20 - Training F1-score: 0.4847
Epoch 2/20 - Training Precision: 0.4644
Epoch 2/20 - Training Recall: 0.5081


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 2/20 - Validation Loss: 1.0956
Epoch 2/20 - Validation F1-score: 0.4878
Epoch 2/20 - Validation Precision: 0.4689
Epoch 2/20 - Validation Recall: 0.5100
Starting Epoch 3/20...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 3/20, Batch 10/100: Loss: 1.0638
Epoch 3/20, Batch 20/100: Loss: 1.1139
Epoch 3/20, Batch 30/100: Loss: 1.1108
Epoch 3/20, Batch 40/100: Loss: 1.0845
Epoch 3/20, Batch 50/100: Loss: 1.1123
Epoch 3/20, Batch 60/100: Loss: 1.1081
Epoch 3/20, Batch 70/100: Loss: 1.0956
Epoch 3/20, Batch 80/100: Loss: 1.0750
Epoch 3/20, Batch 90/100: Loss: 1.1555
Epoch 3/20, Batch 100/100: Loss: 1.0903
Epoch 3/20 - Average Training Loss: 1.0982
Epoch 3/20 - Training F1-score: 0.4821
Epoch 3/20 - Training Precision: 0.4822
Epoch 3/20 - Training Recall: 0.5059
Epoch 3/20 - Validation Loss: 1.0951
Epoch 3/20 - Validation F1-score: 0.4934
Epoch 3/20 - Validation Precision: 0.4930
Epoch 3/20 - Validation Recall: 0.5400
Starting Epoch 4/20...
Epoch 4/20, Batch 10/100: Loss: 1.0854
Epoch 4/20, Batch 20/100: Loss: 1.1098
Epoch 4/20, Batch 30/100: Loss: 1.0771
Epoch 4/20, Batch 40/100: Loss: 1.0460
Epoch 4/20, Batch 50/100: Loss: 1.0814
Epoch 4/20, Batch 60/100: Loss: 1.1067
Epoch 4/20, Batch 70/100: Loss: 1.