In [3]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import random
import time
import GPUtil
from functions import Dataset_CRNN, labels2cat  # 你自己的数据集和标签处理函数
import warnings
warnings.filterwarnings("ignore")

# ===== DINOv2 特定模块 =====
def DINOv2Transform(size=224):
    """DINOv2专用的图像预处理流程"""
    return transforms.Compose([
        transforms.Resize(size, interpolation=transforms.InterpolationMode.BICUBIC),
        transforms.CenterCrop(size),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ])

def get_dinov2_feature_extractor(model_name='dinov2_vits14', device='cuda'):
    """获取DINOv2特征提取器"""
    model = torch.hub.load('facebookresearch/dinov2', model_name).to(device)
    model.eval()
    for param in model.parameters():
        param.requires_grad_(False)
    return model

# ===== 内存监控工具 =====
def print_gpu_usage(msg):
    """打印GPU内存使用情况"""
    GPUs = GPUtil.getGPUs()
    for i, gpu in enumerate(GPUs):
        print(f"{msg} - GPU {i}: {gpu.memoryUsed:.1f}/{gpu.memoryTotal:.1f} MB ({gpu.memoryUtil*100:.1f}%)")
    if torch.cuda.is_available():
        print(f"{msg} - Allocated: {torch.cuda.memory_allocated()/1024**2:.1f} MB, "
              f"Cached: {torch.cuda.memory_reserved()/1024**2:.1f} MB")

# ===== 模型定义 =====

class DINOv2FeatureExtractor(nn.Module):
    def __init__(self, model_name='dinov2_vits14', device='cuda'):
        super(DINOv2FeatureExtractor, self).__init__()
        self.device = device
        self.model = get_dinov2_feature_extractor(model_name, device)
        self.feature_dim = self.model.embed_dim  # DINOv2的特征维度

    def forward(self, x):
        if self.training:
            # 训练模式下允许梯度计算，可以开启自动混合精度
            with torch.cuda.amp.autocast():
                features = self.model(x)
        else:
            # 验证/推理模式关闭梯度节省显存
            with torch.no_grad():
                features = self.model(x)
        return features

class ResCNNEncoder(nn.Module):
    def __init__(self, fc_hidden1=512, fc_hidden2=512, drop_p=0.3, CNN_embed_dim=512, device='cuda', model_name='dinov2_vits14'):
        super(ResCNNEncoder, self).__init__()
        
        self.feature_extractor = DINOv2FeatureExtractor(model_name, device)
        self.embed_dim = self.feature_extractor.feature_dim
        
        self.fc1 = nn.Linear(self.embed_dim, fc_hidden1)
        self.bn1 = nn.BatchNorm1d(fc_hidden1, momentum=0.01)
        self.fc2 = nn.Linear(fc_hidden1, CNN_embed_dim)
        self.bn2 = nn.BatchNorm1d(CNN_embed_dim, momentum=0.01)
        self.drop_p = drop_p

    def forward(self, x_3d):
        cnn_embed_seq = []
        for t in range(x_3d.size(1)):
            raw_features = self.feature_extractor(x_3d[:, t])
            x = self.bn1(self.fc1(raw_features))
            x = F.relu(x)
            x = F.dropout(x, p=self.drop_p, training=self.training)
            x = self.bn2(self.fc2(x))
            cnn_embed_seq.append(x)
        cnn_embed_seq = torch.stack(cnn_embed_seq, dim=0).transpose(0, 1)
        return cnn_embed_seq

class DecoderRNN(nn.Module):
    def __init__(self, CNN_embed_dim=512, h_RNN_layers=3, h_RNN=256, h_FC_dim=128, drop_p=0.3, num_classes=50):
        super(DecoderRNN, self).__init__()
        self.LSTM = nn.LSTM(input_size=CNN_embed_dim,
                            hidden_size=h_RNN,
                            num_layers=h_RNN_layers,
                            batch_first=True)
        self.fc1 = nn.Linear(h_RNN, h_FC_dim)
        self.fc2 = nn.Linear(h_FC_dim, num_classes)
        self.drop_p = drop_p

    def forward(self, x_RNN):
        self.LSTM.flatten_parameters()
        RNN_out, _ = self.LSTM(x_RNN, None)
        x = self.fc1(RNN_out[:, -1, :])
        x = F.relu(x)
        x = F.dropout(x, p=self.drop_p, training=self.training)
        x = self.fc2(x)
        return x

# ===== 训练函数 =====
def train(log_interval, model, device, train_loader, optimizer, epoch, class_weights):
    cnn_encoder, rnn_decoder = model
    cnn_encoder.train()
    rnn_decoder.train()
    
    losses = []
    scores = []
    N_count = 0
    total_batches = len(train_loader)
    
    for batch_idx, (X, y) in enumerate(train_loader):
        X, y = X.to(device), y.to(device).view(-1)
        
        optimizer.zero_grad()
        
        features = cnn_encoder(X)
        output = rnn_decoder(features)
        
        loss = F.cross_entropy(output, y, weight=class_weights.to(device))
        losses.append(loss.item())
        
        y_pred = torch.max(output, 1)[1]
        step_score = accuracy_score(y.cpu().numpy(), y_pred.cpu().numpy())
        scores.append(step_score)
        
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(cnn_encoder.parameters(), 5.0)
        torch.nn.utils.clip_grad_norm_(rnn_decoder.parameters(), 5.0)
        
        optimizer.step()
        
        N_count += X.size(0)
        
        if (batch_idx + 1) % log_interval == 0:
            print(f'Epoch: {epoch+1} [{N_count}/{len(train_loader.dataset)} '
                  f'({100.*(batch_idx+1)/total_batches:.0f}%)]\tLoss: {loss.item():.6f}, Accu: {100*step_score:.2f}%')
            print_gpu_usage("After batch")
            
    return losses, scores

# ===== 验证函数 =====
def validation(model, device, valid_loader, class_weights):
    cnn_encoder, rnn_decoder = model
    cnn_encoder.eval()
    rnn_decoder.eval()
    
    test_loss = 0
    all_y = []
    all_y_pred = []
    
    with torch.no_grad():
        for X, y in valid_loader:
            X, y = X.to(device), y.to(device).view(-1)
            features = cnn_encoder(X)
            output = rnn_decoder(features)
            
            loss = F.cross_entropy(output, y, weight=class_weights.to(device), reduction='sum')
            test_loss += loss.item()
            y_pred = output.max(1, keepdim=True)[1]
            
            all_y.extend(y.cpu())
            all_y_pred.extend(y_pred.squeeze().cpu())
    
    test_loss /= len(valid_loader.dataset)
    all_y = torch.tensor(all_y)
    all_y_pred = torch.tensor(all_y_pred)
    test_score = (all_y == all_y_pred).float().mean().item()
    
    print(f'\nValid set ({len(all_y)} samples): Avg loss: {test_loss:.4f}, Acc: {100*test_score:.2f}%')
    print_gpu_usage("After validation")
    
    return test_loss, test_score

# ===== 检查点保存和加载 =====
def save_checkpoint(state, filename):
    torch.save(state, filename)

def load_checkpoint(cnn_encoder, rnn_decoder, optimizer, scheduler, filename, device):
    checkpoint = torch.load(filename, map_location=device)
    cnn_encoder.load_state_dict(checkpoint['cnn_encoder_state_dict'])
    rnn_decoder.load_state_dict(checkpoint['rnn_decoder_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    start_epoch = checkpoint['epoch'] + 1
    best_valid_score = checkpoint['best_valid_score']
    no_improve_count = checkpoint.get('no_improve_count', 0)
    return start_epoch, best_valid_score, no_improve_count

# ===== 主训练流程，K-Fold 版本 =====
if __name__ == "__main__":
    random_seeds = [42]
    data_path = "./TGRS"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"使用设备: {device}")

    action_names = ['Plunging', 'Spilling', 'Surging']
    le = LabelEncoder()
    le.fit(action_names)

    fnames = os.listdir(data_path)
    actions, all_names = [], []
    for f in fnames:
        loc = f.find('-')
        if loc == -1:
            print(f"Unexpected file format: {f}")
            continue
        actions.append(f[:loc])
        all_names.append(f)

    all_X_list = all_names
    all_y_list = labels2cat(le, actions)

    k = 5
    skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)

    CNN_fc_hidden1, CNN_fc_hidden2 = 1024, 768
    CNN_embed_dim = 512
    res_size = 224
    dropout_p = 0.3
    RNN_hidden_layers = 3
    RNN_hidden_nodes = 512
    RNN_FC_dim = 256
    epochs = 100
    batch_size = 50
    learning_rate = 1e-4
    log_interval = 10
    patience = 20
    window_size = 5
    begin_frame, end_frame, skip_frame = 1, 60, 1
    selected_frames = list(range(begin_frame, end_frame, skip_frame))

    transform = DINOv2Transform(res_size)
    params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 4, 'pin_memory': True} if device.type=='cuda' else {'batch_size': batch_size, 'shuffle': True}

    for seed in random_seeds:
        print(f"\n=== 随机种子: {seed} ===")
        torch.manual_seed(seed)
        np.random.seed(seed)
        random.seed(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = True

        for fold, (train_idx, valid_idx) in enumerate(skf.split(all_X_list, all_y_list)):
            print(f"\n=== Fold {fold+1}/{k} ===")

            save_path = f"./TGRS_baseline/output_dinov2_fold{fold}"
            save_model_path = os.path.join(save_path, "models")
            os.makedirs(save_path, exist_ok=True)
            os.makedirs(save_model_path, exist_ok=True)

            train_labels = [all_y_list[i] for i in train_idx]
            train_labels_np = np.array(train_labels)
            class_sample_counts = np.array([(train_labels_np == i).sum() for i in range(len(action_names))])
            class_weights = 1. / (class_sample_counts + 1e-6)
            class_weights = class_weights * len(action_names) / class_weights.sum()
            class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

            train_set = Dataset_CRNN(data_path, [all_X_list[i] for i in train_idx],
                                    [all_y_list[i] for i in train_idx], selected_frames, transform=transform)
            valid_set = Dataset_CRNN(data_path, [all_X_list[i] for i in valid_idx],
                                    [all_y_list[i] for i in valid_idx], selected_frames, transform=transform)

            train_loader = DataLoader(train_set, **params)
            valid_loader = DataLoader(valid_set, **params)

            cnn_encoder = ResCNNEncoder(fc_hidden1=CNN_fc_hidden1, fc_hidden2=CNN_fc_hidden2,
                                        drop_p=dropout_p, CNN_embed_dim=CNN_embed_dim,
                                        device=device).to(device)
            rnn_decoder = DecoderRNN(CNN_embed_dim=CNN_embed_dim, h_RNN_layers=RNN_hidden_layers,
                                    h_RNN=RNN_hidden_nodes, h_FC_dim=RNN_FC_dim,
                                    drop_p=dropout_p, num_classes=len(action_names)).to(device)

            if torch.cuda.device_count() > 1:
                print(f"使用 {torch.cuda.device_count()} GPUs!")
                cnn_encoder = nn.DataParallel(cnn_encoder)
                rnn_decoder = nn.DataParallel(rnn_decoder)
                crnn_params = list(cnn_encoder.module.parameters()) + list(rnn_decoder.parameters())
            else:
                print("使用单GPU!")
                crnn_params = list(cnn_encoder.parameters()) + list(rnn_decoder.parameters())

            optimizer = torch.optim.Adam(crnn_params, lr=learning_rate, weight_decay=1e-5)
            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=8, verbose=True)

            checkpoint_path = os.path.join(save_model_path, 'checkpoint.pth')

            train_losses, train_scores, valid_losses, valid_scores = [], [], [], []

            if os.path.exists(checkpoint_path):
                print(f"加载检查点: {checkpoint_path}")
                start_epoch, best_valid_score, no_improve_count = load_checkpoint(
                    cnn_encoder, rnn_decoder, optimizer, scheduler, checkpoint_path, device)

                for key, lst in zip(['train_losses', 'train_scores', 'valid_losses', 'valid_scores'],
                                    [train_losses, train_scores, valid_losses, valid_scores]):
                    fpath = os.path.join(save_path, f"{key}_seed{seed}.npy")
                    if os.path.exists(fpath):
                        arr = np.load(fpath).tolist()
                        lst.extend(arr if isinstance(arr, list) else [arr])
            else:
                print("未找到检查点，开始新训练")
                start_epoch = 0
                best_valid_score = -float('inf')
                no_improve_count = 0

            for epoch in range(start_epoch, epochs):
                print_gpu_usage(f"Fold{fold+1} Epoch {epoch+1} 开始前")
                epoch_start = time.time()

                print(f"\n训练 Fold{fold+1} Epoch {epoch+1}/{epochs}")
                train_loss, train_score = train(log_interval, [cnn_encoder, rnn_decoder], device, train_loader,
                                               optimizer, epoch, class_weights)

                print(f"验证 Fold{fold+1} Epoch {epoch+1}/{epochs}")
                valid_loss, valid_score = validation([cnn_encoder, rnn_decoder], device, valid_loader, class_weights)

                scheduler.step(valid_score)

                train_losses.append(np.mean(train_loss))
                train_scores.append(np.mean(train_score))
                valid_losses.append(valid_loss)
                valid_scores.append(valid_score)

                checkpoint = {
                    'epoch': epoch,
                    'cnn_encoder_state_dict': cnn_encoder.state_dict(),
                    'rnn_decoder_state_dict': rnn_decoder.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                    'best_valid_score': best_valid_score,
                    'no_improve_count': no_improve_count
                }
                save_checkpoint(checkpoint, checkpoint_path)

                np.save(os.path.join(save_path, f'train_losses_seed{seed}.npy'), np.array(train_losses))
                np.save(os.path.join(save_path, f'train_scores_seed{seed}.npy'), np.array(train_scores))
                np.save(os.path.join(save_path, f'valid_losses_seed{seed}.npy'), np.array(valid_losses))
                np.save(os.path.join(save_path, f'valid_scores_seed{seed}.npy'), np.array(valid_scores))

                if epoch >= window_size - 1:
                    window_avg = np.mean(valid_scores[-window_size:])
                    if window_avg > best_valid_score:
                        print(f"验证准确率提升 ({best_valid_score:.4f} → {window_avg:.4f})")
                        best_valid_score = window_avg
                        no_improve_count = 0
                        torch.save(cnn_encoder.state_dict(),
                                   os.path.join(save_model_path, f'best_cnn_encoder_seed{seed}.pth'))
                        torch.save(rnn_decoder.state_dict(),
                                   os.path.join(save_model_path, f'best_rnn_decoder_seed{seed}.pth'))
                    else:
                        no_improve_count += 1
                        print(f"无提升 {no_improve_count}/{patience}")

                epoch_time = time.time() - epoch_start
                print(f"Fold{fold+1} Epoch {epoch+1} 完成，耗时 {epoch_time//60:.0f}m {epoch_time%60:.0f}s")

                if no_improve_count >= patience:
                    print(f"连续{patience}轮无性能提升，提前结束训练 Fold{fold+1}")
                    break

            print(f"\nFold {fold+1} 最佳模型测试")
            cnn_encoder.load_state_dict(torch.load(os.path.join(save_model_path, f'best_cnn_encoder_seed{seed}.pth')))
            rnn_decoder.load_state_dict(torch.load(os.path.join(save_model_path, f'best_rnn_decoder_seed{seed}.pth')))
            test_loss, test_score = validation([cnn_encoder, rnn_decoder], device, valid_loader, class_weights)
            print(f"Fold {fold+1} 测试准确率: {test_score:.4f}")
            np.save(os.path.join(save_path, f'test_score_seed{seed}.npy'), np.array([test_score]))


使用设备: cpu
Unexpected file format: .DS_Store

=== 随机种子: 42 ===

=== Fold 1/5 ===


Using cache found in /Users/wuxi/.cache/torch/hub/facebookresearch_dinov2_main


使用单GPU!


TypeError: ReduceLROnPlateau.__init__() got an unexpected keyword argument 'verbose'