In [1]:
import jittor as jt
from jittor import nn
from numpy import random
import numpy as np

[38;5;2m[i 1023 17:32:01.671895 60 log.cc:351] Load log_sync: 1[m
[38;5;2m[i 1023 17:32:01.716431 60 compiler.py:956] Jittor(1.3.10.0) src: /home/jittor/SCC_Model/ViT/.venv/lib/python3.10/site-packages/jittor[m
[38;5;2m[i 1023 17:32:01.721645 60 compiler.py:957] g++ at /usr/bin/g++(11.4.0)[m
[38;5;2m[i 1023 17:32:01.722571 60 compiler.py:958] cache_path: /home/jittor/.cache/jittor/jt1.3.10/g++11.4.0/py3.10.12/Linux-6.6.87.2x4a/AMDRyzen97940Hxd7/fa38/main[m
[38;5;2m[i 1023 17:32:01.808233 60 install_cuda.py:96] cuda_driver_version: [12, 9][m
[38;5;2m[i 1023 17:32:01.821657 60 __init__.py:412] Found /home/jittor/.cache/jittor/jtcuda/cuda12.2_cudnn8_linux/bin/nvcc(12.2.140) at /home/jittor/.cache/jittor/jtcuda/cuda12.2_cudnn8_linux/bin/nvcc.[m
[38;5;2m[i 1023 17:32:01.872842 60 __init__.py:412] Found addr2line(2.38) at /usr/bin/addr2line.[m
[38;5;2m[i 1023 17:32:01.942623 60 compiler.py:1013] cuda key:cu12.2.140[m
[38;5;2m[i 1023 17:32:02.580592 60 __init__.py:227] Total 

In [2]:
import os
import sys
# Put the parent of the working directory at the front of the import search
# path so the project-level modules imported in the next cell can be found.
current_dir = os.path.abspath('.')
project_root = os.path.dirname(current_dir)

# Drop one existing entry (if any) before re-inserting at index 0, so the
# project root ends up first on sys.path.
try:
    sys.path.remove(project_root)
except ValueError:
    pass
sys.path.insert(0, project_root)

In [3]:
from data_loader import TomatoDataset
from data_loader import get_dataloader
from models.vit_model import Visual_Transformer
from config import Config

In [None]:
# Instantiate the project configuration; the training code below reads its
# attributes (e.g. BATCH_SIZE, LEARNING_RATE, IMG_SIZE, NUM_CLASSES).
config = Config()

: 

In [None]:
# Seed the random number generators
def set_seed(seed=42):
    """Apply ``seed`` to NumPy's global RNG and to Jittor's global seed."""
    for apply_seed in (np.random.seed, jt.set_global_seed):
        apply_seed(seed)

# Accuracy helper
def calculate_accuracy(outputs, labels):
    """Compute Top-1 accuracy for one batch.

    Args:
        outputs: Model outputs; the arg-max is taken along ``dim=1``.
        labels: Ground-truth labels; ``labels.shape[0]`` is used as the batch size.

    Returns:
        Number of predictions equal to their label divided by the batch size.
    """
    # Element 0 of jt.argmax's result is used as the predicted class indices.
    argmax_result = jt.argmax(outputs, dim=1)
    predicted = argmax_result[0]

    hits = jt.sum(predicted == labels).item()
    batch_size = labels.shape[0]
    return hits / batch_size

# Train for one epoch
def train_epoch(model, train_loader, optimizer, epoch, total_epochs):
    """Run one training pass over ``train_loader``.

    Args:
        model: Network to optimise; put into training mode via ``model.train()``.
        train_loader: Iterable of ``(images, labels)`` batches; ``len()`` is
            used for the progress line.
        optimizer: Optimizer whose ``step(loss)`` is called once per batch.
        epoch: Zero-based epoch index (displayed as ``epoch + 1``).
        total_epochs: Total epoch count, used only in the printed banner.

    Returns:
        ``(avg_loss, avg_acc)``: the per-batch mean loss and mean accuracy.
    """
    model.train()

    print(f"\n{'='*60}")
    print(f"Epoch [{epoch+1}/{total_epochs}] - Training")
    print(f"{'='*60}")

    loss_sum = acc_sum = 0.0
    n_batches = 0

    for batch_idx, batch in enumerate(train_loader):
        images, labels = batch

        # Forward pass, loss, then the optimizer update driven by the loss.
        outputs = model(images)
        loss = nn.cross_entropy_loss(outputs, labels)
        optimizer.step(loss)

        # Batch metrics feed the running totals.
        batch_acc = calculate_accuracy(outputs, labels)
        loss_sum += float(loss)
        acc_sum += batch_acc
        n_batches += 1

        # Report running averages every 10th batch only.
        if (batch_idx + 1) % 10 != 0:
            continue
        avg_loss = loss_sum / n_batches
        avg_acc = acc_sum / n_batches
        print(f"  Batch [{batch_idx+1}/{len(train_loader)}] "
              f"Loss: {avg_loss:.4f} | Acc: {avg_acc*100:.2f}%")

    return loss_sum / n_batches, acc_sum / n_batches

# Validation
def validate(model, val_loader, epoch, total_epochs):
    """Evaluate ``model`` on ``val_loader`` and print overall and per-class accuracy.

    Args:
        model: Network to evaluate; put into evaluation mode via ``model.eval()``.
        val_loader: Iterable of ``(images, labels)`` batches.
        epoch: Zero-based epoch index (displayed as ``epoch + 1``).
        total_epochs: Total epoch count, used only in the printed banner.

    Returns:
        ``(avg_loss, avg_acc)``: the per-batch mean loss and mean accuracy.
    """
    model.eval()

    num_classes = config.NUM_CLASSES

    # Resolve the class display names up front. The original code read a bare
    # global CLASS_NAMES that no visible cell defines, which raised NameError
    # when printing the per-class report. Prefer a global CLASS_NAMES if one
    # exists, then config.CLASS_NAMES, and finally fall back to generic labels.
    class_names = globals().get('CLASS_NAMES')
    if class_names is None:
        class_names = getattr(config, 'CLASS_NAMES', None)
    if class_names is None:
        class_names = [f"class_{i}" for i in range(num_classes)]

    total_loss = 0.0
    total_acc = 0.0
    batch_count = 0

    # Confusion matrix: rows are indexed by true label, columns by prediction.
    confusion_matrix = np.zeros((num_classes, num_classes), dtype=np.int32)

    print(f"\n{'='*60}")
    print(f"Epoch [{epoch+1}/{total_epochs}] - Validation")
    print(f"{'='*60}")

    with jt.no_grad():
        for images, labels in val_loader:
            # Forward pass
            outputs = model(images)

            # Loss
            loss = nn.cross_entropy_loss(outputs, labels)

            # Batch accuracy
            acc = calculate_accuracy(outputs, labels)

            # Update the confusion matrix from the arg-max predictions.
            preds = jt.argmax(outputs, dim=1)[0].numpy()
            labels_np = labels.numpy()
            for pred, label in zip(preds, labels_np):
                confusion_matrix[int(label), int(pred)] += 1

            total_loss += float(loss)
            total_acc += acc
            batch_count += 1

    avg_loss = total_loss / batch_count
    avg_acc = total_acc / batch_count

    print(f"  Val Loss: {avg_loss:.4f} | Val Acc: {avg_acc*100:.2f}%")

    # Per-class accuracy: diagonal entry divided by the row total.
    print(f"\n  Per-class Accuracy:")
    for i in range(num_classes):
        class_correct = confusion_matrix[i, i]
        class_total = confusion_matrix[i, :].sum()
        # Classes with no validation samples are reported as 0.
        class_acc = class_correct / class_total if class_total > 0 else 0
        print(f"    {class_names[i]:<25}: {class_acc*100:>6.2f}% ({class_correct}/{class_total})")

    return avg_loss, avg_acc

# Main training entry point
def train(total_epochs=50):
    """Run the full training pipeline.

    Selects the device, seeds the RNGs, builds the train/val loaders, the
    model and the Adam optimizer, then trains with a cosine-annealed learning
    rate. The best model (by validation accuracy), a checkpoint every 10
    epochs, the final model and the training history (JSON) are written to
    the ``checkpoints`` directory.

    Args:
        total_epochs: Number of epochs to train for. Defaults to 50.
    """
    # Imported locally: the notebook's visible import cells do not import
    # json, so the history dump at the end previously raised NameError after
    # the whole training run had completed.
    import json

    # Select the device
    jt.flags.use_cuda = 1 if jt.has_cuda else 0
    print(f"\n{'='*60}")
    print(f"Training Configuration")
    print(f"{'='*60}")
    print(f"Device: {'GPU' if jt.flags.use_cuda else 'CPU'}")
    print(f"Batch Size: {config.BATCH_SIZE}")
    print(f"Learning Rate: {config.LEARNING_RATE}")
    print(f"Epochs: {total_epochs}")
    print(f"Image Size: {config.IMG_SIZE}")
    print(f"{'='*60}\n")

    # Seed the RNGs
    set_seed(42)

    # Create the output directory
    save_dir = 'checkpoints'
    os.makedirs(save_dir, exist_ok=True)

    # Load the data
    print("Loading datasets...")
    train_loader = get_dataloader(
        root_dir= project_root+'/tomato_yolo_dataset',  # default dataset path
        mode='train',
        batch_size=config.BATCH_SIZE,
        img_size=config.IMG_SIZE,
        shuffle=True,
        num_workers=0
    )

    val_loader = get_dataloader(
        root_dir=project_root+'/tomato_yolo_dataset',  # default dataset path
        mode='val',
        batch_size=config.BATCH_SIZE,
        img_size=config.IMG_SIZE,
        shuffle=False,
        num_workers=0
    )

    # Build the model
    # NOTE(review): config.NUM_CLASSES is not passed here; confirm the model's
    # output size matches the number of classes used by validate().
    print("\nCreating model...")
    model = Visual_Transformer(
        img_size=config.IMG_SIZE,
        patch_size=config.PATCH_SIZE,
        in_channels=config.IN_CHANNELS,
        embed_dim=config.EMBED_DIM,
        depth=config.NUM_LAYERS,
        num_heads=config.NUM_HEADS,
        dropout_rate=config.DROPOUT,
        hidden_dim=config.MLP_Hidden_Dim
    )

    # Optimizer
    optimizer = nn.Adam(
        model.parameters(),
        lr=config.LEARNING_RATE,
        weight_decay=1e-4
    )

    # Learning-rate schedule (cosine annealing)
    def adjust_learning_rate(optimizer, epoch, total_epochs, base_lr):
        """Set every param group's lr to the cosine-annealed value and return it."""
        lr = base_lr * 0.5 * (1.0 + np.cos(np.pi * epoch / total_epochs))
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        return lr

    # Training history
    history = {
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': [],
        'lr': []
    }

    best_val_acc = 0.0
    best_epoch = 0

    # Training loop
    print("\nStarting training...\n")
    for epoch in range(total_epochs):
        # Update the learning rate for this epoch
        current_lr = adjust_learning_rate(
            optimizer, epoch, total_epochs, config.LEARNING_RATE
        )
        print(f"Learning Rate: {current_lr:.6f}")

        # Train
        train_loss, train_acc = train_epoch(
            model, train_loader, optimizer, epoch, total_epochs
        )

        # Validate
        val_loss, val_acc = validate(
            model, val_loader, epoch, total_epochs
        )

        # Record history
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        history['lr'].append(current_lr)

        # Save the best model so far (strict improvement in validation accuracy)
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_epoch = epoch

            model_path = os.path.join(save_dir, 'best_model.pkl')
            jt.save(model.state_dict(), model_path)
            print(f"\n  ✓ Best model saved! Val Acc: {best_val_acc*100:.2f}%")

        # Periodic checkpoint every 10 epochs
        if (epoch + 1) % 10 == 0:
            checkpoint_path = os.path.join(save_dir, f'checkpoint_epoch_{epoch+1}.pkl')
            jt.save(model.state_dict(), checkpoint_path)
            print(f"  ✓ Checkpoint saved: {checkpoint_path}")

        print(f"\n  Summary: Train Acc: {train_acc*100:.2f}% | "
              f"Val Acc: {val_acc*100:.2f}% | "
              f"Best Val Acc: {best_val_acc*100:.2f}% (Epoch {best_epoch+1})")

    # Save the final model
    final_model_path = os.path.join(save_dir, 'final_model.pkl')
    jt.save(model.state_dict(), final_model_path)

    # Save the training history. Convert NumPy scalars to native floats first,
    # before the file is opened, so a conversion error cannot leave a
    # truncated/empty JSON file behind.
    history_path = os.path.join(save_dir, 'training_history.json')
    history_serializable = {
        k: [float(v) for v in values] for k, values in history.items()
    }
    with open(history_path, 'w') as f:
        json.dump(history_serializable, f, indent=4)

    print(f"\n{'='*60}")
    print(f"Training Complete!")
    print(f"{'='*60}")
    print(f"Best Validation Accuracy: {best_val_acc*100:.2f}% (Epoch {best_epoch+1})")
    print(f"Best model saved to: {os.path.join(save_dir, 'best_model.pkl')}")
    print(f"Training history saved to: {history_path}")
    print(f"{'='*60}\n")


# Start the training run when this code executes as the main module.
if __name__ == '__main__':
    train()

[38;5;2m[i 1023 17:32:03.857006 60 cuda_flags.cc:55] CUDA enabled.[m



Training Configuration
Device: GPU
Batch Size: 16
Learning Rate: 0.001
Epochs: 50
Image Size: 224

Loading datasets...

Creating model...

Starting training...

Learning Rate: 0.001000

Epoch [1/50] - Training



Compiling Operators(28/83) used: 3.31s eta: 6.51s 47/83) used: 5.32s eta: 4.07s 49/83) used: 9.32s eta: 6.47s 51/83) used: 13.3s eta: 8.37s 52/83) used: 15.3s eta: 9.14s 53/83) used: 17.3s eta: 9.82s 54/83) used: 19.3s eta: 10.4s 55/83) used: 20.4s eta: 10.4s 56/83) used: 23.4s eta: 11.3s 57/83) used: 24.4s eta: 11.1s 58/83) used: 27.4s eta: 11.8s 59/83) used: 28.4s eta: 11.5s 60/83) used: 30.4s eta: 11.6s 61/83) used: 31.4s eta: 11.3s 62/83) used: 33.4s eta: 11.3s 63/83) used: 35.4s eta: 11.2s 65/83) used: 39.4s eta: 10.9s 66/83) used: 41.4s eta: 10.7s 67/83) used: 43.4s eta: 10.4s 68/83) used: 45.4s eta:   10s 69/83) used: 47.4s eta: 9.62s 