In [1]:
pip install torch torchvision timm optuna


Note: you may need to restart the kernel to use updated packages.


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import os
import pandas as pd
import timm
from torch.cuda.amp import GradScaler, autocast

# 定义数据变换
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(64),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

val_transforms = transforms.Compose([
    transforms.Resize(64),
    transforms.CenterCrop(64),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# 定义数据集
class DogBreedDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = self.dataframe.iloc[idx, 0]
        image = self.dataframe.iloc[idx, 1]
        label = self.dataframe.iloc[idx, 2]
        if self.transform:
            image = self.transform(image)
        return image, label

# 准备数据
class_names = ['golden-retriever', 'labrador-retriever', 'german-shepherd', 'yorkshire-terrier', 'poodle']
label_map = {breed: idx for idx, breed in enumerate(class_names)}
train_dir = '/kaggle/input/2024-datasciencetraining1-competition2/train/train'

data = [
    {"image_name": image_file, "image": Image.open(os.path.join(train_dir, subdir, image_file)).convert("RGB"), "label": label_map[subdir]}
    for subdir in class_names
    for image_file in os.listdir(os.path.join(train_dir, subdir))
]

df = pd.DataFrame(data)
train_dataset = DogBreedDataset(df, transform=train_transforms)
val_dataset = DogBreedDataset(df, transform=val_transforms)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# 定义模型
class DogClassifier(nn.Module):
    def __init__(self, model_name='convnextv2_base', num_classes=len(class_names)):
        super(DogClassifier, self).__init__()
        self.model = timm.create_model(model_name, pretrained=True)
        self.model.fc = nn.Linear(self.model.get_classifier().in_features, num_classes)

    def forward(self, x):
        return self.model(x)

# 设置最佳参数
best_lr = 0.00014852968150996223
best_optimizer_name = 'Adam'

# 使用最佳参数训练最终模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DogClassifier(model_name='convnextv2_base')
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = getattr(optim, best_optimizer_name)(model.parameters(), lr=best_lr)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=3, verbose=True)
scaler = GradScaler()

# 训练模型并保存最佳模型
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=25):
    best_model_wts = model.state_dict()
    best_acc = 0.0
    early_stopping_patience = 10
    epochs_no_improve = 0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        for phase in ['train', 'val']:
            model.train() if phase == 'train' else model.eval()
            dataloader = train_loader if phase == 'train' else val_loader

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloader:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    with autocast():
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                    if phase == 'train':
                        scaler.scale(loss).backward()
                        scaler.step(optimizer)
                        scaler.update()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloader.dataset)
            epoch_acc = running_corrects.double() / len(dataloader.dataset)

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = model.state_dict()
                epochs_no_improve = 0
            elif phase == 'val':
                epochs_no_improve += 1

        scheduler.step(epoch_acc)

        if epochs_no_improve == early_stopping_patience:
            print("Early stopping triggered")
            break

    print(f'Best val Acc: {best_acc:.4f}')
    model.load_state_dict(best_model_wts)
    return model

model = train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=100)

def evaluate_model(model, dataloader):
    model.eval()
    running_corrects = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            running_corrects += torch.sum(preds == labels.data)

    acc = running_corrects.double() / len(dataloader.dataset)
    print(f'Validation Accuracy: {acc:.4f}')

evaluate_model(model, val_loader)

# 生成提交文件
test_data_dir = "/kaggle/input/2024-datasciencetraining1-competition2/test/test"
test_images = os.listdir(test_data_dir)

pred_labels = []
for image_file in test_images:
    image_path = os.path.join(test_data_dir, image_file)
    image = Image.open(image_path).convert("RGB")
    image = val_transforms(image)
    image = image.unsqueeze(0).to(device)
    outputs = model(image)
    _, preds = torch.max(outputs, 1)
    pred_labels.append(class_names[preds.item()])

submit_df = pd.DataFrame({
    'image_name': test_images,
    'label': pred_labels
})

submit_df.to_csv('/kaggle/working/submission.csv', index=False)

model.safetensors:   0%|          | 0.00/355M [00:00<?, ?B/s]

Epoch 1/100
----------
train Loss: 2.6679 Acc: 0.1930
val Loss: 1.5965 Acc: 0.2590
Epoch 2/100
----------
train Loss: 1.6155 Acc: 0.2740
val Loss: 1.3930 Acc: 0.4095
Epoch 3/100
----------
train Loss: 1.3414 Acc: 0.4410
val Loss: 0.8609 Acc: 0.6885
Epoch 4/100
----------
train Loss: 1.0512 Acc: 0.5720
val Loss: 0.6432 Acc: 0.7635
Epoch 5/100
----------
train Loss: 0.8584 Acc: 0.6705
val Loss: 0.5940 Acc: 0.7780
Epoch 6/100
----------
train Loss: 0.7667 Acc: 0.7095
val Loss: 0.4970 Acc: 0.8245
Epoch 7/100
----------
train Loss: 0.6635 Acc: 0.7415
val Loss: 0.3347 Acc: 0.8885
Epoch 8/100
----------
train Loss: 0.5991 Acc: 0.7695
val Loss: 0.3251 Acc: 0.8880
Epoch 9/100
----------
train Loss: 0.5443 Acc: 0.8055
val Loss: 0.2077 Acc: 0.9300
Epoch 10/100
----------
train Loss: 0.5290 Acc: 0.8045
val Loss: 0.2027 Acc: 0.9370
Epoch 11/100
----------
train Loss: 0.4674 Acc: 0.8290
val Loss: 0.1551 Acc: 0.9465
Epoch 12/100
----------
train Loss: 0.4952 Acc: 0.8235
val Loss: 0.1458 Acc: 0.9545
E

In [3]:
# import timm

# resnet_models = timm.list_models('*resnet*')

# for model_name in resnet_models:
#     print(model_name)