In [1]:
import numpy as np
import pandas as pd

from PIL import Image
from tqdm import tqdm
import d2l.torch as d2l

from torchvision import models
from torchvision.transforms import v2

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split



In [2]:
# Read the labelled training table from disk. The bare name on the last line
# makes the notebook render the frame as this cell's output.
train_data = pd.read_csv(filepath_or_buffer="data/train.csv")
train_data

Unnamed: 0,image,label
0,images/0.jpg,maclura_pomifera
1,images/1.jpg,maclura_pomifera
2,images/2.jpg,maclura_pomifera
3,images/3.jpg,maclura_pomifera
4,images/4.jpg,maclura_pomifera
...,...,...
18348,images/18348.jpg,aesculus_glabra
18349,images/18349.jpg,liquidambar_styraciflua
18350,images/18350.jpg,cedrus_libani
18351,images/18351.jpg,prunus_pensylvanica


In [3]:
class LeaveDataset(Dataset):
    """Image-classification dataset that loads each image from disk on demand.

    Args:
        x: Indexable collection of image paths (strings); each one is appended
            to ``data_path`` to locate the file.
        y: Indexable collection of targets aligned with ``x``.
        train: When True, samples go through the augmenting pipeline
            (``train_transpose``); otherwise through the deterministic
            resize/normalize pipeline (``test_transpose``).
    """

    def __init__(self, x, y, train=True):
        self.x = x
        self.y = y
        self.train = train

        def tensor_tail():
            # Shared ending of both pipelines. ``scale=True`` maps uint8 pixel
            # values into [0, 1] before normalization; the mean/std constants
            # below are the ImageNet statistics for inputs in that range, so
            # without scaling the normalized values would be off by ~255x.
            return [
                v2.Resize((224, 224)),
                v2.PILToTensor(),
                v2.ToDtype(torch.float32, scale=True),
                v2.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ]

        # Attribute names keep the original "transpose" spelling so existing
        # references keep working; both are transform pipelines.
        self.train_transpose = v2.Compose([
            v2.RandomHorizontalFlip(),
            v2.RandomVerticalFlip(),
            # Corners exposed by the rotation are filled with white.
            v2.RandomRotation(180, fill=(255, 255, 255)),
            v2.ColorJitter(0.5),
            *tensor_tail(),
        ])
        self.test_transpose = v2.Compose(tensor_tail())

        # NOTE(review): the CSV preview shows paths such as "images/0.jpg", so
        # the joined path becomes "data/images/images/0.jpg" - confirm this
        # matches the on-disk layout.
        self.data_path = "data/images/"

    def __getitem__(self, idx):
        """Return ``(image_tensor, target)`` for the sample at position ``idx``."""
        path = self.data_path + self.x[idx]
        target = self.y[idx]
        # Close the file handle promptly and force three channels so that
        # grayscale / palette / RGBA files still fit the 3-channel Normalize.
        with Image.open(path) as img:
            image = img.convert("RGB")
        pipeline = self.train_transpose if self.train else self.test_transpose
        return pipeline(image), target

    def __len__(self):
        """Number of samples, i.e. the length of ``x``."""
        return len(self.x)

In [4]:
# Load the training set: pull the image-path column and the label column out
# of the training table, then show both as the cell output.
x = train_data['image']
y = train_data['label']
x, y

(0            images/0.jpg
 1            images/1.jpg
 2            images/2.jpg
 3            images/3.jpg
 4            images/4.jpg
                ...       
 18348    images/18348.jpg
 18349    images/18349.jpg
 18350    images/18350.jpg
 18351    images/18351.jpg
 18352    images/18352.jpg
 Name: image, Length: 18353, dtype: object,
 0               maclura_pomifera
 1               maclura_pomifera
 2               maclura_pomifera
 3               maclura_pomifera
 4               maclura_pomifera
                   ...           
 18348            aesculus_glabra
 18349    liquidambar_styraciflua
 18350              cedrus_libani
 18351        prunus_pensylvanica
 18352            quercus_montana
 Name: label, Length: 18353, dtype: object)

In [5]:
def load_train_data(csv_path='data/train.csv', test_size=0.1, batch_size=128,
                    num_workers=3, random_state=66):
    """Build the training and validation loaders from the labelled CSV.

    The CSV must provide an ``image`` column (paths) and a ``label`` column.
    Labels are integer-encoded with a ``LabelEncoder`` and the rows are split
    with stratification on the encoded label.

    Args:
        csv_path: Location of the labelled CSV file.
        test_size: Fraction of rows held out for validation.
        batch_size: Batch size used by both loaders.
        num_workers: Worker processes per loader; ``0`` loads in the main
            process.
        random_state: Seed for the train/validation split.

    Returns:
        ``(train_dl, valid_dl, le)`` - the training loader, the validation
        loader and the fitted ``LabelEncoder`` (for mapping ids back to names).
    """
    data = pd.read_csv(csv_path)
    x, y = data['image'], data['label']

    le = LabelEncoder()
    y = le.fit_transform(y)

    train_x, valid_x, train_y, valid_y = train_test_split(
        x, y,
        test_size=test_size,
        random_state=random_state,
        shuffle=True,
        stratify=y,
    )

    # ``.values`` drops the pandas index so the datasets index by position.
    train_ds = LeaveDataset(train_x.values, train_y, train=True)
    # Validation must not be augmented, otherwise its metrics are measured on
    # randomly flipped / rotated / jittered images.
    valid_ds = LeaveDataset(valid_x.values, valid_y, train=False)

    loader_kwargs = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        # DataLoader rejects persistent_workers=True when num_workers == 0.
        persistent_workers=num_workers > 0,
    )
    # Reshuffle the training samples every epoch; keep validation order fixed.
    train_dl = DataLoader(train_ds, shuffle=True, **loader_kwargs)
    valid_dl = DataLoader(valid_ds, shuffle=False, **loader_kwargs)
    print(f'train={len(train_dl)}, valid={len(valid_dl)}')

    return train_dl, valid_dl, le

In [6]:
# Build both loaders plus the fitted label encoder.
# Note: from here on `train_data` refers to the training DataLoader.
train_data, valid_data, le = load_train_data()

# Use the GPU when one is available; the bare name below displays the choice.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

train=130, valid=15


'cuda'

In [7]:
# Start from a ResNet-18 initialised with the IMAGENET1K_V1 weights.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Swap the stock classifier for a small head: a 128-unit hidden layer with
# batch norm, ReLU and dropout, followed by one output per encoded class.
head_layers = [
    nn.Linear(model.fc.in_features, 128),
    nn.BatchNorm1d(128),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(128, len(le.classes_)),
]
model.fc = nn.Sequential(*head_layers)

# Move all parameters to the selected device, then display the architecture.
model = model.to(device)
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
def _weighted_mean(values, weights):
    """Mean of ``values`` weighted by ``weights``; NaN when there is no weight."""
    total = sum(weights)
    if total == 0:
        return float('nan')
    return sum(v * w for v, w in zip(values, weights)) / total


loss = torch.nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(), lr=0.0001)
epochs = 50

for epoch in range(epochs):
    # Per-batch metrics for this epoch. Batch sizes are recorded alongside so
    # the epoch figures are per-sample averages; a plain mean of batch means
    # would over-weight the (smaller) final batch.
    train_loss, valid_loss = [], []
    train_acc, valid_acc = [], []
    train_sizes, valid_sizes = [], []

    # Train the model
    model.train()
    for x, y in train_data:
        x, y = x.to(device), y.to(device)
        optim.zero_grad()
        y_hat = model(x)
        l = loss(y_hat, y)
        l.backward()
        optim.step()

        train_loss.append(l.item())
        # Accuracy is computed directly with torch instead of constructing a
        # new d2l.Classifier module for every batch.
        train_acc.append((y_hat.argmax(dim=1) == y).float().mean().item())
        train_sizes.append(y.shape[0])

    # Evaluate on the validation split (no gradient tracking)
    model.eval()
    with torch.no_grad():
        for x, y in valid_data:
            x, y = x.to(device), y.to(device)
            y_hat = model(x)
            l = loss(y_hat, y)

            valid_loss.append(l.item())
            valid_acc.append((y_hat.argmax(dim=1) == y).float().mean().item())
            valid_sizes.append(y.shape[0])

    print(f'{epoch}:'
      f'train_l={_weighted_mean(train_loss, train_sizes):.6f},train_acc={_weighted_mean(train_acc, train_sizes):.6f},'
      f'valid_l={_weighted_mean(valid_loss, valid_sizes):.6f},valid_acc={_weighted_mean(valid_acc, valid_sizes):.6f}')