In [18]:
!wget -O food11.zip https://www.dropbox.com/s/up5q1gthsz3v0dq/food-11.zip?dl=0

--2025-06-24 06:38:09--  https://www.dropbox.com/s/up5q1gthsz3v0dq/food-11.zip?dl=0
Resolving www.dropbox.com (www.dropbox.com)... 162.125.3.18, 2620:100:6018:18::a27d:312
Connecting to www.dropbox.com (www.dropbox.com)|162.125.3.18|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://www.dropbox.com/scl/fi/w5r49vs5e956w6c4z6ob9/food-11.zip?rlkey=3no5l2xjiqgk2ckwbewaanm5p&dl=0 [following]
--2025-06-24 06:38:09--  https://www.dropbox.com/scl/fi/w5r49vs5e956w6c4z6ob9/food-11.zip?rlkey=3no5l2xjiqgk2ckwbewaanm5p&dl=0
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://uc63e782d7b0d99570614d32615c.dl.dropboxusercontent.com/cd/0/inline/CsN7oWfj1NVl7zta8gTpLrUV7VgRvhDUjQZFlLzf5vldvh4l_X5DZk74whGohYgaOw5G2GsbXyKrJ75XA8X5oKkHCWD9hmLNucajaVme2ZRN7ECLzvx868JAXccZdGYuxPkeNvzDCLB5pi6x-JpLwqq0/file# [following]
--2025-06-24 06:38:09--  https://uc63e782d7b0d99570614d32615c.dl.dropboxusercontent.com/c

In [27]:
!unzip -o /kaggle/working/food11.zip -d /kaggle/working/

()

In [30]:
_exp_name = "sample"

In [31]:
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset
from tqdm.auto import tqdm
import random

In [32]:
myseed = 6666  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

In [33]:
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])
train_tfm = transforms.Compose([
    transforms.RandomHorizontalFlip(), # 随即水平翻转
    transforms.RandomRotation(15), # 随机旋转
    transforms.ColorJitter(brightness = 0.2)
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

In [35]:
class FoodDataset(Dataset):

    def __init__(self,path,tfm=test_tfm,files = None):
        super(FoodDataset).__init__()
        self.path = path
        self.files = sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith(".jpg")])
        if files != None:
            self.files = files
            
        self.transform = tfm
  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        
        try:
            label = int(fname.split("/")[-1].split("_")[0])
        except:
            label = -1 # test has no label
            
        return im,label

通道数增加：提取更复杂的特征
尺寸逐步减小：聚焦关键区域
卷积核大小：常用3x3(兼顾局部和计算效率),kernel_size = 3
    卷积核大小：小目标用3x3,大目标用5x5
通道数：按2的倍数增加(经验法则) 64->128->256->512
    通道数：每层x2,直到显存不足
步长/填充:保持尺寸用1/1,减半用2/0 stride = 1,padding=1
   步长：1(保细节)/2(降维)      填充：0/1 padding = kernel_size//2
全连接层维度：通常在512~4096之间试验，1024->512->11

卷积层堆叠：模拟人脑从整体到局部的识别过程
池化层：让模型不在乎物体的位置（上下左右都一样）
通道增加：有更多的检测点(有的专查纹理，有的专查颜色)
这个神经网络是CNN的经典套路，适合中小型图像分类任务(Kaggle比赛)，如果是更复杂的任务比如医学影像，就需要更深或者更特殊的结构

In [37]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
 
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0), 

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]
            
            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]
        )
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11)
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        return self.fc(out)

In [38]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = Classifier().to(device)

batch_size = 64
n_epochs = 8
patience = 5 # 早停

criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5)
# 再次使用Adam，考虑了历史梯度方向，形成惯性调参只需要调lr，其他参数用默认值
# Adam适用于大多数深度学习任务并且更快更稳真乃神器也！

In [43]:
train_set = FoodDataset("/kaggle/working/train", tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
valid_set = FoodDataset("/kaggle/working/valid", tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

In [44]:
stale = 0
best_acc = 0

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):

        imgs, labels = batch

        logits = model(imgs.to(device))
        loss = criterion(logits, labels.to(device))
        optimizer.zero_grad()
        
        loss.backward()
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)
        optimizer.step()
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
        train_loss.append(loss.item())
        train_accs.append(acc)
        
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------

    model.eval()
    valid_loss = []
    valid_accs = []

    for batch in tqdm(valid_loader):

      
        imgs, labels = batch

        with torch.no_grad():
            logits = model(imgs.to(device))


        loss = criterion(logits, labels.to(device))

        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        valid_loss.append(loss.item())
        valid_accs.append(acc)

    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")

    if valid_acc > best_acc:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
    else:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")

    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break

  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 001/008 ] loss = 1.87132, acc = 0.34823


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 001/008 ] loss = 1.95323, acc = 0.33254
[ Valid | 001/008 ] loss = 1.95323, acc = 0.33254 -> best
Best model found at epoch 0, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 002/008 ] loss = 1.53028, acc = 0.47462


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 002/008 ] loss = 1.80311, acc = 0.42776
[ Valid | 002/008 ] loss = 1.80311, acc = 0.42776 -> best
Best model found at epoch 1, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 003/008 ] loss = 1.31511, acc = 0.54767


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 003/008 ] loss = 1.56109, acc = 0.48811
[ Valid | 003/008 ] loss = 1.56109, acc = 0.48811 -> best
Best model found at epoch 2, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 004/008 ] loss = 1.14372, acc = 0.60291


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 004/008 ] loss = 1.55957, acc = 0.48270
[ Valid | 004/008 ] loss = 1.55957, acc = 0.48270


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 005/008 ] loss = 1.01483, acc = 0.65506


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 005/008 ] loss = 1.35305, acc = 0.55408
[ Valid | 005/008 ] loss = 1.35305, acc = 0.55408 -> best
Best model found at epoch 4, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 006/008 ] loss = 0.88709, acc = 0.68859


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 006/008 ] loss = 1.11980, acc = 0.61912
[ Valid | 006/008 ] loss = 1.11980, acc = 0.61912 -> best
Best model found at epoch 5, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 007/008 ] loss = 0.75974, acc = 0.73985


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 007/008 ] loss = 1.19049, acc = 0.61437
[ Valid | 007/008 ] loss = 1.19049, acc = 0.61437


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 008/008 ] loss = 0.66031, acc = 0.77120


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 008/008 ] loss = 1.23379, acc = 0.60027
[ Valid | 008/008 ] loss = 1.23379, acc = 0.60027


In [45]:
test_set = FoodDataset("/kaggle/working/test", tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

In [46]:
model_best = Classifier().to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()
prediction = []
with torch.no_grad():
    for data,_ in tqdm(test_loader):
        test_pred = model_best(data.to(device))
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        prediction += test_label.squeeze().tolist()

  0%|          | 0/47 [00:00<?, ?it/s]

In [47]:
def pad4(i):
    return "0"*(4-len(str(i)))+str(i)
df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(len(test_set))]
df["Category"] = prediction
df.to_csv("submission.csv",index = False)