<a href="https://colab.research.google.com/github/DavoodSZ1993/Dive_into_Deep_Learning/blob/main/14_14_dog_breed_identification_on_kaggle.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install d2l==1.0.0-alpha1.post0 --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.0/93.0 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.9/121.9 kB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.9/84.9 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m38.3 MB/s[0m eta [36m0:00:00[0m
[?25h

## 14.14 Dog Breed Identification (ImageNet Dogs) on Kaggle

In [2]:
import os
import torch
import torchvision
from torch import nn
from d2l import torch as d2l

### 14.14.1 Obtaining and Organizing the Dataset

#### Downloading the Dataset

In [3]:
# Register the small demo archive under the key 'dog_tiny'. The tuple holds the
# download URL and the hash string d2l associates with the archive
# (presumably used to verify the download -- confirm against d2l).
d2l.DATA_HUB['dog_tiny'] = (
    d2l.DATA_URL + 'kaggle_dog_tiny.zip',
    '0cb91d09b814ecdc07b50f31f8dcad3e81d6a86d',
)

# demo=True  -> fetch and unpack the tiny archive through d2l.
# demo=False -> use the full Kaggle dataset, expected to already be on disk.
demo = True
data_dir = (d2l.download_extract('dog_tiny') if demo
            else os.path.join('..', 'data', 'dog-breed-identification'))

Downloading ../data/kaggle_dog_tiny.zip from http://d2l-data.s3-accelerate.amazonaws.com/kaggle_dog_tiny.zip...


#### Organizing the Dataset

In [5]:
def reorg_dog_data(data_dir, valid_ratio):
  """Reorganize the raw dataset under `data_dir` using d2l's helpers.

  Reads `labels.csv` from `data_dir`, then passes the labels and
  `valid_ratio` to `d2l.reorg_train_valid` and finally calls
  `d2l.reorg_test`. The resulting folder layout is decided by those helpers;
  later cells read from `<data_dir>/train_valid_test/<split>`.

  Args:
    data_dir: directory that contains `labels.csv`.
    valid_ratio: forwarded unchanged to `d2l.reorg_train_valid`.
  """
  csv_path = os.path.join(data_dir, 'labels.csv')
  label_map = d2l.read_csv_labels(csv_path)
  d2l.reorg_train_valid(data_dir, label_map, valid_ratio)
  d2l.reorg_test(data_dir)

# Fraction handed to reorg_dog_data for the train/validation split.
valid_ratio = 0.1

# Smaller batches for the tiny demo dataset, larger ones for the full dataset.
if demo:
  batch_size = 32
else:
  batch_size = 128

reorg_dog_data(data_dir, valid_ratio)

### 14.14.2 Image Augmentation

In [6]:
# Training-time pipeline: random augmentation followed by tensor conversion
# and per-channel normalization.
transform_train = torchvision.transforms.Compose([
    # Random crop (area scale 0.08-1, aspect ratio 3:4 to 4:3) resized to 224.
    torchvision.transforms.RandomResizedCrop(
        size=224, scale=(0.08, 1), ratio=(3 / 4, 4 / 3)),
    torchvision.transforms.RandomHorizontalFlip(),
    # Randomly perturb brightness, contrast and saturation (0.4 each).
    torchvision.transforms.ColorJitter(
        brightness=0.4, contrast=0.4, saturation=0.4),
    torchvision.transforms.ToTensor(),
    # Per-channel mean / std. NOTE(review): these look like the ImageNet
    # statistics expected by torchvision's pretrained models -- confirm.
    torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [7]:
# Evaluation-time pipeline: deterministic resize + center crop (no random
# augmentation), then the same tensor conversion and normalization constants
# as the training pipeline.
transform_test = torchvision.transforms.Compose([
    torchvision.transforms.Resize(size=256),
    torchvision.transforms.CenterCrop(size=224),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

### 14.14.3 Reading the Dataset

In [8]:
# Splits used for fitting go through the augmenting training transform.
train_ds = torchvision.datasets.ImageFolder(
    os.path.join(data_dir, 'train_valid_test', 'train'),
    transform=transform_train)
train_valid_ds = torchvision.datasets.ImageFolder(
    os.path.join(data_dir, 'train_valid_test', 'train_valid'),
    transform=transform_train)

# Splits used for evaluation / prediction go through the deterministic
# test transform.
valid_ds = torchvision.datasets.ImageFolder(
    os.path.join(data_dir, 'train_valid_test', 'valid'),
    transform=transform_test)
test_ds = torchvision.datasets.ImageFolder(
    os.path.join(data_dir, 'train_valid_test', 'test'),
    transform=transform_test)

In [11]:
# Training loaders: reshuffled every epoch; drop_last=True discards the final
# incomplete batch so every training batch has exactly `batch_size` samples.
train_iter, train_valid_iter = [torch.utils.data.DataLoader(
    dataset, batch_size, shuffle=True, drop_last=True)
    for dataset in (train_ds, train_valid_ds)]

# Evaluation loaders keep every sample (drop_last=False) and a fixed order.
# Dropping the last batch here would silently exclude up to batch_size - 1
# validation samples from the reported loss, and would yield no batches at
# all when the validation set is smaller than one batch.
valid_iter = torch.utils.data.DataLoader(valid_ds, batch_size, shuffle=False,
                                         drop_last=False)

test_iter = torch.utils.data.DataLoader(test_ds, batch_size, shuffle=False,
                                        drop_last=False)

### 14.14.4 Fine-Tuning a Pretrained Model

In [12]:
def get_net(devices):
  """Build a classifier on top of a frozen, pretrained ResNet-34.

  The returned `nn.Sequential` has two named children that run in this order:
  `features` (pretrained ResNet-34) and `output_new`
  (Linear 1000->256, ReLU, Linear 256->120).

  Args:
    devices: sequence of devices; the model is moved to `devices[0]`.

  Returns:
    The model on `devices[0]`. Every parameter of `features` has
    `requires_grad` set to False; the parameters of `output_new` stay
    trainable.
  """
  models = torchvision.models
  # `pretrained=True` is deprecated since torchvision 0.13 in favour of the
  # `weights=` argument; IMAGENET1K_V1 is the checkpoint `pretrained=True`
  # maps to. Fall back to the legacy flag when the weights enum is missing
  # (older torchvision).
  weights_enum = getattr(models, 'ResNet34_Weights', None)
  if weights_enum is not None:
    backbone = models.resnet34(weights=weights_enum.IMAGENET1K_V1)
  else:
    backbone = models.resnet34(pretrained=True)

  finetune_net = nn.Sequential()
  # Attribute assignment registers the children under these names, in order.
  finetune_net.features = backbone
  finetune_net.output_new = nn.Sequential(nn.Linear(1000, 256),
                                          nn.ReLU(),
                                          nn.Linear(256, 120))
  finetune_net = finetune_net.to(devices[0])
  # Freeze the backbone so that only the new head receives gradients.
  for param in finetune_net.features.parameters():
    param.requires_grad_(False)
  return finetune_net

In [13]:
# reduction='none' keeps one loss value per sample so that sums can be
# accumulated across batches of different sizes.
loss = nn.CrossEntropyLoss(reduction='none')

def evaluate_loss(data_iter, net, devices):
  """Return the mean per-sample loss of `net` over `data_iter`.

  The forward passes run with autograd disabled. If `net` is an `nn.Module`
  it is switched to eval mode for the duration of the call, and the previous
  training flag of every submodule is restored afterwards.

  Args:
    data_iter: iterable yielding `(features, labels)` tensor pairs.
    net: callable mapping a feature batch to outputs accepted by `loss`.
    devices: sequence of devices; each batch is moved to `devices[0]`.

  Returns:
    Sum of the per-sample losses divided by the number of labels (a tensor
    once at least one batch has been processed).

  Raises:
    ZeroDivisionError: if `data_iter` yields no batches.
  """
  # Remember each submodule's mode so the caller's training state survives.
  saved_modes = ([(module, module.training) for module in net.modules()]
                 if isinstance(net, nn.Module) else [])
  if saved_modes:
    net.eval()
  l_sum, n = 0.0, 0
  try:
    # No graph is needed for evaluation; without no_grad the graph of every
    # batch would stay alive through `l_sum`.
    with torch.no_grad():
      for features, labels in data_iter:
        features, labels = features.to(devices[0]), labels.to(devices[0])
        outputs = net(features)
        l = loss(outputs, labels)
        l_sum += l.sum()
        n += labels.numel()
  finally:
    for module, was_training in saved_modes:
      module.training = was_training
  return l_sum / n