In [1]:
import numpy as np
from torch import nn, optim
from torch.autograd import Variable
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import torch
import sys

In [2]:
# 數據預處理
transform = transforms.Compose([
    transforms.RandomResizedCrop(224), # 對圖像進行隨機的crop 以後再resize成固定的大小
    transforms.RandomRotation(20), # 隨機旋轉角度
    transforms.RandomHorizontalFlip(p=0.5), # 隨機水平翻轉
    transforms.ToTensor()
])

# 讀取數據
root = 'datasets/dog_cat'
train_dataset = datasets.ImageFolder(root + '/train', transform)
test_dataset = datasets.ImageFolder(root + '/test', transform)

# 導入數據
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=8, shuffle=True)

In [3]:
classes = train_dataset.classes
classes_index = train_dataset.class_to_idx
print(classes)
print(classes_index)

['cat', 'dog']
{'cat': 0, 'dog': 1}


In [9]:
model = models.vgg16(pretrained = True)
'models.resnet, models.DenseNet'
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [10]:
# 如果我們只想訓練模型的全連接層


# for parma in model.parameters():
  # 不參與梯度更新
  # parma.requires_grad = False

# 建構新的全連接層, 改成只有兩個輸出
model.classifier = torch.nn.Sequential(
    torch.nn.Linear(25088, 100),
    torch.nn.ReLU(),
    torch.nn.Dropout(p=0.5),
    torch.nn.Linear(100, 2),
    torch.nn.Softmax()
)

In [14]:
LR = 0.001
# 定義代價函數, CrossEntropyLoss會先做一個softmax, 所以網路結構可以省一個
entropy_loss = nn.CrossEntropyLoss()
# 定義優化器
optimizer = optim.SGD(model.parameters(), lr=LR)

In [15]:
def train():
  # 模型的訓練狀態, dropout啟用
  model.train()
  for i, data in enumerate(train_loader):
    # 獲得一個批次的數據和標籤
    inputs, labels = data
    # 獲得薄型的預測結果
    out = model(inputs)
    # 交叉商代價函數out(batch,C),C是class類別,labels(batch), 交叉商計算shape可以不一致
    loss = entropy_loss(out, labels)
    # 梯度清0
    optimizer.zero_grad()
    # 計算梯度
    loss.backward()
    # 修改權值 = 更新權重
    optimizer.step()


def test():
  # 模型的測試狀態, dropout不啟用
  model.eval()
  correct=0
  for i, data in enumerate(test_loader):
    # 獲得一個批次的數據跟標籤
    inputs, labels = data
    # 獲得模型與預測結果 (64, 10)
    out = model(inputs)
    # 獲得最大值, 以及最大值所在的位置,1表示第1個維度
    _, predicted = torch.max(out, 1)
    # 預測正確的位置
    correct += (predicted == labels).sum()
  print('Test acc:{0}'.format(correct.item()/len(test_dataset)))
  correct=0
  for i, data in enumerate(train_loader):
    # 獲得一個批次的數據跟標籤
    inputs, labels = data
    # 獲得模型與預測結果 (64, 10)
    out = model(inputs)
    # 獲得最大值, 以及最大值所在的位置,1表示第1個維度
    _, predicted = torch.max(out, 1)
    # 預測正確的位置
    correct += (predicted == labels).sum()
  print('Train acc:{0}'.format(correct.item()/len(train_dataset)))


for epoch in range(4):
  print('epoch:',epoch)
  train()
  test()

epoch: 0


  input = module(input)


Test acc:0.515
Train acc:0.5025
epoch: 1
Test acc:0.51
Train acc:0.5025
epoch: 2


KeyboardInterrupt: 

In [14]:
torch.save(model.state_dict(),'checkpoints/cat_dog_v01.pth')