In [1]:
import numpy as np 
import torch
import torch.nn as nn
from matplotlib import pyplot as plt

In [2]:
from google.colab import drive # Colab helper used to connect to Google Drive
# Mount Google Drive at /content/drive; the relative './drive/...' paths used
# later presumably rely on the working directory being /content - TODO confirm.
drive.mount('/content/drive')

Mounted at /content/drive


匯入labels.csv

In [3]:
# Read labels.csv into a 2-column string array with one row per image: (id, breed).
path = './drive/MyDrive/Dog/labels.csv' 
dogs_numpy = np.loadtxt(path, dtype=str, delimiter=',', skiprows=1) # comma-separated file; skiprows=1 drops the header row (id, breed)
dogs_numpy, dogs_numpy.shape

(array([['000bec180eb18c7604dcecc8fe0dba07', 'boston_bull'],
        ['001513dfcb2ffafc82cccf4d8bbaba97', 'dingo'],
        ['001cdf01b096e06d78e9e5112d419397', 'pekinese'],
        ...,
        ['ffe2ca6c940cddfee68fa3cc6c63213f', 'airedale'],
        ['ffe5f6d8e2bff356e9482a80a6e29aac', 'miniature_pinscher'],
        ['fff43b07992508bc822f33d8ffd902ae', 'chesapeake_bay_retriever']],
       dtype='<U32'), (10222, 2))

匯入breed.txt

In [22]:
# Read breed.txt into a string array of breed names
# (presumably one breed name per line - TODO confirm against the file).
path = './drive/MyDrive/Dog/breed.txt' 
breed_numpy = np.loadtxt(path, dtype=str)
#breed_numpy

In [5]:
# Convert the array of breed names into a plain Python list.
#breed_list = list() # (optional) an empty list can be written list() or []
breed_list = list(breed_numpy)
#breed_list

In [23]:
# Build the lookup table breed name -> class index.
# A breed's class index is its position in breed_list.
dic = {breed_name: class_index for class_index, breed_name in enumerate(breed_list)}
#dic

處理dogs_numpy(labels.csv)

In [7]:
# dogs_numpy has one row per image laid out as (id, breed); the last column is
# therefore the breed name of every image. Each name is translated to its class
# index through `dic`, giving the ground-truth label of every training image.
dog_labels = [dic[breed_name] for breed_name in dogs_numpy[:, -1]]

# Same labels as a tensor, which is what the loss function consumes.
dog_t_labels = torch.tensor(dog_labels)
dog_t_labels

# Example: the first row's breed is 'boston_bull', which sits at index 19 (0-based)
# of breed.txt, so dic['boston_bull'] == 19, dog_labels[0] == 19 and
# dog_t_labels[0] == tensor(19).

tensor([19, 37, 85,  ...,  3, 75, 28])

匯入train圖片

In [8]:
import os
# File name of every training image: "<id>.jpg", where the id is column 0 of dogs_numpy.
dog_file_name = [image_id + '.jpg' for image_id in dogs_numpy[:, 0]]

# Path of every training image inside the train folder, in the same order as
# the rows of dogs_numpy (and therefore the same order as the labels).
dog_file_path = [
    os.path.join('./drive/MyDrive/Dog/train', file_name)
    for file_name in dog_file_name
]
#dog_file_path

設定資料集

前處理preprocess(含正規化)

In [9]:
from torchvision import transforms, utils

# Preprocessing pipeline applied to every image before it is fed to the model.
# NOTE(review): the mean/std values are presumably per-channel statistics of the
# training images - confirm how they were computed.
preprocess = transforms.Compose([transforms.Resize([96, 96]), # resize to a 96x96 square
                  transforms.ToTensor(), # convert the PIL image to a tensor
                  transforms.Normalize(mean=[0.4766, 0.4523, 0.3923], std=[0.2205, 0.2154, 0.2137]) # per-channel normalization (3 channels)
])

定義loader

In [10]:
# 輸入path返回tensor
from PIL import Image # 匯入外部圖片
def default_loader(path):
    """Open the image at ``path`` and return it as a preprocessed tensor.

    Args:
        path: Filesystem path of the image file to open.

    Returns:
        The result of running the module-level ``preprocess`` pipeline on the
        image.
    """
    # The context manager releases the file handle as soon as the pixels have
    # been decoded, instead of leaving one open file per loaded image.
    with Image.open(path) as img_pil:
        # Force exactly 3 channels: `preprocess` normalizes with 3-element
        # mean/std, so a grayscale, palette or RGBA file would otherwise fail.
        img_rgb = img_pil.convert('RGB')
    img_tensor = preprocess(img_rgb)
    return img_tensor

定義Dataset

In [11]:
# 定義Dataset類別 (init / getitem / len)
from torch.utils.data import Dataset, DataLoader

class TrainSet(Dataset):
  """Map-style dataset yielding ``(image_tensor, label)`` pairs for training.

  Image paths come from the module-level ``dog_file_path`` and labels from the
  module-level ``dog_t_labels``; entry ``i`` of one belongs to entry ``i`` of
  the other.
  """

  def __init__(self, load=default_loader):
      """Store the image paths, the labels and the image-loading function.

      Args:
          load: Callable that takes an image path and returns the tensor for
              that image. Defaults to ``default_loader``.

      Raises:
          ValueError: If the number of image paths and labels differ.
      """
      self.images = dog_file_path # full path of every training image
      self.target = dog_t_labels # ground-truth class index of every image
      # Use the caller-supplied loader; previously this argument was ignored
      # and default_loader was always used.
      self.loader = load
      if len(self.images) != len(self.target):
          raise ValueError(
              'number of images ({}) does not match number of labels ({})'.format(
                  len(self.images), len(self.target)))

  def __getitem__(self, index):
      """Return the ``(image_tensor, label)`` pair stored at ``index``."""
      fn = self.images[index] # path of the requested image
      img = self.loader(fn) # load the file and turn it into a tensor
      target = self.target[index] # matching ground-truth label
      return img, target

  def __len__(self):
      """Return the number of samples in the dataset."""
      return len(self.images)

In [12]:
train_data = TrainSet() # instantiate the training dataset (with the default loader)

定義train_loader

In [13]:
train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)  # batches of 64 samples, reshuffled every epoch

設定model

model

In [34]:
# Architecture: (conv + tanh + 2x2 max-pool) x 4, then flatten, then linear + tanh + linear
import torch.nn.functional as F

class Net(nn.Module):
  """Small CNN classifier: four conv/tanh/max-pool stages and two linear layers.

  The first linear layer expects 6 channels of 6x6 features, which is what a
  96x96 input becomes after four 2x2 poolings (96 -> 48 -> 24 -> 12 -> 6).
  The output is one raw score (logit) per class, 120 in total.
  """

  def __init__(self):
    super().__init__() # required so nn.Module can register the layers
    # Every conv uses padding = kernel_size // 2, so it keeps height and width.
    self.conv1 = nn.Conv2d(3, 16, kernel_size=5, padding=2) # 3 input channels -> 16 feature maps
    self.conv2 = nn.Conv2d(16, 32, kernel_size=5, padding=2)
    self.conv3 = nn.Conv2d(32, 16, kernel_size=3, padding=1)
    self.conv4 = nn.Conv2d(16, 6, kernel_size=3, padding=1)
    self.fc1 = nn.Linear(6 * 6 * 6, 180) # 6 channels x 6 x 6 spatial positions
    self.fc2 = nn.Linear(180, 120) # one score per class (120 classes)

  def forward(self, x):
    """Compute class scores for a batch of images.

    Args:
        x: Image batch laid out as (batch, 3, height, width); height and width
           must reduce to 6x6 after four halvings (e.g. 96x96).

    Returns:
        Tensor of shape (batch, 120) holding unnormalized class scores.

    Raises:
        RuntimeError: If the input size does not produce 6*6*6 features per sample.
    """
    # Each stage: convolution -> tanh -> 2x2 max-pool (halves height and width).
    out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
    out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
    out = F.max_pool2d(torch.tanh(self.conv3(out)), 2)
    out = F.max_pool2d(torch.tanh(self.conv4(out)), 2)
    # Flatten per sample, keeping the batch dimension. The previous
    # view(-1, 6 * 6 * 6) could silently merge several samples into one row
    # when the input size was wrong; now the mismatch surfaces as an error in fc1.
    out = out.flatten(start_dim=1)
    out = torch.tanh(self.fc1(out))
    out = self.fc2(out) # raw scores for the 120 classes (no softmax here)
    return out

In [35]:
model = Net() # instantiate the network (weights are randomly initialized)

用一張圖片測試模型(僅前向傳播,尚未訓練)

In [36]:
# Sanity check: push one image through the (still untrained) model.
img_1_t = default_loader(dog_file_path[0]) # tensor of the first training image
img_1_t = img_1_t.unsqueeze(0) # add a batch dimension -> torch.Size([1, 3, 96, 96]) ( B, C, H, W )
out = model(img_1_t)
out.shape

torch.Size([1, 120])

In [37]:
_, index = torch.max(out, dim=1) # position of the largest score along the class dimension
index # the model's guess (not displayed: only the last expression of a cell is shown)

dogname = breed_numpy[index] # breed name stored at that class index
dogname # predicted breed name (the model is untrained at this point)

'maltese_dog'

設定訓練參數

optimizer + loss_fn

In [38]:
import torch.optim as optim # 最佳化資料庫

optimizer = optim.SGD(model.parameters(), lr=1e-4) # plain SGD over all model parameters
loss_fn = nn.CrossEntropyLoss() # compares the model's raw class scores with the target class indices

In [39]:
# 學太慢改善方式 : 學習率調大 batch調大

設定train loop

In [41]:
import datetime # 計算時間用

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
  """Train ``model`` for ``n_epochs`` passes over ``train_loader``.

  Prints the timestamp, epoch number and mean batch loss for epoch 1 and for
  every 5th epoch.

  Args:
      n_epochs: Number of passes over ``train_loader``.
      optimizer: Optimizer that updates the parameters of ``model``.
      model: Callable network mapping a batch of images to outputs.
      loss_fn: Callable ``loss_fn(outputs, target)`` returning a scalar loss.
      train_loader: Iterable yielding ``(imgs, target)`` batches; it does not
          need to support ``len()``.

  Raises:
      ValueError: If ``train_loader`` yields no batches during an epoch.
  """
  model.train() # make sure mode-dependent layers (if any) are in training mode

  for epoch in range(1, n_epochs + 1):
    loss_train = 0.0 # sum of the batch losses of this epoch
    n_batches = 0 # counted directly so loaders without __len__ also work

    for imgs, target in train_loader:
      outputs = model(imgs) # forward pass
      loss = loss_fn(outputs, target) # compare predictions with the true labels

      optimizer.zero_grad() # clear gradients left over from the previous step
      loss.backward()
      optimizer.step()

      loss_train += loss.item() # .item() yields a plain float detached from the graph
      n_batches += 1

    if n_batches == 0:
      # Fail clearly instead of dividing by zero when reporting the mean loss.
      raise ValueError('train_loader yielded no batches')

    if epoch == 1 or epoch % 5 == 0:
      print('{} Epoch {}, Training loss {}'.format(
        datetime.datetime.now(), epoch,
        loss_train / n_batches)) # mean loss per batch

train loop

In [None]:
# Run the training: 50 passes over train_loader using the optimizer, model and
# loss function defined in the cells above.
training_loop(
  n_epochs = 50,
  optimizer = optimizer,
  model = model,
  loss_fn = loss_fn,
  train_loader = train_loader
)