In [1]:
import torch
import os
import numpy as np
from PIL import Image
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from torch import nn
import torch.nn.functional as F
from tqdm import tqdm

In [2]:
torch.cuda.is_available()

True

In [3]:
class DogCat (Dataset):
    def __init__ (self, root, T = None, train = True, test = False):
        """
            主要目标:
                获取所有图片的地址，并根据训练、验证、测试划分数据
        """
        self.test = test
       # imgs = [os.path.join(root, img) for img in os.walk(root)]
        imgs = [i for i in (os.path.join(root, f) for f in os.listdir(root)) if os.path.isfile(i)]
        print (imgs)
        """
        if self.test:
            imgs = sorted(imgs, key=lambda x: int(x.split('.')[-2].split('/')[-1]))
        else:
            imgs = sorted(imgs, key=lambda x: int(x.split('.')[-2]))

        
        """
        imgs_num = len(imgs)
        
        # 直接进行预测
        if self.test:
            self.imgs = imgs
        # 训练集
        elif train:
            self.imgs = imgs[: int(0.7 * imgs_num)]
        # 测试集
        else: 
            self.imgs = imgs[int(0.7 * imgs_num) :]
            
        if T is None:

            # 测试集或者验证集或者预测集放在这里
            if self.test or not train:
                self.transform = transforms.Compose ([
                    transforms.Resize ((256, 256)),
                    transforms.RandomCrop ((224, 224)),
                    transforms.ToTensor (),
                    transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))
                ])
            else:
                self.transform = transforms.Compose ([
                    transforms.Resize ((256, 256)),
                    transforms.RandomCrop ((224, 224)),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor (),
                    transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))
                ])
    def __getitem__ (self, index):
        """
            一次返回一张图片的数据
        """
        img_path = self.imgs[index]
        # 找到label
        if self.test:
            label = int(self.imgs[index].split('.')[-2].split('/')[-1])
        else:
            label = img_path.split('/')[-1].split('.')[0]
            if label == 'dog':
                label = 1
            elif label == 'cat':
                label = 0
        data = Image.open (img_path)
        data = self.transform (data)
        return data, label
    
    def __len__(self):
        return len (self.imgs)

In [4]:
class AlexNet (nn.Module):
    def __init__ (self):
        super (AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 2),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

In [5]:
PATH = 'cat_dog/'

In [6]:
model = AlexNet().cuda()

In [7]:
train_data = DogCat (PATH, train = True)
val_data   = DogCat (PATH, test  = False)

train_dataloader = DataLoader (train_data, batch_size = 32, shuffle = True)
val_dataloader = DataLoader (val_data, batch_size = 32, shuffle = False)

['cat_dog/cat.373.jpg', 'cat_dog/cat.1141.jpg', 'cat_dog/cat.837.jpg', 'cat_dog/cat.1183.jpg', 'cat_dog/cat.680.jpg', 'cat_dog/dog.1102.jpg', 'cat_dog/dog.494.jpg', 'cat_dog/cat.1415.jpg', 'cat_dog/cat.852.jpg', 'cat_dog/dog.788.jpg', 'cat_dog/dog.1432.jpg', 'cat_dog/dog.1878.jpg', 'cat_dog/dog.754.jpg', 'cat_dog/dog.1927.jpg', 'cat_dog/cat.559.jpg', 'cat_dog/cat.1177.jpg', 'cat_dog/dog.740.jpg', 'cat_dog/dog.833.jpg', 'cat_dog/cat.971.jpg', 'cat_dog/dog.649.jpg', 'cat_dog/dog.1742.jpg', 'cat_dog/cat.764.jpg', 'cat_dog/cat.243.jpg', 'cat_dog/cat.1020.jpg', 'cat_dog/cat.372.jpg', 'cat_dog/dog.1526.jpg', 'cat_dog/cat.403.jpg', 'cat_dog/dog.668.jpg', 'cat_dog/dog.1329.jpg', 'cat_dog/cat.620.jpg', 'cat_dog/cat.1434.jpg', 'cat_dog/dog.405.jpg', 'cat_dog/cat.1408.jpg', 'cat_dog/dog.774.jpg', 'cat_dog/dog.1179.jpg', 'cat_dog/dog.417.jpg', 'cat_dog/cat.769.jpg', 'cat_dog/cat.405.jpg', 'cat_dog/dog.241.jpg', 'cat_dog/dog.688.jpg', 'cat_dog/cat.190.jpg', 'cat_dog/cat.726.jpg', 'cat_dog/cat.619.j

In [8]:
print('Operation Check')
batch_iterator = iter(train_dataloader)
inputs, label = next(batch_iterator)
print(inputs.size())
print(label)

Operation Check
torch.Size([32, 3, 224, 224])
tensor([1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0,
        0, 1, 0, 0, 0, 0, 1, 1])


In [9]:
# 5 定义一个损失优化器
import torch.optim as optim

criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.RMSprop(model.parameters(), lr=0.0001, alpha=0.9)

In [12]:
print ("Start Training")
for epoch in range (200):
    
    running_loss = 0.0
 #   for i, data in enumerate (train_dataloader, 0):
    for i,(inputs,labels) in enumerate(train_dataloader):
        # 1 获取数据
        inputs, labels = inputs.cuda(), labels.cuda() # add this line
        # 2 初始化梯度
        optimizer.zero_grad()
        # 3  forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # 4 打印信息
        running_loss += loss.item()
    
    print('[%d] loss: %.3f' %
          (epoch + 1,running_loss / 2000))
    print (torch.cuda.memory_cached())
    running_loss = 0.0

print('Finished Training')

Start Training
[1] loss: 0.026
1180696576
[2] loss: 0.025
1180696576
[3] loss: 0.024
1180696576
[4] loss: 0.023
1180696576
[5] loss: 0.022
1180696576
[6] loss: 0.021
1180696576
[7] loss: 0.021
1180696576
[8] loss: 0.020
1180696576
[9] loss: 0.019
1180696576
[10] loss: 0.018
1180696576
[11] loss: 0.017
1180696576
[12] loss: 0.017
1180696576
[13] loss: 0.015
1180696576
[14] loss: 0.016
1180696576
[15] loss: 0.014
1180696576
[16] loss: 0.014
1180696576
[17] loss: 0.014
1180696576
[18] loss: 0.013
1180696576
[19] loss: 0.013
1180696576
[20] loss: 0.012
1180696576
[21] loss: 0.011
1180696576
[22] loss: 0.012
1180696576
[23] loss: 0.011
1180696576
[24] loss: 0.010
1180696576
[25] loss: 0.010
1180696576
[26] loss: 0.010
1180696576
[27] loss: 0.009
1180696576
[28] loss: 0.009
1180696576
[29] loss: 0.009
1180696576
[30] loss: 0.009
1180696576
[31] loss: 0.008
1180696576
[32] loss: 0.007
1180696576
[33] loss: 0.008
1180696576
[34] loss: 0.007
1180696576
[35] loss: 0.007
1180696576
[36] loss: 0.0

In [15]:
correct = 0
total = 0
with torch.no_grad():
    for data in val_dataloader:
        images, labels = data
        images, labels = images.cuda(), labels.cuda()
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 98 %
