In [1]:
import torch
import torchvision.transforms as trns
import torch.nn as nn
from PIL import Image
from scipy.io import loadmat
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
import glob
import numpy
import torch.nn.functional as F

In [2]:
class humanDataset(Dataset):
    def __init__(self, root, split, transform):
        # --------------------------------------------
        # Initialize paths, transforms, and so on
        # --------------------------------------------
        self.transform = transform

        # Load image path and annotations
#         mat = loadmat(f'{root}/{split}_list.mat', squeeze_me=True)
#         self.imgs = mat['file_list']
#         self.data = [f'{root}/{i}' for i in self.imgs]
    
        self.imgs = glob.glob(f'{root}/{split}/*.jpg')
        
        self.lbls = []
    
        for i in self.imgs:
            self.lbls.append(int(i.split('_')[1]))
        
            
        self.lbls = numpy.array(self.lbls, dtype='uint8')


        assert len(self.imgs) == len(self.lbls), 'mismatched length!'
        print('Total data in {} split: {}'.format(split, len(self.imgs)))


    def __getitem__(self, index):
        # --------------------------------------------
        # 1. Read from file (using numpy.fromfile, PIL.Image.open)
        # 2. Preprocess the data (torchvision.Transform)
        # 3. Return the data (e.g. image and label)
        # --------------------------------------------
        imgpath = self.imgs[index]
        img = Image.open(imgpath).convert('RGB')
        lbl = int(self.lbls[index])
        if self.transform is not None:
            img = self.transform(img)
        return img, lbl

    def __len__(self):
        # --------------------------------------------
        # Indicate the total size of the dataset
        # --------------------------------------------
        return len(self.imgs)

In [3]:
# Create train/valid transforms
train_transform = trns.Compose([
    trns.Resize((256, 256)),
    trns.RandomCrop((224, 224)),
    trns.RandomHorizontalFlip(),
    trns.ToTensor(),
    trns.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
test_transform = trns.Compose([
    trns.Resize((224, 224)),
    trns.ToTensor(),
    trns.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])



In [4]:
# Create train/valid datasets
train_set = humanDataset(root='./dataset/humanDataset',
                       split='train', transform=train_transform)
test_set = humanDataset(root='./dataset/humanDataset',
                       split='test', transform=test_transform)

Total data in train split: 20856
Total data in test split: 3250


In [5]:
# Create train/valid loaders
train_loader = DataLoader(
    dataset=train_set, batch_size=16, shuffle=True, num_workers=0)
test_loader = DataLoader(
    dataset=test_set, batch_size=16, shuffle=False, num_workers=0)

In [6]:
# Get images and labels in a mini-batch of train_loader
for imgs, lbls in train_loader:
    print(lbls)
    print('Size of image:', imgs.size())  # batch_size * 3 * 224 * 224
    print('Type of image:', imgs.dtype)   # float32
    print('Size of label:', lbls.size())  # batch_size
    print('Type of label:', lbls.dtype)   # int64(long)
    break

tensor([0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1])
Size of image: torch.Size([16, 3, 224, 224])
Type of image: torch.float32
Size of label: torch.Size([16])
Type of label: torch.int64


In [7]:
# (weight-kernel+1) 無條件進位
class CNN_model(nn.Module):
    #列出需要哪些層
    def __init__(self):
        super(CNN_model, self).__init__()
        # Convolution 1 , input_shape=(3,224,224)
        self.cnn1 = nn.Conv2d(3, 16, kernel_size=5, stride=1) 
        # output_shape=((224–5+1)/(1+1)=110) (16,110,110)
        self.relu1 = nn.ReLU(inplace=True) 
        # Max pool 1
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        # output_shape=(16,55,55)
        # Convolution 2 , 
        self.cnn2 = nn.Conv2d(16,8, kernel_size=11, stride=1) 
        self.relu2 = nn.ReLU(inplace=True) 
        # Max pool 2
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        # Fully connected 1 ,#input_shape=(8*50*50)
        self.fc = nn.Linear(8 * 50 * 50, 2)     
    #列出forward的路徑，將init列出的層代入
    def forward(self, x):
        out = self.cnn1(x) 
        out = self.relu1(out)
        out = self.maxpool1(out)
        out = self.cnn2(out)
        out = self.relu2(out)
        out = self.maxpool2(out)
        out = out.view(out.size(0), -1) 
        out = self.fc(out) 
        return out

In [8]:
net = CNN_model()
net.cuda()

CNN_model(
  (cnn1): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1))
  (relu1): ReLU(inplace=True)
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (cnn2): Conv2d(16, 8, kernel_size=(11, 11), stride=(1, 1))
  (relu2): ReLU(inplace=True)
  (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=20000, out_features=2, bias=True)
)

In [9]:
device = torch.device("cuda")
# loss_fn = nn.NLLLoss() # One way to calculate loss
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(),lr=0.001,momentum=0.9)

In [10]:
# Train
for epoch in range(100):
    running_loss = 0.0

    for times, data in enumerate(train_loader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        
        # Zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()

        if times % 100 == 99 or times+1 == len(train_loader):
            print('[%d/%d, %d/%d] loss: %.3f' % (epoch+1, 100, times+1, len(train_loader), running_loss/2000))

print('Finished Training')

[1/100, 100/1304] loss: 0.035
[1/100, 200/1304] loss: 0.068
[1/100, 300/1304] loss: 0.102
[1/100, 400/1304] loss: 0.135
[1/100, 500/1304] loss: 0.168
[1/100, 600/1304] loss: 0.201
[1/100, 700/1304] loss: 0.234
[1/100, 800/1304] loss: 0.266
[1/100, 900/1304] loss: 0.297
[1/100, 1000/1304] loss: 0.328
[1/100, 1100/1304] loss: 0.359
[1/100, 1200/1304] loss: 0.390
[1/100, 1300/1304] loss: 0.421
[1/100, 1304/1304] loss: 0.422
[2/100, 100/1304] loss: 0.030
[2/100, 200/1304] loss: 0.061
[2/100, 300/1304] loss: 0.090
[2/100, 400/1304] loss: 0.120
[2/100, 500/1304] loss: 0.150
[2/100, 600/1304] loss: 0.178
[2/100, 700/1304] loss: 0.208
[2/100, 800/1304] loss: 0.237
[2/100, 900/1304] loss: 0.267
[2/100, 1000/1304] loss: 0.295
[2/100, 1100/1304] loss: 0.322
[2/100, 1200/1304] loss: 0.350
[2/100, 1300/1304] loss: 0.380
[2/100, 1304/1304] loss: 0.381
[3/100, 100/1304] loss: 0.028
[3/100, 200/1304] loss: 0.056
[3/100, 300/1304] loss: 0.083
[3/100, 400/1304] loss: 0.112
[3/100, 500/1304] loss: 0.140


[20/100, 100/1304] loss: 0.022
[20/100, 200/1304] loss: 0.044
[20/100, 300/1304] loss: 0.064
[20/100, 400/1304] loss: 0.085
[20/100, 500/1304] loss: 0.107
[20/100, 600/1304] loss: 0.127
[20/100, 700/1304] loss: 0.147
[20/100, 800/1304] loss: 0.169
[20/100, 900/1304] loss: 0.191
[20/100, 1000/1304] loss: 0.212
[20/100, 1100/1304] loss: 0.233
[20/100, 1200/1304] loss: 0.253
[20/100, 1300/1304] loss: 0.274
[20/100, 1304/1304] loss: 0.276
[21/100, 100/1304] loss: 0.021
[21/100, 200/1304] loss: 0.042
[21/100, 300/1304] loss: 0.064
[21/100, 400/1304] loss: 0.084
[21/100, 500/1304] loss: 0.106
[21/100, 600/1304] loss: 0.126
[21/100, 700/1304] loss: 0.147
[21/100, 800/1304] loss: 0.170
[21/100, 900/1304] loss: 0.192
[21/100, 1000/1304] loss: 0.212
[21/100, 1100/1304] loss: 0.232
[21/100, 1200/1304] loss: 0.253
[21/100, 1300/1304] loss: 0.274
[21/100, 1304/1304] loss: 0.275
[22/100, 100/1304] loss: 0.021
[22/100, 200/1304] loss: 0.042
[22/100, 300/1304] loss: 0.063
[22/100, 400/1304] loss: 0.08

[38/100, 1100/1304] loss: 0.210
[38/100, 1200/1304] loss: 0.230
[38/100, 1300/1304] loss: 0.249
[38/100, 1304/1304] loss: 0.250
[39/100, 100/1304] loss: 0.019
[39/100, 200/1304] loss: 0.038
[39/100, 300/1304] loss: 0.055
[39/100, 400/1304] loss: 0.074
[39/100, 500/1304] loss: 0.094
[39/100, 600/1304] loss: 0.114
[39/100, 700/1304] loss: 0.134
[39/100, 800/1304] loss: 0.154
[39/100, 900/1304] loss: 0.172
[39/100, 1000/1304] loss: 0.192
[39/100, 1100/1304] loss: 0.212
[39/100, 1200/1304] loss: 0.231
[39/100, 1300/1304] loss: 0.250
[39/100, 1304/1304] loss: 0.251
[40/100, 100/1304] loss: 0.019
[40/100, 200/1304] loss: 0.039
[40/100, 300/1304] loss: 0.060
[40/100, 400/1304] loss: 0.078
[40/100, 500/1304] loss: 0.095
[40/100, 600/1304] loss: 0.116
[40/100, 700/1304] loss: 0.134
[40/100, 800/1304] loss: 0.153
[40/100, 900/1304] loss: 0.172
[40/100, 1000/1304] loss: 0.191
[40/100, 1100/1304] loss: 0.211
[40/100, 1200/1304] loss: 0.229
[40/100, 1300/1304] loss: 0.249
[40/100, 1304/1304] loss: 

[57/100, 700/1304] loss: 0.126
[57/100, 800/1304] loss: 0.143
[57/100, 900/1304] loss: 0.161
[57/100, 1000/1304] loss: 0.179
[57/100, 1100/1304] loss: 0.197
[57/100, 1200/1304] loss: 0.215
[57/100, 1300/1304] loss: 0.235
[57/100, 1304/1304] loss: 0.236
[58/100, 100/1304] loss: 0.019
[58/100, 200/1304] loss: 0.037
[58/100, 300/1304] loss: 0.056
[58/100, 400/1304] loss: 0.075
[58/100, 500/1304] loss: 0.094
[58/100, 600/1304] loss: 0.112
[58/100, 700/1304] loss: 0.130
[58/100, 800/1304] loss: 0.149
[58/100, 900/1304] loss: 0.167
[58/100, 1000/1304] loss: 0.185
[58/100, 1100/1304] loss: 0.203
[58/100, 1200/1304] loss: 0.221
[58/100, 1300/1304] loss: 0.239
[58/100, 1304/1304] loss: 0.240
[59/100, 100/1304] loss: 0.018
[59/100, 200/1304] loss: 0.038
[59/100, 300/1304] loss: 0.056
[59/100, 400/1304] loss: 0.073
[59/100, 500/1304] loss: 0.091
[59/100, 600/1304] loss: 0.109
[59/100, 700/1304] loss: 0.128
[59/100, 800/1304] loss: 0.146
[59/100, 900/1304] loss: 0.162
[59/100, 1000/1304] loss: 0.1

[76/100, 300/1304] loss: 0.052
[76/100, 400/1304] loss: 0.069
[76/100, 500/1304] loss: 0.087
[76/100, 600/1304] loss: 0.105
[76/100, 700/1304] loss: 0.122
[76/100, 800/1304] loss: 0.139
[76/100, 900/1304] loss: 0.157
[76/100, 1000/1304] loss: 0.174
[76/100, 1100/1304] loss: 0.192
[76/100, 1200/1304] loss: 0.210
[76/100, 1300/1304] loss: 0.226
[76/100, 1304/1304] loss: 0.227
[77/100, 100/1304] loss: 0.017
[77/100, 200/1304] loss: 0.035
[77/100, 300/1304] loss: 0.052
[77/100, 400/1304] loss: 0.070
[77/100, 500/1304] loss: 0.088
[77/100, 600/1304] loss: 0.105
[77/100, 700/1304] loss: 0.122
[77/100, 800/1304] loss: 0.140
[77/100, 900/1304] loss: 0.158
[77/100, 1000/1304] loss: 0.174
[77/100, 1100/1304] loss: 0.192
[77/100, 1200/1304] loss: 0.210
[77/100, 1300/1304] loss: 0.228
[77/100, 1304/1304] loss: 0.229
[78/100, 100/1304] loss: 0.017
[78/100, 200/1304] loss: 0.035
[78/100, 300/1304] loss: 0.054
[78/100, 400/1304] loss: 0.072
[78/100, 500/1304] loss: 0.090
[78/100, 600/1304] loss: 0.10

[94/100, 1300/1304] loss: 0.219
[94/100, 1304/1304] loss: 0.220
[95/100, 100/1304] loss: 0.016
[95/100, 200/1304] loss: 0.032
[95/100, 300/1304] loss: 0.049
[95/100, 400/1304] loss: 0.066
[95/100, 500/1304] loss: 0.082
[95/100, 600/1304] loss: 0.099
[95/100, 700/1304] loss: 0.115
[95/100, 800/1304] loss: 0.133
[95/100, 900/1304] loss: 0.151
[95/100, 1000/1304] loss: 0.168
[95/100, 1100/1304] loss: 0.184
[95/100, 1200/1304] loss: 0.201
[95/100, 1300/1304] loss: 0.218
[95/100, 1304/1304] loss: 0.218
[96/100, 100/1304] loss: 0.017
[96/100, 200/1304] loss: 0.034
[96/100, 300/1304] loss: 0.050
[96/100, 400/1304] loss: 0.067
[96/100, 500/1304] loss: 0.083
[96/100, 600/1304] loss: 0.100
[96/100, 700/1304] loss: 0.118
[96/100, 800/1304] loss: 0.135
[96/100, 900/1304] loss: 0.153
[96/100, 1000/1304] loss: 0.170
[96/100, 1100/1304] loss: 0.188
[96/100, 1200/1304] loss: 0.205
[96/100, 1300/1304] loss: 0.221
[96/100, 1304/1304] loss: 0.221
[97/100, 100/1304] loss: 0.017
[97/100, 200/1304] loss: 0.

In [11]:
# Test
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = net(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test inputs: %d %%' % (100 * correct / total))

Accuracy of the network on the 10000 test inputs: 86 %


In [12]:
torch.save(net.state_dict(), "./race_model.pt")