In [2]:
#食物分类 总共11类
#import
import numpy as np
import torch 
import cv2
import torch.nn as nn 
import pandas as pd
import os
from torch.utils.data import DataLoader,Dataset
import torchvision.transforms as transforms
import time
import torch.nn.functional as F

In [3]:
#data process
#Read image
def readfile(path, label):
    """Load every file in a directory as a 128x128 image into one uint8 array.

    Files are visited in sorted filename order, decoded with ``cv2.imread``
    and resized to 128x128. When ``label`` is true, the class id is parsed
    from the part of each filename before the first underscore
    (e.g. ``"3_100.jpg"`` -> 3).

    Note: pixel data is stored exactly as ``cv2.imread`` returns it (OpenCV
    documents this as BGR channel order); no colour conversion is applied.

    Args:
        path: Directory containing the image files.
        label: Whether to parse labels from the filenames and return them.

    Returns:
        ``x`` with shape ``(N, 128, 128, 3)`` and dtype ``uint8`` when
        ``label`` is false; the tuple ``(x, y)`` where ``y`` has shape
        ``(N,)`` and dtype ``uint8`` when ``label`` is true.

    Raises:
        ValueError: If a file in ``path`` cannot be decoded as an image.
    """
    image_dir = sorted(os.listdir(path))
    x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)
    y = np.zeros(len(image_dir), dtype=np.uint8)
    for i, file in enumerate(image_dir):
        file_path = os.path.join(path, file)
        img = cv2.imread(file_path)
        # cv2.imread reports failure by returning None rather than raising,
        # which would otherwise surface as an opaque error inside cv2.resize.
        if img is None:
            raise ValueError("cannot read image file: {}".format(file_path))
        x[i] = cv2.resize(img, (128, 128))
        if label:
            y[i] = int(file.split("_")[0])
    if label:
        return x, y
    return x

In [4]:
# Root folder expected to hold the "training", "validation" and "testing" sub-folders.
workspace_dir = './data'

print("reading data...")

# Labelled splits: readfile is asked to return labels alongside the images.
train_x, train_y = readfile(os.path.join(workspace_dir, "training"), True)
print("Size of training data  = {}".format(train_x.shape[0]))

val_x, val_y = readfile(os.path.join(workspace_dir, "validation"), True)
print("Size of validation data  = {}".format(val_x.shape[0]))

# Unlabelled split: only the image array is returned.
test_x = readfile(os.path.join(workspace_dir, "testing"), False)
print("Size of Testing data = {}".format(test_x.shape[0]))

print("Over")

reading data...
Size of training data  = 9866
Size of validation data  = 3430
Size of Testing data = 3347
Over


# Dataset
## 在 PyTorch 中，我們可以利用 torch.utils.data 的 Dataset 及 DataLoader 來"包裝" data，使後續的 training 及 testing 更為方便。

## Dataset 需要 override（覆寫）兩個函數：\_\_len\_\_ 及 \_\_getitem\_\_

## \_\_len\_\_ 必須要回傳 dataset 的大小，而 \_\_getitem\_\_ 則定義了當程式利用 [ ] 取值時，dataset 應該要怎麼回傳資料。

## 實際上我們並不會直接使用到這兩個函數，但是使用 DataLoader 在 enumerate Dataset 時會使用到，沒有實作的話會在程式運行階段出現 error。


In [5]:
# Training-time pipeline: random augmentation, then conversion to a tensor.
train_transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(),  # randomly flip the image horizontally
        transforms.RandomRotation(15),      # randomly rotate the image
        transforms.ToTensor(),              # convert to a Tensor and scale values to [0, 1]
    ]
)

# Evaluation-time pipeline: no augmentation, only the tensor conversion.
test_transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.ToTensor(),
    ]
)
class ImgDataset(Dataset):
    """Dataset over an indexable collection of images with optional labels.

    Args:
        x: Indexable collection of images; ``len(x)`` is the dataset size.
        y: Optional labels. When given they are converted with
            ``torch.LongTensor``; when omitted the dataset is unlabelled.
        transform: Optional callable applied to each image when it is fetched.
    """

    def __init__(self, x, y=None, transform=None):
        self.x = x
        # Labels are required to be a LongTensor; None marks unlabelled data.
        self.y = None if y is None else torch.LongTensor(y)
        self.transform = transform

    def __len__(self):
        """Return the number of images."""
        return len(self.x)

    def __getitem__(self, index):
        """Return ``(image, label)`` when labels exist, otherwise just ``image``."""
        sample = self.x[index]
        if self.transform is not None:
            sample = self.transform(sample)
        if self.y is None:
            return sample
        return sample, self.y[index]

In [17]:
# Mini-batch size used by the DataLoaders below.
batch_size = 2

# Training split: augmented on the fly and reshuffled every epoch.
train_set = ImgDataset(x=train_x, y=train_y, transform=train_transform)
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)

# Validation split: no augmentation, fixed order.
val_set = ImgDataset(x=val_x, y=val_y, transform=test_transform)
val_loader = DataLoader(dataset=val_set, batch_size=batch_size, shuffle=False)

In [19]:
#model
class Classfication(nn.Module):
    """Small CNN: three unpadded 3x3 conv stages followed by three linear layers.

    Each conv stage is conv -> ReLU -> 2x2 max-pool. For a 3x128x128 input the
    spatial size goes 128 -> 126 -> 63 -> 61 -> 30 -> 28 -> 14, which is why
    the first linear layer takes 64 * 14 * 14 features. ``forward`` returns
    the raw scores for 11 classes (no softmax is applied).
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor (kernel size 3, no padding).
        self.conv1 = nn.Conv2d(3, 64, 3)
        self.conv2 = nn.Conv2d(64, 128, 3)
        self.conv3 = nn.Conv2d(128, 64, 3)
        # Fully connected head operating on the flattened 64x14x14 feature map.
        self.fc1 = nn.Linear(64 * 14 * 14, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 11)

    def forward(self, x):
        """Map a batch of images ``x`` to a ``(batch, 11)`` tensor of scores."""
        batch = x.size(0)
        out = x
        for conv in (self.conv1, self.conv2, self.conv3):
            out = F.max_pool2d(F.relu(conv(out)), 2)
        # Flatten everything except the batch dimension.
        out = out.view(batch, -1)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        return self.fc3(out)


In [21]:
#model 2 
class Classifier(nn.Module):
    """CNN with five conv stages and a three-layer fully connected head.

    Every stage is Conv2d(kernel 3, stride 1, padding 1) -> BatchNorm2d ->
    ReLU -> MaxPool2d(2, 2, 0). For a [3, 128, 128] input the feature map
    becomes [64, 64, 64] -> [128, 32, 32] -> [256, 16, 16] -> [512, 8, 8] ->
    [512, 4, 4]; the flattened 512 * 4 * 4 features are mapped to 11 scores.
    """

    def __init__(self):
        super().__init__()
        # (in_channels, out_channels) of each conv stage, in order.
        stage_channels = [(3, 64), (64, 128), (128, 256), (256, 512), (512, 512)]
        layers = []
        for c_in, c_out in stage_channels:
            layers += [
                nn.Conv2d(c_in, c_out, 3, 1, 1),  # padding 1 keeps height/width
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2, 0),            # halves height/width
            ]
        self.cnn = nn.Sequential(*layers)
        self.fc = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11),
        )

    def forward(self, x):
        """Map a batch of images ``x`` to a ``(batch, 11)`` tensor of scores."""
        features = self.cnn(x)
        # Flatten everything except the batch dimension.
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

In [25]:
class AlexNet(nn.Module):
    """Two-stage CNN (5x5 conv -> ReLU -> 2x2 max-pool, twice) with a two-layer head.

    The size of the first linear layer is derived from ``input_size`` instead
    of being hard-coded, so it always matches the flattened feature map
    (128 * 29 * 29 for the default 128x128 input).

    Args:
        num_classes: Number of output scores. Defaults to 11.
        input_size: Height and width of the (square) input images. Defaults to 128.
        hidden_dim: Width of the hidden fully connected layer. Defaults to 1000.

    Raises:
        ValueError: If ``input_size`` is too small to survive both conv/pool stages.
    """

    def __init__(self, num_classes=11, input_size=128, hidden_dim=1000):
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, 5),
            nn.ReLU(),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 128, 5),
            nn.ReLU(),
        )
        # 2x2 max pooling with stride 2
        self.max_pool1 = nn.MaxPool2d(2, 2, 0)
        self.max_pool2 = nn.MaxPool2d(2, 2, 0)
        self.fc1 = nn.Linear(self._flat_features(input_size), hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    @staticmethod
    def _flat_features(input_size):
        """Return the flattened feature count after both conv/pool stages."""
        side = input_size
        for _ in range(2):
            # An unpadded 5x5 conv removes 4 pixels; the 2x2 pool then halves (floor).
            side = (side - 4) // 2
        if side < 1:
            raise ValueError("input_size {} is too small for this network".format(input_size))
        return 128 * side * side

    def forward(self, x):
        """Map a batch of images ``x`` to a ``(batch, num_classes)`` tensor of scores."""
        out = self.conv1(x)          # default input: [64, 124, 124]
        out = self.max_pool1(out)    # [64, 62, 62]
        out = self.conv2(out)        # [128, 58, 58]
        out = self.max_pool2(out)    # [128, 29, 29]
        out = out.view(out.shape[0], -1)
        # Non-linearity between the two linear layers; without it they
        # collapse into a single affine map.
        out = F.relu(self.fc1(out))
        return self.fc2(out)

class DNN(nn.Module):
    """Fully connected network over inputs flattened to 49152 values per sample.

    49152 equals 3 * 128 * 128. Layer widths are
    49152 -> 64*64 -> 32*32 -> 16*16 -> 11, with ReLU after every layer
    except the last.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(49152, 64 * 64)
        self.fc3 = nn.Linear(64 * 64, 32 * 32)
        self.fc4 = nn.Linear(32 * 32, 16 * 16)
        self.fc6 = nn.Linear(16 * 16, 11)

    def forward(self, x):
        """Flatten ``x`` per sample and return a ``(batch, 11)`` tensor of scores."""
        out = x.view(x.size(0), 49152)
        for hidden in (self.fc1, self.fc3, self.fc4):
            out = F.relu(hidden(out))
        return self.fc6(out)


# dnn.eval()
# Training setup.
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Model under training, moved to the selected device.
model = Classifier().to(device)

# Loss function (the name `cirection` is what the later cells refer to).
cirection = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)
# model.eval()

RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 4.00 GiB total capacity; 3.04 GiB already allocated; 0 bytes free; 3.04 GiB reserved in total by PyTorch)

In [10]:
# Train `model` for a fixed number of epochs, evaluating on the validation
# split after every epoch and printing per-sample accuracy and loss.
epochs = 30
for epoch in range(epochs):
    epoch_start_time = time.time()
    train_acc = 0.0
    val_acc = 0.0
    train_loss = 0.0
    val_loss = 0.0

    model.train()
    for data in train_loader:
        x, y = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        y_pred = model(x)
        loss = cirection(y_pred, y.long())
        loss.backward()
        optimizer.step()

        # Count correct predictions in this batch.
        train_acc += (y_pred.detach().argmax(dim=1) == y).sum().item()
        # The criterion returns the batch mean; weight it by the batch size so
        # that dividing by the dataset size below yields a true per-sample mean.
        train_loss += loss.item() * x.size(0)

    model.eval()
    with torch.no_grad():
        for data in val_loader:
            valx, valy = data[0].to(device), data[1].to(device)
            val_pred = model(valx)
            batch_loss = cirection(val_pred, valy.long())
            val_acc += (val_pred.argmax(dim=1) == valy).sum().item()
            val_loss += batch_loss.item() * valx.size(0)

    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' %
          (epoch + 1, epochs, time.time() - epoch_start_time,
           train_acc / len(train_set), train_loss / len(train_set),
           val_acc / len(val_set), val_loss / len(val_set)))

RuntimeError: CUDA out of memory. Tried to allocate 768.00 MiB (GPU 0; 4.00 GiB total capacity; 3.03 GiB already allocated; 0 bytes free; 3.04 GiB reserved in total by PyTorch)

In [None]:
# Merge the training and validation splits so both can be used for training.
train_val_x = np.concatenate([train_x, val_x], axis=0)
train_val_y = np.concatenate([train_y, val_y], axis=0)

# The merged set uses the augmenting training transform and is shuffled.
train_val_set = ImgDataset(x=train_val_x, y=train_val_y, transform=train_transform)
train_val_loader = DataLoader(dataset=train_val_set, batch_size=batch_size, shuffle=True)

In [None]:
# Train a fresh model on the merged training + validation data.
# The class must be instantiated before it can be moved to a device.
model_best = Classfication().to(device)
# model_best needs its own optimizer: the existing `optimizer` is bound to the
# parameters of `model` and would never update model_best.
optimizer_best = torch.optim.Adam(model_best.parameters(), lr=0.001)

epochs = 30
for epoch in range(epochs):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0

    model_best.train()
    for data in train_val_loader:
        x, y = data[0].to(device), data[1].to(device)
        optimizer_best.zero_grad()
        y_pred = model_best(x)
        loss = cirection(y_pred, y.long())
        loss.backward()
        optimizer_best.step()

        train_acc += (y_pred.detach().argmax(dim=1) == y).sum().item()
        # Accumulate a plain float (not the loss tensor, which would keep every
        # batch's autograd graph alive), weighted by batch size so the value
        # printed below is a per-sample mean.
        train_loss += loss.item() * x.size(0)

    # Normalise by the size of the dataset actually iterated (train + val).
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f ' %
          (epoch + 1, epochs, time.time() - epoch_start_time,
           train_acc / len(train_val_set), train_loss / len(train_val_set)))

In [None]:
# Notes: the simple CNN reached 47% accuracy on the validation set.
# Next: try the teaching assistant's version of the CNN.

# alexnet
# Persist the whole model object to 'nornn.pth'.
torch.save(obj=model, f='nornn.pth')

In [None]:
# Test split: no labels are passed, and the loader keeps the original order.
test_set = ImgDataset(x=test_x, transform=test_transform)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)

In [None]:

# Predict a class index for every test image, in loader order.
model.eval()
prediction = []
with torch.no_grad():
    for data in test_loader:
        # Send the batch to the device the model was placed on rather than
        # assuming CUDA, so this cell also runs on CPU-only machines.
        test_pred = model(data.to(device))
        # Highest-scoring class per sample.
        test_label = np.argmax(test_pred.cpu().numpy(), axis=1)
        prediction.extend(test_label)


In [None]:
# Write the predictions to predict.csv as "ID,Category" rows; the ID is the
# zero-based position of the prediction in the list.
with open("predict.csv", 'w') as f:
    f.write('ID,Category\n')
    f.writelines('{},{}\n'.format(i, y) for i, y in enumerate(prediction))
        