In [1]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms, utils, datasets
import matplotlib.pyplot as plt
import os
import cv2
import numpy as np
from skimage import io
from PIL import Image

In [2]:
EPOCH = 30                # train the training data n times, to save time, we just train 1 epoch
BATCH_SIZE = 12
INPUT_SIZE = 227         # img input size
LR = 0.001               # learning rate
DOWNLOAD_MNIST = False   # set to True if haven't download the data
TEST = False

In [3]:
class GroceryDataset(Dataset):
    data_transform = transforms.Compose([
        transforms.Resize(227),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    """Frocery dataset."""    
    def __init__(self, fpath = '/hostname/freiburg_groceries_dataset/images/',\
                 transform = data_transform):
        self.fpath = fpath
        self.filepath = os.path.join(self.fpath + 'path.txt')
        self.cls = []
        with open(self.filepath) as f:
            self.flines = f.readlines()
        for i in self.flines:
            cls = i.split('/')[1]
            if cls not in self.cls:
                self.cls.append(cls)
        self.length = len(self.flines)
        self.transform = transform

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        fsplit = self.flines[idx].split('/')
        #print fsplit[1], fsplit[2]
        label = torch.tensor(self.cls.index(fsplit[1]))
        #label = fsplit[1]
        image_path = self.fpath + fsplit[1] + '/' + fsplit[2].strip()
        img = self.transform(Image.open(image_path))
        #item = {'image': img, 'landmarks': label}
        return img, label

In [4]:
class CaffeNet(nn.Module):
    def __init__(self):
        super(CaffeNet, self).__init__()
        '''self.pool2x2 = nn.MaxPool2d(2, 2)
        self.conv1 = nn.Conv2d(1, 16, 5, 1, 2)
        self.conv2 = nn.Conv2d(1, 16, 5, 1, 2)
        self.fc1 = nn.Linear(16*5*5, 120)'''
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 96, 11, 4, 0), #in, out, kernel, stride, padding
            nn.ReLU(),
            nn.MaxPool2d((3, 3), 2), #kernel size, stride
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(96, 256, 5, 1, 2), #in, out, kernel, stride, padding
            nn.ReLU(),
            nn.MaxPool2d(3, 2), #kernel size, stride
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(256, 384, 3, 1, 1), #in, out, kernel, stride, padding
            nn.ReLU(),
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(384, 384, 3, 1, 1), #in, out, kernel, stride, padding
            nn.ReLU(),
        )
        self.conv5 = nn.Sequential(
            nn.Conv2d(384, 256, 3, 1, 1), #in, out, kernel, stride, padding
            nn.ReLU(),
            nn.MaxPool2d(3, 2), #kernel size, stride
        )
        self.fc6 = nn.Sequential(
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(),
            nn.Dropout2d(0.5)
        )
        self.fc7 = nn.Sequential(
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout2d(0.5)
        )
        self.out = nn.Linear(4096, 25)
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = x.view(-1, 256*6*6) #flatten data
        x = self.fc6(x)
        x = self.fc7(x)
        output = self.out(x)
        return output

In [5]:
def load_mnist_data(bt = BATCH_SIZE, fpath = '/home/arg_ws3/mnist'):
    data_transform = transforms.Compose([
            transforms.Resize(227),
            #transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
    ])
    train_data = torchvision.datasets.MNIST(
        root = fpath,
        train = True, # this is training data
        transform = data_transform
    )
    train_loader = torch.utils.data.DataLoader(dataset = train_data, \
                                               batch_size = bt, \
                                               shuffle = True)
    return train_loader

In [6]:
def load_grocery_data(bt = BATCH_SIZE, fpath = '/hostname/freiburg_groceries_dataset/images'):
    data_transform = transforms.Compose([
            transforms.Resize(227),
            #transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
    ])
    train_data = datasets.ImageFolder(root = fpath, \
                                           transform = data_transform)
    train_loader = torch.utils.data.DataLoader(dataset = train_data, \
                                                 batch_size = bt, \
                                                 shuffle = True)
    return train_loader

In [7]:
def load_test_data(bt = 1, dataset = 'grocery'):
    if dataset == 'grocery':
        test_loader = load_grocery_data(bt = bt)
    elif dataset == 'mnist':
        test_loader = load_mnist_data(bt = bt)
    data_iter = iter(test_loader)
    return data_iter

In [8]:
def vis_img(data):
    img = data[0][0].cpu().numpy()
    img = np.transpose(img, (1, 2, 0))
    print (img.shape)
    img = cv2.resize(img, (227, 227)) 
    print (img.shape)
    plt.imshow(img, cmap='gray')
    img = np.expand_dims(img, axis=0)
    img = np.expand_dims(img, axis=0)
    img = torch.from_numpy(img).cuda()

In [9]:
def test_caffenet(weight_path = '/hostname/freiburg_groceries_dataset/pytorch_model/net_29.pth'):
    data_iter = load_test_data(dataset = 'grocery')
    #Define model
    print('Load training model')
    caffenet = torch.load(weight_path).cuda()
    data = next(data_iter)
    outputs = caffenet(data[0].cuda())
    pred_y = torch.max(outputs, 1)[1].cpu().numpy()
    print ('Prediction: ', pred_y)
    print ('Ground truth: ', data[1].cpu().numpy())
    vis_img(data)

In [10]:
def train_caffenet():
    #Define training data
    train_loader = load_grocery_data()
    #Define model, optimizer and loss function
    print('Load training model')
    caffenet = CaffeNet().cuda()
    print (caffenet)
    #optimizer = torch.optim.Adam(caffenet.parameters(), lr = LR)   # optimize all cnn parameters
    optimizer = torch.optim.SGD(caffenet.parameters(), lr = LR, momentum=0.9, weight_decay=0.0005)
    loss_func = nn.CrossEntropyLoss().cuda()
    
    running_loss = 0.
    cnt = 0
    for epoch in range(EPOCH):
        print('========== Epoch: ', epoch, ' ==========')
        for step, data in enumerate(train_loader):
            cnt = cnt + 1
            imgs = data[0].cuda()
            labels = data[1].cuda()
            optimizer.zero_grad()
            outputs = caffenet(imgs)
            loss = loss_func(outputs, labels)
            loss.backward()
            optimizer.step()
            if cnt % 50 == 0:
                print ('loss: ', loss.item())
        model_name = '/hostname/freiburg_groceries_dataset/pytorch_model/net_{}.pth'.format(epoch)
        torch.save(caffenet, model_name)
        print('----- Save model: {} -----'.format(model_name))
    print('Finish Training')

In [None]:
train_caffenet()

Load training model
CaffeNet(
  (conv1): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(3, 3), stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv3): Sequential(
    (0): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
  )
  (conv4): Sequential(
    (0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
  )
  (conv5): Sequential(
    (0): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc6): Sequential(
    (0): Linear(in_features=9216, out_features=4096, bias=True)
    (1): ReLU()
    (2): Dropout2d(p=0.5)
  )


  "type " + obj.__name__ + ". It won't be checked "


----- Save model: /hostname/freiburg_groceries_dataset/pytorch_model/net_0.pth -----
loss:  3.1407110691070557
loss:  3.332801103591919
loss:  3.242111921310425
loss:  3.039886236190796
loss:  3.323746919631958
loss:  3.1665589809417725
loss:  3.279120683670044
loss:  3.1043264865875244
----- Save model: /hostname/freiburg_groceries_dataset/pytorch_model/net_1.pth -----
loss:  3.191225051879883
loss:  3.131523370742798
loss:  3.1674821376800537
loss:  3.122992515563965
loss:  3.283351182937622
loss:  3.0856029987335205
loss:  3.0652849674224854
loss:  3.2953293323516846
----- Save model: /hostname/freiburg_groceries_dataset/pytorch_model/net_2.pth -----
loss:  3.2337634563446045
loss:  3.1261751651763916
loss:  3.065736770629883
loss:  3.064817428588867
loss:  3.1288020610809326
loss:  3.023529291152954
loss:  3.0274829864501953
loss:  2.9620532989501953
loss:  2.578610897064209
----- Save model: /hostname/freiburg_groceries_dataset/pytorch_model/net_3.pth -----
loss:  2.97552657127380

In [None]:
test_caffenet()