In [27]:
import torch
from torch.utils.data import Dataset, DataLoader
import json
from tqdm import tqdm
import pprint
from torchvision import transforms, utils
from tqdm import tqdm_notebook as tqdm_nb
# import torch
import torchvision.transforms as transforms
import torch.utils.data.sampler as smp
# dataset
import numpy as np

#import configure as cf
from PIL import Image

def class2id(name):
    if(name == 'food'):
        return 0
    elif (name == 'inside'):
        return 1
    elif (name == 'outside'):
        return 2
    elif (name == 'drink'):
        return 3
    elif (name == 'menu'):
        return 4
    else:
        return 5
    

class YelpDataSet(torch.utils.data.Dataset):
    def __init__(self, photo_dir,transform=None):
        self.photo_dir = photo_dir + '/photo'
        self.photo_json_dir = photo_dir + '/photo.json'
        
        self.transform = transform
        self.photo_id = []
        self.labels = []
        
        f = open(self.photo_json_dir, "r")
        for line in f.readlines():
            dic = json.loads(line)
            self.photo_id.append(dic['photo_id'])
            self.labels.append(class2id(dic['label']))

    def __len__(self):
        return len(self.photo_id)
    
    def __getitem__(self, idx):
        img_address = self.photo_dir + '/' + self.photo_id[idx] + '.jpg'
        image = Image.open(img_address).convert('RGB')
        if self.transform:
            image = self.transform(image)
        # dic = {'image': image, 'stars': float(self.photo_id[idx]['stars'])}
        label = self.labels[idx]
        return image, label
    
image= YelpDataSet(photo_dir = "/Users/xin/Downloads",transform=None)
print image
num_train = len(image)
indices = list(range(num_train))


set_sum = num_train

split = int(np.floor(0.1 * set_sum))

np.random.seed(32)
np.random.shuffle(indices)

train_idx, valid_idx = indices[split:set_sum], indices[:split]

train_sampler = smp.SubsetRandomSampler(train_idx)
valid_sampler = smp.SubsetRandomSampler(valid_idx)
print train_sampler



<__main__.YelpDataSet object at 0x10c1d1dd0>
<torch.utils.data.sampler.SubsetRandomSampler object at 0x10c0f3290>


In [39]:
def get_train_valid_loader(photo_dir,
                               batch_size=1,
                               random_seed=32,
                               transform=None,
                               valid_size=0.1,
                               set_num = -1,
                               shuffle=True,
                               num_workers=4,
                               pin_memory=False):

        #error_msg = "[!] valid_size should be in the range [0, 1]."
        #assert ((valid_size >= 0) and (valid_size <= 1)), error_msg

        # load the dataset
        yelpDataset = YelpDataSet(photo_dir, transform)
        num_train = len(yelpDataset)
        indices = list(range(num_train))
        if set_num == -1:
            set_sum = num_train
        else:
            set_sum = set_num
        split = int(np.floor(valid_size * set_sum))

        if shuffle:
            np.random.seed(random_seed)
            np.random.shuffle(indices)

        train_idx, valid_idx = indices[split:set_sum], indices[:split]

        train_sampler = smp.SubsetRandomSampler(train_idx)
        valid_sampler = smp.SubsetRandomSampler(valid_idx)

        train_loader = torch.utils.data.DataLoader(yelpDataset,
                                                   batch_size=batch_size, sampler=train_sampler,
                                                   num_workers=num_workers, pin_memory=pin_memory)

        valid_loader = torch.utils.data.DataLoader(yelpDataset,
                                                   batch_size=batch_size, sampler=valid_sampler,
                                                   num_workers=num_workers, pin_memory=pin_memory)

        return train_loader, valid_loader
    
imgTransform = transforms.Compose([transforms.Scale((224)),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor()])
#dataLoader = DataLoader()
trainLoader,valLoader = get_train_valid_loader(photo_dir = "/home/jin/Downloads",transform=imgTransform)



In [44]:
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

resnet = models.resnet18(pretrained = True)
network = resnet
#from tqdm import tqdm as tqdm
# Try this if the above gives trouble: from tqdm import tqdm_notebook as tqdm

learningRate = 1e-2  # Single learning rate for this lab.


#Definition of our loss.
criterion = nn.CrossEntropyLoss()

# Definition of optimization strategy.
optimizer = optim.SGD(network.parameters(), lr = learningRate)

def train_model(network, criterion, optimizer, trainLoader, valLoader, n_epochs = 10, use_gpu = False):
    if use_gpu:
        network = network.cuda()
        criterion = criterion.cuda()
        
    # Training loop.
    for epoch in range(0, n_epochs):
        correct = 0.0
        cum_loss = 0.0
        counter = 0

        # Make a pass over the training data.
        t = tqdm(trainLoader, desc = 'Training epoch %d' % epoch)
        network.train()  # This is important to call before training!
        for (i, (inputs, labels)) in enumerate(t):

            # Wrap inputs, and targets into torch.autograd.Variable types.
            inputs = Variable(inputs)
            labels = Variable(labels)
            
            if use_gpu:
                inputs = inputs.cuda()
                labels = labels.cuda()

            # Forward pass:
            outputs = network(inputs)
            loss = criterion(outputs, labels)

            # Backward pass:
            optimizer.zero_grad()
            # Loss is a variable, and calling backward on a Variable will
            # compute all the gradients that lead to that Variable taking on its
            # current value.
            loss.backward() 

            # Weight and bias updates.
            optimizer.step()

            # logging information.
            cum_loss += loss.data[0]
            max_scores, max_labels = outputs.data.max(1)
            correct += (max_labels == labels.data).sum()
            counter += inputs.size(0)
            t.set_postfix(loss = cum_loss / (1 + i), accuracy = 100 * correct / counter)

        # Make a pass over the validation data.
        correct = 0.0
        cum_loss = 0.0
        counter = 0
        t = tqdm(valLoader, desc = 'Validation epoch %d' % epoch)
        network.eval()  # This is important to call before evaluating!
        for (i, (inputs, labels)) in enumerate(t):

            # Wrap inputs, and targets into torch.autograd.Variable types.
            inputs = Variable(inputs)
            labels = Variable(labels)
            
            if use_gpu:
                inputs = inputs.cuda()
                labels = labels.cuda()

            # Forward pass:
            outputs = network(inputs)
            loss = criterion(outputs, labels)

            # logging information.
            cum_loss += loss.data[0]
            max_scores, max_labels = outputs.data.max(1)
            correct += (max_labels == labels.data).sum()
            counter += inputs.size(0)
            t.set_postfix(loss = cum_loss / (1 + i), accuracy = 100 * correct / counter)
            
train_model(network, criterion, optimizer, trainLoader, valLoader, n_epochs = 0, use_gpu = True)           
# Train the previously defined model.
#loss_trn, loss_vld, accuracy_trn, accuracy_vld = train_model_nogpu(network, criterion, optimizer, trainLoader, valLoader, n_epochs = 1)





Training epoch 0:   0%|          | 0/2 [00:00<?, ?it/s][A
Training epoch 0:   0%|          | 0/2 [00:00<?, ?it/s, accuracy=0, loss=7.8][A
Training epoch 0:  50%|█████     | 1/2 [00:00<00:00,  2.10it/s, accuracy=0, loss=7.8][A
Training epoch 0:  50%|█████     | 1/2 [00:00<00:00,  1.34it/s, accuracy=0, loss=7.82][A
Training epoch 0: 100%|██████████| 2/2 [00:00<00:00,  2.41it/s, accuracy=0, loss=7.82][A
[A
Validation epoch 0: 0it [00:00, ?it/s][A


RuntimeError: invalid argument 1: must be strictly positive at /Users/soumith/code/builder/wheel/pytorch-src/torch/lib/TH/generic/THTensorMath.c:2033