In [1]:
import pickle
import os
import io
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
import cv2
from torchvision import transforms, utils
from skimage import transform
import torchvision
from torch import nn, optim
from torch.autograd import Variable
from torch.nn import functional
import matplotlib.pyplot as plt
import time

In [2]:
image_dir = r"C:\Users\Leo's PC\Desktop\images"
pickle_dir = r"D:\Pickle"

In [3]:
class AVADataset(Dataset):

    def __init__(self, pickle_dir, file_dir, transform=None):
        self.file_dir = file_dir
        self.transform = transform
        self.diction = pickle.load(open(pickle_dir, 'rb'))
        print(self.diction)
        print('length', len(self.diction))
        
    def __len__(self):
        return len(self.diction)

    def __getitem__(self, idx):
        img_name = self.diction[idx][0]
        rat_avg = self.diction[idx][1]
        directory = self.file_dir + "\\" + str(img_name) + '.jpg'
        image = cv2.imread(directory, cv2.IMREAD_COLOR)
        sample = {'image': np.array(image, dtype=float), 'rating': np.array(rat_avg, dtype=float)}

        if self.transform:
            sample = self.transform(sample)

        return sample

In [4]:
class Rescale(object):
 """Rescale the image in a sample to a given size.

 Args:
     output_size (tuple or int): Desired output size. If tuple, output is
         matched to output_size. If int, smaller of image edges is matched
         to output_size keeping aspect ratio the same.
 """

 def __init__(self, output_size):
     assert isinstance(output_size, (int, tuple))
     self.output_size = output_size

 def __call__(self, sample):
     image, rating = sample['image'], sample['rating']

     h, w = image.shape[:2]
     if isinstance(self.output_size, int):
         if h > w:
             new_h, new_w = self.output_size * h / w, self.output_size
         else:
             new_h, new_w = self.output_size, self.output_size * w / h
     else:
         new_h, new_w = self.output_size

     new_h, new_w = int(new_h), int(new_w)

     img = transform.resize(image, (new_h, new_w))

     return {'image': img, 'rating': rating}

In [5]:
class ToTensor(object):
  """Convert ndarrays in sample to Tensors."""

  def __call__(self, sample):
      image, rating = sample['image'], sample['rating']
      rating = np.array(rating)
      # swap color axis because
      # numpy image: H x W x C
      # torch image: C X H X W
      image = image.transpose((2, 0, 1))
      return {'image': torch.from_numpy(image),
              'rating': torch.from_numpy(rating)}


In [6]:
AVA = AVADataset(pickle_dir=pickle_dir, file_dir=image_dir, transform=transforms.Compose([Rescale((299, 299)), ToTensor()]))


{153: [771257, 0], 230: [771425, 0], 331: [930790, 0], 409: [930799, 0], 425: [930258, 0], 441: [930373, 0], 509: [149446, 0], 520: [149547, 0], 534: [149545, 0], 540: [147836, 0], 555: [147906, 0], 647: [148542, 0], 669: [147996, 0], 737: [775717, 0], 766: [774992, 0], 825: [689802, 0], 834: [689755, 0], 858: [689713, 0], 922: [689698, 0], 925: [308886, 0], 958: [308647, 0], 1047: [308709, 0], 1122: [348170, 0], 1140: [350916, 0], 1225: [349745, 0], 1226: [347813, 0], 1241: [349978, 0], 1246: [349019, 0], 1257: [349646, 0], 1283: [268694, 0], 1295: [268856, 0], 1312: [266380, 0], 1347: [267236, 0], 1348: [267683, 0], 1354: [268416, 0], 1360: [268728, 0], 1373: [268057, 0], 1379: [267071, 0], 1382: [268860, 0], 1399: [917802, 0], 1434: [917722, 0], 1573: [7088, 0], 1576: [7046, 0], 1587: [7003, 0], 1588: [7089, 0], 1623: [7130, 0], 1653: [7026, 0], 1663: [6975, 0], 1673: [7029, 0], 1690: [7126, 0], 1692: [7035, 0], 1705: [7022, 0], 1714: [7041, 0], 1728: [292972, 0], 1745: [295735, 0],

In [7]:
train_loader = DataLoader(dataset=AVA, batch_size=20,shuffle=True)
val_loader = DataLoader(dataset=AVA, batch_size=20, shuffle=False)

In [8]:
inception_v3 = torchvision.models.inception_v3(pretrained=True)
inception_v3.fc = nn.Linear(in_features=inception_v3.fc.in_features, out_features=1024)

In [9]:
class LeoNet(nn.Module):
 def __init__(self):
     super(LeoNet, self).__init__()
     self.inception = inception_v3  # input shape is 299*299*3
     self.fc1 = nn.Linear(in_features=1024, out_features=512, bias=True)
     self.fc2 = nn.Linear(in_features=512, out_features=256, bias=True)
     self.fc3 = nn.Linear(in_features=256, out_features=64, bias=True)
     self.fc4 = nn.Linear(in_features=64, out_features=16, bias=True)
     self.fc5 = nn.Linear(in_features=16, out_features=1, bias=True)

 def forward(self, x):
     x, _ = inception_v3(x)
     x = functional.relu(self.fc1(x))
     x = functional.relu(self.fc2(x))
     x = functional.relu(self.fc3(x))
     x = torch.sigmoid(self.fc4(x))
     x = torch.sigmoid(self.fc5(x))
     return x

 def name(self):
     return "LeoNet"

In [10]:
model = LeoNet()

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = optim.SGD(params=model.parameters(), lr=0.01, momentum=0.9)
criterion = nn.CrossEntropyLoss()

In [11]:
for epoch in range(10):
 ave_loss = 0
 for batch_idx, diction in enumerate(train_loader):
    optimizer.zero_grad()
    x, target = diction['image'], diction['rating']
    target = target.resize_(x.size()[0])
    x, target = x.float(), target.long()
    x, target = x.to(device), target.to(device)
    x, target = Variable(x), Variable(target)
    out = model(x) * 10
    if (batch_idx + 1) % 100 == 0:
        print(target)
        print(out)
        print(x.size()[0])
    loss = criterion(out, target)
    ave_loss = ave_loss * 0.9 + loss.item() * 0.1
    optimizer.step()
    if (batch_idx + 1) % 100 == 0 or (batch_idx + 1) == len(train_loader):
        print('==>>> epoch: {}, batch index: {}, train loss: {:.6f}'.format(epoch, batch_idx + 1, loss))
    loss.backward()

 correct_cnt, ave_loss = 0, 0
 total_cnt = 0
 for batch_idx, diction in enumerate(val_loader):
     x, target = diction['image'], diction['rating']
     target = target.resize_(x.size()[0])
     x, target = x.float(), target.long()
     x, target = x.to(device), target.to(device)
     x, target = Variable(x), Variable(target)
     out = model(x) * 10
     loss = criterion(out, target)
     _, pred_label = torch.max(out.data, 1)
     total_cnt += x.data.size()[0]
     correct_cnt += (pred_label == target.data).sum()
     # smooth average
     ave_loss = ave_loss * 0.9 + loss.item() * 0.1

     if (batch_idx + 1) % 100 == 0 or (batch_idx + 1) == len(val_loader):
         print(
         '==>>> epoch: {}, batch index: {}, test loss: {:.6f}, acc: {:.3f}'.format(
             epoch, batch_idx + 1, ave_loss, correct_cnt.item() * 1.0 / total_cnt))
        

tensor([1, 2, 0, 0, 2, 4, 2, 4, 1, 2, 3, 2, 3, 1, 0, 2, 3, 3, 4, 0],
       device='cuda:0') tensor([[5.0084],
        [5.0115],
        [5.0122],
        [5.0164],
        [5.0139],
        [5.0161],
        [5.0178],
        [5.0110],
        [5.0153],
        [5.0168],
        [5.0180],
        [5.0142],
        [5.0137],
        [5.0106],
        [5.0154],
        [5.0118],
        [5.0139],
        [5.0223],
        [5.0170],
        [5.0165]], device='cuda:0', grad_fn=<MulBackward0>)
20
==>>> epoch: 0, batch index: 100, train loss: -0.064073
tensor([3, 0, 3, 0, 0, 0, 1, 2, 3, 3, 2, 2, 4, 2, 0, 1, 0, 1, 0, 0],
       device='cuda:0') tensor([[5.0170],
        [5.0120],
        [5.0090],
        [5.0171],
        [5.0151],
        [5.0205],
        [5.0169],
        [5.0211],
        [5.0180],
        [5.0154],
        [5.0102],
        [5.0185],
        [5.0173],
        [5.0119],
        [5.0126],
        [5.0134],
        [5.0067],
        [5.0170],
        [5.0146],
        [5.

ValueError: Expected input batch_size (3) to match target batch_size (20).

In [None]:
for i in range(len(AVA)):
    sample = AVA[i + 1000]
    print(i, sample['image'].size(), sample['rating'].size())
    img = sample['image'][1]
    img = np.array(img)
    print(i, sample['image'].size(), sample['rating'].size())
    plt.imshow(img)
    break