In [1]:
import tensorflow as tf
slim = tf.contrib.slim
from PIL import Image
from vgg import *
import numpy as np
import requests
from io import BytesIO
#from math import 
import time

from PIL import ImageFile
# Ask Pillow to load image files whose data is truncated instead of raising,
# so partially downloaded images can still be processed.
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [2]:
import pickle as pkl
# Load the id -> image-metadata mapping. Each value is later unpacked as
# (url, artist) by get_image.
# NOTE: pickle can execute arbitrary code while loading; only read files that
# were produced by a trusted source.
with open('id_img3.p', 'rb') as id_img_file:
    id_img_dict = pkl.load(id_img_file)

In [3]:
import matplotlib.pyplot as plt

In [4]:
def get_image(_id, data, max_retries=26, timeout=30.0):
    """Download one image and convert it into a (1, 224, 224, 3) float32 array.

    The image is fetched over HTTP, flattened to RGB, centre-cropped to a
    square, resized to 224x224 and returned with a leading batch axis.

    Args:
        _id: Identifier of the image; returned unchanged so results can be
            matched up after a parallel map.
        data: A ``(url, artist)`` pair.
        max_retries: Number of additional attempts made after the first
            request fails (non-200 status or network error). The default
            matches the retry count of the previous implementation.
        timeout: Per-request timeout in seconds, so a stalled server cannot
            block a worker indefinitely.

    Returns:
        ``(_id, image_array, artist)`` on success, or ``(_id, None, None)``
        when the image cannot be downloaded or decoded.
    """
    url, artist = data
    failure = (_id, None, None)

    # Fetch with bounded retries, pausing one second between attempts.
    # Network errors are treated like a bad status code rather than being
    # allowed to propagate and abort the whole worker pool.
    response = None
    for attempt in range(max_retries + 1):
        if attempt:
            time.sleep(1.0)
        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException:
            continue
        if response.status_code == 200:
            break
    else:
        # Every attempt either raised or returned a non-200 status.
        return failure

    try:
        img = Image.open(BytesIO(response.content))

        if img.mode == 'RGBA':
            # Composite onto white so transparent regions do not turn black.
            background = Image.new("RGB", img.size, (255, 255, 255))
            background.paste(img, mask=img.split()[3])  # 3 is the alpha channel
            img = background
        elif img.mode != 'RGB':
            # Greyscale, palette, CMYK, ... would otherwise fail the final
            # reshape and be silently dropped.
            img = img.convert('RGB')

        # Largest centred square that fits inside the image.
        width, height = img.size
        half_width, half_height = width // 2, height // 2
        offset = min(half_width, half_height)
        if offset == 0:
            # A one-pixel-wide or -tall image leaves nothing to crop.
            return failure

        sq_im = img.crop((half_width - offset, half_height - offset,
                          half_width + offset, half_height + offset))
        sq_im = sq_im.resize((224, 224), Image.BICUBIC)

        im = np.array(sq_im, dtype='float32')
    except OSError:
        # Pillow decodes lazily, so corrupt data may only surface during
        # paste/convert/resize rather than at Image.open.
        return failure

    try:
        im = im.reshape(-1, 224, 224, 3)
    except ValueError:
        # Defensive: unexpected channel layout.
        return failure

    return _id, im, artist



In [5]:
# Path of the checkpoint that is restored into the TensorFlow session below.
checkpoint_file = './vgg_16.ckpt'

In [6]:
# Load features computed by earlier runs so already-processed ids can be
# skipped; new (features, artist) entries are added to this dict by the
# extraction loop.
# NOTE: pickle can execute arbitrary code while loading; only read files that
# were produced by a trusted source.
with open('id_feature_dict_with_artist3.p', 'rb') as feature_file:
    id_feature_dict = pkl.load(feature_file)

In [7]:
# Placeholder for a batch of 224x224 three-channel images (batch size left open).
input_tensor = tf.placeholder(tf.float32, shape=(None,224,224,3), name='input_image')
# Rescale the input: x/255, then -0.5, then *2. For pixel values in [0, 255]
# this maps them to [-1, 1].
# NOTE(review): the stock TF-Slim VGG preprocessing subtracts per-channel means
# rather than scaling to [-1, 1] -- confirm this matches the checkpoint. Changing
# it would make new features incompatible with any cached in id_feature_dict.
scaled_input_tensor = tf.scalar_mul((1.0/255), input_tensor)
scaled_input_tensor = tf.subtract(scaled_input_tensor, 0.5)
scaled_input_tensor = tf.multiply(scaled_input_tensor, 2.0)

# Build the VGG-16 graph in inference mode; end_points is indexed by layer name
# later (e.g. 'vgg_16/fc7') to fetch intermediate activations.
arg_scope = vgg_arg_scope()
with slim.arg_scope(arg_scope):
    _, end_points = vgg_16(scaled_input_tensor, is_training=False)
    
# Create the session and restore the variables from checkpoint_file.
sess = tf.Session()
saver = tf.train.Saver()
saver.restore(sess, checkpoint_file)
    

In [8]:
# Remove Pillow's upper bound on image size so very large images are not
# rejected. NOTE(review): the images here are downloaded from URLs, so this
# also removes the guard against decompression-bomb files -- confirm that is
# acceptable for this data source.
Image.MAX_IMAGE_PIXELS = None

In [9]:
from multiprocessing import Pool

In [10]:
# Flatten the mapping into a list of (id, value) pairs so it can be sliced
# into batches and passed to Pool.starmap.
id_img_list = list(id_img_dict.items())

In [13]:
# Keep only the entries whose id has no stored features yet.
id_img_list = [
    entry
    for entry in id_img_list
    if entry[0] not in id_feature_dict
]

In [15]:
# Download the remaining images in parallel, push each batch through the
# network and store the fc7 activations in id_feature_dict as
# id -> (features, artist).
batch_size = 64
total = len(id_img_list)

pool = Pool(processes=6)
try:
    for start in range(0, total, batch_size):
        end = min(start + batch_size, total)
        id_imgs = id_img_list[start:end]

        # get_image returns (id, array-or-None, artist); drop the failures.
        images = pool.starmap(get_image, id_imgs)
        images = [image for image in images if image[1] is not None]

        if images:
            ids = [image[0] for image in images]
            artists = [image[2] for image in images]
            # Each array already carries a leading batch axis, so joining
            # along axis 0 yields (n, 224, 224, 3) even when n == 1.
            im = np.concatenate([image[1] for image in images], axis=0)

            features = sess.run(end_points['vgg_16/fc7'], feed_dict={input_tensor: im})
            # Flatten to one row per image. A bare np.squeeze would also drop
            # the batch axis when only one image survived, and zip() would
            # then pair the id with a single scalar instead of its vector.
            # (fc7 presumably comes back as (n, 1, 1, d) -- confirm against
            # the vgg module.)
            features = np.reshape(features, (len(ids), -1))

            for id_, feats, artist in zip(ids, features, artists):
                id_feature_dict[id_] = (feats, artist)

        # Report the number of entries actually processed so far.
        print('Finished with {} out of {}'.format(end, total))
finally:
    # Always release the worker processes, even if a batch raised.
    pool.close()
    pool.join()


Finished with 128 out of 46641
Finished with 192 out of 46641
Finished with 256 out of 46641
Finished with 320 out of 46641
Finished with 384 out of 46641
Finished with 448 out of 46641
Finished with 512 out of 46641
Finished with 576 out of 46641
Finished with 640 out of 46641
Finished with 704 out of 46641
Finished with 768 out of 46641
Finished with 832 out of 46641
Finished with 896 out of 46641
Finished with 960 out of 46641
Finished with 1024 out of 46641
Finished with 1088 out of 46641
Finished with 1152 out of 46641
Finished with 1216 out of 46641
Finished with 1280 out of 46641
Finished with 1344 out of 46641
Finished with 1408 out of 46641
Finished with 1472 out of 46641
Finished with 1536 out of 46641
Finished with 1600 out of 46641
Finished with 1664 out of 46641
Finished with 1728 out of 46641
Finished with 1792 out of 46641
Finished with 1856 out of 46641
Finished with 1920 out of 46641
Finished with 1984 out of 46641
Finished with 2048 out of 46641
Finished with 2112 out

In [16]:
# Persist the updated feature dictionary. The context manager guarantees the
# file is flushed and closed before the cell finishes.
with open('id_feature_dict_with_artist3_2.p', 'wb') as feature_file:
    pkl.dump(id_feature_dict, feature_file)

In [None]:
# Release the resources held by the TensorFlow session once extraction is done.
sess.close()