In [16]:
import pandas as pd
import requests
import re
import os
import glob
import numpy as np

# from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.applications.resnet_v2 import ResNet50V2, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image

### Pre-trained model

In [18]:
# Feature extractor: ImageNet-pretrained ResNet50V2 without its classification
# head; global average pooling reduces the final feature map to one vector per
# image. (An earlier variant of this cell used VGG16's 'fc2' layer output.)
image_size = (60, 60)
base_model = ResNet50V2(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    # height, width from image_size plus the 3 colour channels
    input_shape=image_size + (3,),
)

Downloading data from https://github.com/keras-team/keras-applications/releases/download/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels_notop.h5


In [21]:
# Sanity check: embed one known image before processing the whole folder.
img = image.load_img('./gift_cards_images_resized/B00MV9I6QS.jpg', target_size=image_size)
# Convert to an array, add a leading batch axis, then apply the ResNetV2
# input preprocessing in a single expression.
x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))
feature = base_model.predict(x)
feature

array([[0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [25]:
# Check the embedding dimensions: one row for the single image in the batch.
feature.shape

(1, 2048)

### Generate Embeddings

In [26]:
# Embed every resized image with the pooled ResNet50V2 backbone.
# `features` and `item_id` are parallel lists: features[k] is the flattened
# embedding of the image whose file-name stem is item_id[k].
filenames = glob.glob('./gift_cards_images_resized/*jpg')

features = []
item_id = []
failed = []  # (filename, error repr) for every image that could not be embedded

for i, filename in enumerate(filenames):
    try:
        img = image.load_img(filename, target_size=image_size)
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)  # add the batch axis predict() expects
        x = preprocess_input(x)

        # extract features; verbose=0 keeps Keras from printing a progress
        # bar for every single-image call
        feature = base_model.predict(x, verbose=0)
        flat = feature.flatten()
    except Exception as exc:
        # Stay best-effort (one unreadable image must not abort the run), but
        # record the failure instead of hiding it. Catching Exception rather
        # than using a bare `except:` lets KeyboardInterrupt/SystemExit
        # propagate, so the cell can still be interrupted.
        failed.append((filename, repr(exc)))
    else:
        # Append both entries together so the two lists stay aligned.
        features.append(flat)
        # basename/splitext handle OS-specific separators and keep dots that
        # are part of the stem, unlike split('/') / split('.').
        item_id.append(os.path.splitext(os.path.basename(filename))[0])
    # Report progress by position in `filenames`, whether or not this image
    # succeeded, so a failure on a multiple of 100 does not drop the message.
    if i % 100 == 0:
        print("Processed image: " + str(i))

if failed:
    print("Skipped " + str(len(failed)) + " of " + str(len(filenames)) + " images:")
    for failed_name, failed_reason in failed:
        print("  " + failed_name + ": " + failed_reason)

Processed image: 0
Processed image: 100
Processed image: 200
Processed image: 300
Processed image: 400
Processed image: 500
Processed image: 600


In [27]:
# Pair each item ID with its embedding vector: one row per embedded image.
items = pd.DataFrame(
    data={
        'item_id': item_id,
        'image_features': features,
    }
)
items

Unnamed: 0,item_id,image_features
0,B018U5OE5S,"[0.0, 0.0, 1.0194079, 0.0, 0.0, 0.0, 0.0, 0.0,..."
1,B00H5BOA4O,"[0.0, 0.0, 0.09564662, 0.0, 0.0, 0.0, 0.599519..."
2,B00YD74M4Q,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.46861327, 0.0..."
3,B00P9VF3DE,"[0.0, 0.071196795, 4.2170134, 0.0, 0.0, 0.0, 0..."
4,B005VUPI7O,"[0.0, 0.7706065, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
5,B00GXSDG2C,"[0.0, 0.0, 0.09564662, 0.0, 0.0, 0.0, 0.599519..."
6,B00H5BO0DK,"[0.0, 0.0, 0.09564662, 0.0, 0.0, 0.0, 0.599519..."
7,B00CT7B2KO,"[0.0, 0.0, 0.09564662, 0.0, 0.0, 0.0, 0.599519..."
8,B00CT7DJZ0,"[0.0, 0.0, 0.09564662, 0.0, 0.0, 0.0, 0.599519..."
9,B00BXLSSOU,"[0.0, 2.3521254, 0.0, 0.0, 0.08767879, 0.0, 0...."


In [28]:
# Persist the embeddings table as a pickle.
# (Alternative output name for the VGG16 variant: './image_vecs_vgg16.pkl'.)
out_path = './image_vecs_resnet50v2.pkl'
items.to_pickle(path=out_path)