In [49]:
import cv2
import numpy as np
import scipy
from scipy.misc import imread
import cloudpickle as pickle
import random
import os
import matplotlib.pyplot as plt

In [89]:
# Feature extractor
def extract_features(image_path, vector_size=32):
    image = imread(image_path, mode="RGB")
    try:
        # Using KAZE, cause SIFT, ORB and other was moved to additional module
        # which is adding addtional pain during install
        alg = cv2.KAZE_create()
        # Dinding image keypoints
        kps = alg.detect(image)
        # Getting first 32 of them. 
        # Number of keypoints is varies depend on image size and color pallet
        # Sorting them based on keypoint response value(bigger is better)
        kps = sorted(kps, key=lambda x: -x.response)[:vector_size]
        # computing descriptors vector
        kps, dsc = alg.compute(image, kps)
        # Flatten all of them in one big vector - our feature vector
        dsc = dsc.flatten()
        # Making descriptor of same size
        # Descriptor vector size is 64
        needed_size = (vector_size * 64)
        if dsc.size < needed_size:
            # if we have less the 32 descriptors then just adding zeros at the
            # end of our feature vector
            dsc = np.concatenate([dsc, np.zeros(needed_size - dsc.size)])
    except cv2.error as e:
        print('Error: ', e)
        return None

    return dsc


def batch_extractor(images_path, pickled_db_path="features.pck"):
    files = [os.path.join(images_path, p) for p in sorted(os.listdir(images_path))]

    result = {}
    for f in files:
        print('Extracting features from image %s' % f)
        name = f.split('/')[-1].lower()
        result[name] = extract_features(f)
    return result

In [None]:
extract_features('Data')

In [90]:
images_path = 'Dataset/'
files = [os.path.join(images_path, p) for p in sorted(os.listdir(images_path))]
# getting 3 random images 
sample = random.sample(files, 1)

batch_extractor(images_path)

Extracting features from image Dataset/images.jpeg


`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  This is separate from the ipykernel package so we can avoid doing imports until


{'images.jpeg': array([-0.08080703, -0.01050984,  0.08136856, ..., -0.00741246,
         0.10179509,  0.00747952], dtype=float32)}

In [104]:
s1 = sum(matrix[0])/len(matrix[0])
s1 = sum(matrix[1])/len(matrix[1])
v1 = np.std(matrix[0])
v2 = np.std(matrix[1])
m1 = matrix[0]
m2 = matrix[1]
m1 = (m1 - s1)/v1
m2 = (m2 - s2)/v2

for i in range(len(m1)):    

array([[  3.37502696e-02],
       [ -8.90203640e-02],
       [  3.41619812e-02],
       ..., 
       [  5.36635889e-05],
       [  3.74264491e-05],
       [  5.50459081e-05]], dtype=float32)

In [44]:
class Matcher(object):

    def __init__(self, pickled_db_path="features.pck"):
        with open(pickled_db_path) as fp:
            self.data = pickle.load(fp)
        self.names = []
        self.matrix = []
        for k, v in self.data.iteritems():
            self.names.append(k)
            self.matrix.append(v)
        self.matrix = np.array(self.matrix)
        self.names = np.array(self.names)

    def cos_cdist(self, vector):
        # getting cosine distance between search image and images database
        v = vector.reshape(1, -1)
        return scipy.spatial.distance.cdist(self.matrix, v, 'cosine').reshape(-1)

    def match(self, image_path, topn=5):
        features = extract_features(image_path)
        img_distances = self.cos_cdist(features)
        # getting top 5 records
        nearest_ids = np.argsort(img_distances)[:topn].tolist()
        nearest_img_paths = self.names[nearest_ids].tolist()

        return nearest_img_paths, img_distances[nearest_ids].tolist()

In [51]:
def show_img(path):
    img = imread(path, mode="RGB")
    plt.imshow(img)
    plt.show()
    
def run():
    images_path = 'Dataset/'
    files = [os.path.join(images_path, p) for p in sorted(os.listdir(images_path))]
    # getting 3 random images 
    sample = random.sample(files, 1)
    
    batch_extractor(images_path)

    ma = Matcher('features.pck')
    
    for s in sample:
        print('Query image ==========================================')
        show_img(s)
        names, match = ma.match(s, topn=1)
        print('Result images ========================================')
        for i in range(3):
            # we got cosine distance, less cosine distance between vectors
            # more they similar, thus we subtruct it from 1 to get match value
            print ('Match %s' % (1-match[i]))
            show_img(os.path.join(images_path, names[i]))

run()

Extracting features from image Dataset/coke_bottle.jpeg
Extracting features from image Dataset/images.jpeg


`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  This is separate from the ipykernel package so we can avoid doing imports until


TypeError: write() argument must be str, not bytes

In [87]:
img1 = matrix[0].transpose()
img2 = matrix[1]
sim = 0
for i in range(len(matrix[0])):
    sim += matrix[0][i]*matrix[1][i]
sim

18.914072407544399

In [70]:
images_path = 'Dataset/'
files = [os.path.join(images_path, p) for p in sorted(os.listdir(images_path))]

result = {}
for f in files:
    print('Extracting features from image %s' % f)
    name = f.split('/')[-1].lower()
    result[name] = extract_features(f)

    
names = []
matrix = []
for k, v in result.items():
    names.append(k)
    matrix.append(v)
matrix = np.array(matrix)
names = np.array(names)

Extracting features from image Dataset/coke_bottle.jpeg
Extracting features from image Dataset/images.jpeg


`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  This is separate from the ipykernel package so we can avoid doing imports until


In [73]:
matrix[]

array([  3.37502696e-02,  -8.90203640e-02,   3.41619812e-02, ...,
         5.36635889e-05,   3.74264491e-05,   5.50459081e-05], dtype=float32)

In [54]:
#with open(pickled_db_path, 'w') as fp:
#    pickle.dump(result, fp)

Extracting features from image Dataset/coke_bottle.jpeg
Extracting features from image Dataset/images.jpeg


`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  This is separate from the ipykernel package so we can avoid doing imports until


TypeError: write() argument must be str, not bytes

In [None]:
225*225*a

In [132]:
import numpy as np
from keras.preprocessing import image
test_image = image.load_img('Dataset/coke_bottle.jpeg')
test_image = image.img_to_array(test_image)

FileNotFoundError: [Errno 2] No such file or directory: 'Dataset/coke_bottle.jpeg'

In [None]:
from keras.layers import Conv3D

input_shape = 225*225*3
model = Sequential()
model.add(Conv2D(48, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape))
model.add((Conv3D(filters, kernel_size, padding='same', input_shape = (225,225,3))))

In [None]:
from keras.layers import Input, Dense
from keras.models import Model

# this is the size of our encoded representations
encoding_dim = 32  # 32 floats -> compression of factor 24.5, assuming the input is 784 floats

# this is our input placeholder
input_img = Input(shape=(784,))
# "encoded" is the encoded representation of the input
encoded = Dense(encoding_dim, activation='relu')(input_img)
# "decoded" is the lossy reconstruction of the input
decoded = Dense(784, activation='sigmoid')(encoded)

# this model maps an input to its reconstruction
autoencoder = Model(input_img, decoded)

# this model maps an input to its encoded representation
encoder = Model(input_img, encoded)

# create a placeholder for an encoded (32-dimensional) input
encoded_input = Input(shape=(encoding_dim,))
# retrieve the last layer of the autoencoder model
decoder_layer = autoencoder.layers[-1]
# create the decoder model
decoder = Model(encoded_input, decoder_layer(encoded_input))

autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')

autoencoder.fit(x_train, x_train,
                epochs=50,
                batch_size=256,
                shuffle=True,
                validation_data=(x_test, x_test))


encoded_imgs = encoder.predict(x_test)
decoded_imgs = decoder.predict(encoded_imgs)

# use Matplotlib (don't ask)
import matplotlib.pyplot as plt

n = 10  # how many digits we will display
plt.figure(figsize=(20, 4))
for i in range(n):
    # display original
    ax = plt.subplot(2, n, i + 1)
    plt.imshow(x_test[i].reshape(28, 28))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    # display reconstruction
    ax = plt.subplot(2, n, i + 1 + n)
    plt.imshow(decoded_imgs[i].reshape(28, 28))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()