# Examples of how to use puppy embeddings.

In [1]:
%%capture

import paddle.v2 as paddle
import gzip
import numpy as np
import random

from category_stage import squeezenet
from siamese_stage import siamese, infer

from scipy.spatial.distance import cosine
import numpy as np

# Initialize PaddlePaddle.
paddle.init(use_gpu=False, trainer_count=1)

DATA_DIM = 3 * 128 * 128
CLASS_DIM = 120
BATCH_SIZE = 256

# Define input layers
image = paddle.layer.data(
    name="image", type=paddle.data_type.dense_vector(DATA_DIM))
lbl = paddle.layer.data(
    name="label", type=paddle.data_type.integer_value(CLASS_DIM))

# Configure the neural network.
out, intermediate = squeezenet(image, CLASS_DIM, True, True)
cost = paddle.layer.classification_cost(input=out, label=lbl)

with gzip.open('/book/working/models/params_pass_47.tar.gz', 'r') as f:
    parameters = paddle.parameters.Parameters.from_tar(f)
    
# Get data (or subsitute in your own!)
file_list = [line.strip().split("\t")[0] for line in open("/book/working/data/val.list")]
test_data = [(paddle.image.load_and_transform(image_file, 128 + 64, 128, False)
      .flatten().astype('float32'), )
     for image_file in file_list]

# Pseudo-batch data
batches = []
for i in range((len(test_data) / 100)):
    batches.append(test_data[i:i+100])

# Now lets load in our actual embeddings

In [2]:
# Get intermediate embs from squeezenet
intermediate_embs = []
for batch in batches:
    intermediate_embs += list(paddle.infer(
        output_layer=intermediate,
        parameters=parameters,
        input=batch))

# Get final embeddings from Siamese
embs = infer.intermediate(np.array(intermediate_embs, dtype=np.float32))

# Example usage
Cool ways of using puppy embeddings. Most of these are analogous to word embedding operations.

# Similarity calculation

One really easy way of calculating the similarity of two dog images is simply calculating the euclidian distance between their embeddings. First we find the distance between these two dogs:
![title](/tree/working/data/Images/n02085620-Chihuahua/n02085620_2650.jpg)
![title](/tree/working/data/Images/n02085620-Chihuahua/n02085620_2517.jpg)

In [3]:
# Here, we compare two pictures of chihuahuas.
print(file_list[0])
print(file_list[5])
# The distance between the chihuahuas is low: 0.1
print(cosine(embs[0], embs[5]))

/book/working/data/Images/n02085620-Chihuahua/n02085620_2650.jpg
/book/working/data/Images/n02085620-Chihuahua/n02085620_2517.jpg
0.10574257373809814


The distance between those two dogs is very low: 0.105. This is because the dogs are similar.

Now, let's find the distance between these two dogs
![title](/tree/working/data/Images/n02085620-Chihuahua/n02085620_2517.jpg)
![title](/tree/working/data/Images/n02085936-Maltese_dog/n02085936_13378.jpg)

In [4]:
# Here we compare a chihuahua and a maltese dog.
print(file_list[5])
print(file_list[209])
# The distance between them is high: 0.7
print(cosine(embs[5], embs[209]))

/book/working/data/Images/n02085620-Chihuahua/n02085620_2517.jpg
/book/working/data/Images/n02085936-Maltese_dog/n02085936_13378.jpg
0.7161149084568024


The distance between these two dogs is much higher than last time: 0.7. This is because the rare puppers look different.

# Puppy algebra

Now, we can apply algebra to puppies. For example, we will add two puppy pictures to get a puppy that looks similar to both. Here, we will add these two dogs:
![Dog #1](/tree/working/data/Images/n02085620-Chihuahua/n02085620_4919.jpg)
![Dog #2](/tree/working/data/Images/n02091831-Saluki/n02091831_648.jpg)

In [5]:
# We print the dogs' picture path.
print(file_list[1])
print(file_list[2047])
# We first add the two puppies embeddings together.
vector = embs[1] + embs[2047]
# We normalize the vector to norm1.
vector = vector / np.linalg.norm(vector)

# We find the closest image to the vector embedding.
min_dist = 1
best = None
for i, emb in enumerate(embs):
    if cosine(emb, vector) < min_dist:
        best = (i, emb)
        min_dist = cosine(emb, vector)

# We print the resulting dog's picture path.
print("Best match: " + file_list[best[0]])
# Print confidence score.
print(min_dist)

/book/working/data/Images/n02085620-Chihuahua/n02085620_4919.jpg
/book/working/data/Images/n02091831-Saluki/n02091831_648.jpg
Best match: /book/working/data/Images/n02115913-dhole/n02115913_3991.jpg
0.015775024890899658


As seen, the result of adding those two dogs gives us the dog at path `/book/working/data/Images/n02115913-dhole/n02115913_3991.jpg`. That path is this picture:
![Dog #1 + Dog #2 =](/tree/working/data/Images/n02115913-dhole/n02115913_3991.jpg)
It's got the ears and the mouth of the chihuahua, and the sleek and brown body of the maltese dog.
Try this on other pairs!