# Building an image retrieval system with deep features


# Fire up GraphLab Create
(See [Getting Started with SFrames](../Week%201/Getting%20Started%20with%20SFrames.ipynb) for setup instructions)

In [1]:
import graphlab

Vendor:  Continuum Analytics, Inc.
Package: mkl
Message: trial mode expires in 30 days



In [2]:
# Limit number of worker processes. This preserves system memory, which prevents hosted notebooks from crashing.
graphlab.set_runtime_config('GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS', 4)

[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1503689440.log


This non-commercial license of GraphLab Create for academic use is assigned to gauriaddepalli@gmail.com and will expire on August 10, 2018.


# Load the CIFAR-10 dataset

We will use a popular benchmark dataset in computer vision called CIFAR-10.  

(We've reduced the data to just 4 categories = {'cat','bird','automobile','dog'}.)

This dataset is already split into a training set and test set. In this simple retrieval example, there is no notion of "testing", so we will only use the training data.

In [3]:
image_train = graphlab.SFrame('image_train_data/')
image_test = graphlab.SFrame('image_test_data/')

# Computing deep features for our images

The two lines below allow us to compute deep features.  This computation takes a little while, so we have already computed them and saved the results as a column in the data you loaded. 

(Note that if you would like to compute such deep features and have a GPU on your machine, you should use the GPU enabled GraphLab Create, which will be significantly faster for this task.)

In [4]:
# deep_learning_model = graphlab.load_model('http://s3.amazonaws.com/GraphLab-Datasets/deeplearning/imagenet_model_iter45')
# image_train['deep_features'] = deep_learning_model.extract_features(image_train)

In [5]:
image_train.head()

id,image,label,deep_features,image_array
24,Height: 32 Width: 32,bird,"[0.242871761322, 1.09545373917, 0.0, ...","[73.0, 77.0, 58.0, 71.0, 68.0, 50.0, 77.0, 69.0, ..."
33,Height: 32 Width: 32,cat,"[0.525087952614, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[7.0, 5.0, 8.0, 7.0, 5.0, 8.0, 5.0, 4.0, 6.0, 7.0, ..."
36,Height: 32 Width: 32,cat,"[0.566015958786, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[169.0, 122.0, 65.0, 131.0, 108.0, 75.0, ..."
70,Height: 32 Width: 32,dog,"[1.12979578972, 0.0, 0.0, 0.778194487095, 0.0, ...","[154.0, 179.0, 152.0, 159.0, 183.0, 157.0, ..."
90,Height: 32 Width: 32,bird,"[1.71786928177, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[216.0, 195.0, 180.0, 201.0, 178.0, 160.0, ..."
97,Height: 32 Width: 32,automobile,"[1.57818555832, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[33.0, 44.0, 27.0, 29.0, 44.0, 31.0, 32.0, 45.0, ..."
107,Height: 32 Width: 32,dog,"[0.0, 0.0, 0.220677852631, 0.0, ...","[97.0, 51.0, 31.0, 104.0, 58.0, 38.0, 107.0, 61.0, ..."
121,Height: 32 Width: 32,bird,"[0.0, 0.23753464222, 0.0, 0.0, 0.0, 0.0, ...","[93.0, 96.0, 88.0, 102.0, 106.0, 97.0, 117.0, ..."
136,Height: 32 Width: 32,automobile,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.5737862587, 0.0, ...","[35.0, 59.0, 53.0, 36.0, 56.0, 56.0, 42.0, 62.0, ..."
138,Height: 32 Width: 32,bird,"[0.658935725689, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[205.0, 193.0, 195.0, 200.0, 187.0, 193.0, ..."


# Train a nearest-neighbors model for retrieving images using deep features

We will now build a simple image retrieval system that finds the nearest neighbors for any image.

In [112]:
dog = image_train[image_train['label']=='dog']
image_test_dog = image_test[image_test['label']=='dog']

In [113]:
cat = image_train[image_train['label']=='cat']
image_test_cat = image_test[image_test['label']=='cat']

In [114]:
bird = image_train[image_train['label']=='bird']
image_test_bird = image_test[image_test['label']=='bird']

In [115]:
automobile = image_train[image_train['label']=='automobile']
image_test_automobile = image_test[image_test['label']=='automobile']

In [10]:
knn_model = graphlab.nearest_neighbors.create(image_train,features=['deep_features'],
                                             label='id')

In [11]:
knn_model_dog = graphlab.nearest_neighbors.create(dog,features=['deep_features'],
                                             label='id')

# Use image retrieval model with deep features to find similar images

Let's find similar images to this cat picture.

In [12]:
knn_model_cat = graphlab.nearest_neighbors.create(cat,features=['deep_features'],
                                             label='id')

In [13]:
knn_model_bird = graphlab.nearest_neighbors.create(bird,features=['deep_features'],
                                             label='id')

In [14]:
knn_model_automobile = graphlab.nearest_neighbors.create(automobile,features=['deep_features'],
                                             label='id')

In [15]:
graphlab.canvas.set_target('ipynb')
cat = image_train[18:19]
cat1 = image_test[0:1]
cat1['image'].show()

In [110]:
c = knn_model_cat.query(cat1)
c['distance'].mean()

36.15573070978294

In [111]:
d = knn_model_dog.query(cat1)
d['distance'].mean()

37.77071136184157

We are going to create a simple function to view the nearest neighbors to save typing:

In [17]:
image_train.head()

id,image,label,deep_features,image_array
24,Height: 32 Width: 32,bird,"[0.242871761322, 1.09545373917, 0.0, ...","[73.0, 77.0, 58.0, 71.0, 68.0, 50.0, 77.0, 69.0, ..."
33,Height: 32 Width: 32,cat,"[0.525087952614, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[7.0, 5.0, 8.0, 7.0, 5.0, 8.0, 5.0, 4.0, 6.0, 7.0, ..."
36,Height: 32 Width: 32,cat,"[0.566015958786, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[169.0, 122.0, 65.0, 131.0, 108.0, 75.0, ..."
70,Height: 32 Width: 32,dog,"[1.12979578972, 0.0, 0.0, 0.778194487095, 0.0, ...","[154.0, 179.0, 152.0, 159.0, 183.0, 157.0, ..."
90,Height: 32 Width: 32,bird,"[1.71786928177, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[216.0, 195.0, 180.0, 201.0, 178.0, 160.0, ..."
97,Height: 32 Width: 32,automobile,"[1.57818555832, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[33.0, 44.0, 27.0, 29.0, 44.0, 31.0, 32.0, 45.0, ..."
107,Height: 32 Width: 32,dog,"[0.0, 0.0, 0.220677852631, 0.0, ...","[97.0, 51.0, 31.0, 104.0, 58.0, 38.0, 107.0, 61.0, ..."
121,Height: 32 Width: 32,bird,"[0.0, 0.23753464222, 0.0, 0.0, 0.0, 0.0, ...","[93.0, 96.0, 88.0, 102.0, 106.0, 97.0, 117.0, ..."
136,Height: 32 Width: 32,automobile,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.5737862587, 0.0, ...","[35.0, 59.0, 53.0, 36.0, 56.0, 56.0, 42.0, 62.0, ..."
138,Height: 32 Width: 32,bird,"[0.658935725689, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[205.0, 193.0, 195.0, 200.0, 187.0, 193.0, ..."


In [86]:
def get_images_from_ids(query_result):
    return image_train.filter_by(query_result['reference_label'],'id')

In [99]:
cat1_neighbors = get_images_from_ids(knn_model_cat.query(cat1))


In [101]:
dog1_neighbors = get_images_from_ids(knn_model_dog.query(cat1))

In [103]:
cat1_neighbors['image'].show()

In [104]:
dog1_neighbors['image'].show()

In [105]:
cat1_neighbors

id,image,label,deep_features,image_array
331,Height: 32 Width: 32,cat,"[0.0, 0.0, 0.510963916779, 0.0, ...","[45.0, 65.0, 92.0, 72.0, 95.0, 110.0, 106.0, ..."
16289,Height: 32 Width: 32,cat,"[0.964287519455, 0.0, 0.0, 0.0, 1.12515509129, ...","[215.0, 219.0, 231.0, 215.0, 219.0, 232.0, ..."
25713,Height: 32 Width: 32,cat,"[0.536971271038, 0.0, 0.0, 0.0894458889961, ...","[228.0, 222.0, 236.0, 224.0, 213.0, 222.0, ..."
32139,Height: 32 Width: 32,cat,"[1.29409468174, 0.0, 0.0, 0.513800263405, ...","[217.0, 220.0, 205.0, 221.0, 227.0, 218.0, ..."
45646,Height: 32 Width: 32,cat,"[0.983677506447, 0.0, 0.0, 0.0, 0.0, ...","[51.0, 42.0, 26.0, 56.0, 47.0, 31.0, 59.0, 50.0, ..."


In [139]:
len(image_test_dog)

1000

In [119]:
dog_cat_neighbors = knn_model_cat.query(image_test_dog, k=1)
dog_bird_neighbors = knn_model_bird.query(image_test_dog, k=1)
dog_automobile_neighbors = knn_model_automobile.query(image_test_dog, k=1)
dog_dog_neighbors = knn_model_dog.query(image_test_dog, k=1)
dog_automobile_neighbors

query_label,reference_label,distance,rank
0,33859,41.9579761457,1
1,2046,46.0021331807,1
2,19594,42.9462290692,1
3,11000,41.6866060048,1
4,19594,39.2269664935,1
5,49314,40.5845117698,1
6,40822,45.1067352961,1
7,44997,41.3221140974,1
8,33859,41.8244654995,1
9,33859,45.4976929401,1


In [120]:
dog_distances = graphlab.SFrame({'dog_automobile': dog_automobile_neighbors['distance'],'dog_bird': dog_bird_neighbors['distance'],'dog_cat': dog_cat_neighbors['distance'],'dog_dog': dog_dog_neighbors['distance']})

In [121]:
dog_distances

dog_automobile,dog_bird,dog_cat,dog_dog
41.9579761457,41.7538647304,36.4196077068,33.4773590373
46.0021331807,41.3382958925,38.8353268874,32.8458495684
42.9462290692,38.6157590853,36.9763410854,35.0397073189
41.6866060048,37.0892269954,34.5750072914,33.9010327697
39.2269664935,38.272288694,34.778824791,37.4849250909
40.5845117698,39.1462089236,35.1171578292,34.945165344
45.1067352961,40.523040106,40.6095830913,39.0957278345
41.3221140974,38.1947918393,39.9036867306,37.7696131032
41.8244654995,40.1567131661,38.0674700168,35.1089144603
45.4976929401,45.5597962603,42.7258732951,43.2422832585


In [135]:
def is_dog_correct(row):
    value = min(row, key=row.get)
    if(value=='dog_dog'):
        return 1;
    else:
        return 0;

In [137]:
tosum = dog_distances.apply(lambda x:is_dog_correct(x))

In [138]:
tosum.sum()

678

Very cool results showing similar cats.

## Finding similar images to a car

In [106]:
dog1_neighbors

id,image,label,deep_features,image_array
6094,Height: 32 Width: 32,dog,"[0.470533549786, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[91.0, 98.0, 71.0, 138.0, 123.0, 63.0, 135.0, ..."
13387,Height: 32 Width: 32,dog,"[0.366494178772, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[255.0, 255.0, 255.0, 255.0, 255.0, 255.0, ..."
16976,Height: 32 Width: 32,dog,"[0.755595386028, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[16.0, 17.0, 11.0, 18.0, 19.0, 13.0, 20.0, 21.0, ..."
35867,Height: 32 Width: 32,dog,"[0.305321395397, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[101.0, 93.0, 9.0, 93.0, 88.0, 9.0, 90.0, 86.0, ..."
44603,Height: 32 Width: 32,dog,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 11.2646656036, 0.0, ...","[8.0, 25.0, 9.0, 29.0, 39.0, 22.0, 66.0, 75.0, ..."


In [22]:
car = image_train[8:9]
car['image'].show()

In [23]:
get_images_from_ids(knn_model.query(car))['image'].show()

# Just for fun, let's create a lambda to find and show nearest neighbor images

In [24]:
show_neighbors = lambda i: get_images_from_ids(knn_model.query(image_train[i:i+1]))['image'].show()

In [25]:
show_neighbors(8)

In [26]:
show_neighbors(0)

In [27]:
knn_model.query(image_train[0:1])

query_label,reference_label,distance,rank
0,24,0.0,1
0,30224,36.5454765642,2
0,34071,37.8011450878,3
0,37742,41.0399299616,4
0,36130,41.5105447214,5


In [28]:
show_neighbors(26)

In [29]:
# creating subset of image_train dataset and category-specific image retrieval models

In [30]:
image_train_cat = image_train.filter_by('cat','label')
image_train_dog = image_train.filter_by('dog','label')
image_train_automobile = image_train.filter_by('automobile','label')
image_train_bird = image_train.filter_by('bird','label')

In [31]:
image_train_cat

id,image,label,deep_features,image_array
33,Height: 32 Width: 32,cat,"[0.525087952614, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[7.0, 5.0, 8.0, 7.0, 5.0, 8.0, 5.0, 4.0, 6.0, 7.0, ..."
36,Height: 32 Width: 32,cat,"[0.566015958786, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[169.0, 122.0, 65.0, 131.0, 108.0, 75.0, ..."
159,Height: 32 Width: 32,cat,"[0.0, 0.0, 0.0, 0.64327532053, 0.0, 0.0, ...","[154.0, 145.0, 135.0, 152.0, 144.0, 135.0, ..."
331,Height: 32 Width: 32,cat,"[0.0, 0.0, 0.510963916779, 0.0, ...","[45.0, 65.0, 92.0, 72.0, 95.0, 110.0, 106.0, ..."
367,Height: 32 Width: 32,cat,"[1.38658058643, 0.0, 0.0, 0.0, 0.0, 0.182891070 ...","[168.0, 151.0, 143.0, 145.0, 130.0, 124.0, ..."
384,Height: 32 Width: 32,cat,"[1.04403531551, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[46.0, 45.0, 50.0, 47.0, 45.0, 51.0, 45.0, 44.0, ..."
494,Height: 32 Width: 32,cat,"[0.0, 0.0539512038231, 1.95745122433, 0.0, 0.0, ...","[26.0, 34.0, 29.0, 24.0, 29.0, 25.0, 33.0, 43.0, ..."
597,Height: 32 Width: 32,cat,"[0.0, 0.0, 0.0470637083054, 0.0, ...","[133.0, 153.0, 138.0, 126.0, 146.0, 136.0, ..."
788,Height: 32 Width: 32,cat,"[0.505841910839, 0.0, 0.0, 0.0, 0.427211523 ...","[184.0, 200.0, 197.0, 189.0, 203.0, 200.0, ..."
882,Height: 32 Width: 32,cat,"[0.0, 0.0, 0.156200289726, 0.0, ...","[141.0, 133.0, 112.0, 143.0, 133.0, 113.0, ..."


In [32]:
image_train_dog

id,image,label,deep_features,image_array
70,Height: 32 Width: 32,dog,"[1.12979578972, 0.0, 0.0, 0.778194487095, 0.0, ...","[154.0, 179.0, 152.0, 159.0, 183.0, 157.0, ..."
107,Height: 32 Width: 32,dog,"[0.0, 0.0, 0.220677852631, 0.0, ...","[97.0, 51.0, 31.0, 104.0, 58.0, 38.0, 107.0, 61.0, ..."
177,Height: 32 Width: 32,dog,"[0.0, 1.45965671539, 0.0, 0.422992348671, 0.0, ...","[55.0, 75.0, 42.0, 51.0, 76.0, 37.0, 57.0, 83.0, ..."
424,Height: 32 Width: 32,dog,"[0.942399680614, 0.0, 0.220352768898, 0.0, ...","[60.0, 35.0, 18.0, 63.0, 49.0, 38.0, 66.0, 56.0, ..."
462,Height: 32 Width: 32,dog,"[1.43462562561, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[86.0, 69.0, 75.0, 57.0, 41.0, 48.0, 46.0, 35.0, ..."
542,Height: 32 Width: 32,dog,"[0.451547086239, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[196.0, 174.0, 113.0, 140.0, 117.0, 65.0, 8 ..."
573,Height: 32 Width: 32,dog,"[0.592360973358, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[153.0, 103.0, 52.0, 151.0, 102.0, 49.0, ..."
851,Height: 32 Width: 32,dog,"[0.690123438835, 0.0, 0.0, 0.0, 0.305860161 ...","[39.0, 6.0, 4.0, 53.0, 23.0, 24.0, 57.0, 37.0, ..."
919,Height: 32 Width: 32,dog,"[0.0, 0.177558660507, 0.139396846294, 0.0, ...","[29.0, 43.0, 4.0, 24.0, 35.0, 6.0, 24.0, 37.0, ..."
1172,Height: 32 Width: 32,dog,"[0.517601490021, 0.0, 1.96418333054, 0.0, 0.0, ...","[182.0, 180.0, 197.0, 196.0, 192.0, 209.0, ..."


In [33]:
image_train_automobile

id,image,label,deep_features,image_array
97,Height: 32 Width: 32,automobile,"[1.57818555832, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[33.0, 44.0, 27.0, 29.0, 44.0, 31.0, 32.0, 45.0, ..."
136,Height: 32 Width: 32,automobile,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.5737862587, 0.0, ...","[35.0, 59.0, 53.0, 36.0, 56.0, 56.0, 42.0, 62.0, ..."
302,Height: 32 Width: 32,automobile,"[0.583938002586, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[64.0, 52.0, 37.0, 85.0, 60.0, 40.0, 92.0, 66.0, ..."
312,Height: 32 Width: 32,automobile,"[0.0, 0.0, 0.0, 0.392823398113, 0.0, ...","[124.0, 126.0, 113.0, 124.0, 126.0, 113.0, ..."
323,Height: 32 Width: 32,automobile,"[0.0, 0.0, 0.0, 4.42310428619, ...","[241.0, 241.0, 241.0, 238.0, 238.0, 238.0, ..."
536,Height: 32 Width: 32,automobile,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.42903900146, 0.0, ...","[164.0, 154.0, 154.0, 128.0, 119.0, 120.0, ..."
593,Height: 32 Width: 32,automobile,"[1.65033948421, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[231.0, 222.0, 227.0, 232.0, 217.0, 221.0, ..."
962,Height: 32 Width: 32,automobile,"[0.0, 0.0, 0.0, 0.39552795887, 0.0, 0.0, ...","[255.0, 255.0, 255.0, 255.0, 255.0, 255.0, ..."
997,Height: 32 Width: 32,automobile,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.04085636139, 0.0, ...","[145.0, 148.0, 157.0, 131.0, 134.0, 145.0, ..."
1421,Height: 32 Width: 32,automobile,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.359612941742, ...","[114.0, 95.0, 33.0, 118.0, 98.0, 26.0, 91.0, ..."


In [34]:
image_train_bird

id,image,label,deep_features,image_array
24,Height: 32 Width: 32,bird,"[0.242871761322, 1.09545373917, 0.0, ...","[73.0, 77.0, 58.0, 71.0, 68.0, 50.0, 77.0, 69.0, ..."
90,Height: 32 Width: 32,bird,"[1.71786928177, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[216.0, 195.0, 180.0, 201.0, 178.0, 160.0, ..."
121,Height: 32 Width: 32,bird,"[0.0, 0.23753464222, 0.0, 0.0, 0.0, 0.0, ...","[93.0, 96.0, 88.0, 102.0, 106.0, 97.0, 117.0, ..."
138,Height: 32 Width: 32,bird,"[0.658935725689, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[205.0, 193.0, 195.0, 200.0, 187.0, 193.0, ..."
335,Height: 32 Width: 32,bird,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.50706672668, 0.0, ...","[160.0, 159.0, 154.0, 162.0, 161.0, 156.0, ..."
560,Height: 32 Width: 32,bird,"[1.69159495831, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[147.0, 138.0, 88.0, 151.0, 142.0, 92.0, ..."
649,Height: 32 Width: 32,bird,"[0.511156201363, 0.324165046215, 0.0, ...","[65.0, 127.0, 9.0, 127.0, 160.0, 15.0, 159.0, ..."
775,Height: 32 Width: 32,bird,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 10.0127315521, 0.0, ...","[29.0, 41.0, 25.0, 29.0, 42.0, 25.0, 28.0, 41.0, ..."
802,Height: 32 Width: 32,bird,"[0.277166724205, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[233.0, 230.0, 173.0, 222.0, 218.0, 168.0, ..."
975,Height: 32 Width: 32,bird,"[0.0, 0.0336718559265, 0.0, 0.645326733589, ...","[59.0, 180.0, 110.0, 88.0, 186.0, 117.0, ..."


In [35]:
categories = ['cat','bird','automobile','dog'] 

In [36]:
knn_model_cat = graphlab.nearest_neighbors.create(image_train_cat,features=['deep_features'],
                                             label='id')

In [37]:
knn_model_dog = graphlab.nearest_neighbors.create(image_train_dog,features=['deep_features'],
                                             label='id')

In [38]:
knn_model_bird = graphlab.nearest_neighbors.create(image_train_bird,features=['deep_features'],
                                             label='id')

In [39]:
knn_model_automobile = graphlab.nearest_neighbors.create(image_train_automobile,features=['deep_features'],
                                             label='id')

In [40]:
knn_model_all = [knn_model_cat,knn_model_dog,knn_model_bird,knn_model_automobile]

In [41]:
show_neighbors_categories = lambda i,j: get_images_from_ids(knn_model_all[j].query(image_train[i:i+1]))['image'].show()

In [42]:
image_test[0:1]['image'].show()

In [43]:
knn_model_all[0]

Class                          : NearestNeighborsModel

Attributes
----------
Method                         : brute_force
Number of distance components  : 1
Number of examples             : 509
Number of feature columns      : 1
Number of unpacked features    : 4096
Total training time (seconds)  : 0.2621

In [44]:
show_neighbors_categories(0,0)

In [45]:
knn_model_cat.query(image_test[0:1])

query_label,reference_label,distance,rank
0,16289,34.623719208,1
0,45646,36.0068799284,2
0,32139,36.5200813436,3
0,25713,36.7548502521,4
0,331,36.8731228168,5


In [46]:
# calculating mean for nearest cat
mean_sum=sum((knn_model_cat.query(image_test[0:1]))['distance'])
mean = mean_sum/len(knn_model_cat.query(image_test[0:1]))
mean

36.15573070978294

In [47]:
get_images_from_ids(knn_model_cat.query(image_test[0:1]))

id,image,label,deep_features,image_array
331,Height: 32 Width: 32,cat,"[0.0, 0.0, 0.510963916779, 0.0, ...","[45.0, 65.0, 92.0, 72.0, 95.0, 110.0, 106.0, ..."
16289,Height: 32 Width: 32,cat,"[0.964287519455, 0.0, 0.0, 0.0, 1.12515509129, ...","[215.0, 219.0, 231.0, 215.0, 219.0, 232.0, ..."
25713,Height: 32 Width: 32,cat,"[0.536971271038, 0.0, 0.0, 0.0894458889961, ...","[228.0, 222.0, 236.0, 224.0, 213.0, 222.0, ..."
32139,Height: 32 Width: 32,cat,"[1.29409468174, 0.0, 0.0, 0.513800263405, ...","[217.0, 220.0, 205.0, 221.0, 227.0, 218.0, ..."
45646,Height: 32 Width: 32,cat,"[0.983677506447, 0.0, 0.0, 0.0, 0.0, ...","[51.0, 42.0, 26.0, 56.0, 47.0, 31.0, 59.0, 50.0, ..."


In [48]:
get_images_from_ids(knn_model_cat.query(image_test[0:1]))['image'].show()

In [49]:
get_images_from_ids(knn_model_dog.query(image_test[0:1]))

id,image,label,deep_features,image_array
6094,Height: 32 Width: 32,dog,"[0.470533549786, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[91.0, 98.0, 71.0, 138.0, 123.0, 63.0, 135.0, ..."
13387,Height: 32 Width: 32,dog,"[0.366494178772, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[255.0, 255.0, 255.0, 255.0, 255.0, 255.0, ..."
16976,Height: 32 Width: 32,dog,"[0.755595386028, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[16.0, 17.0, 11.0, 18.0, 19.0, 13.0, 20.0, 21.0, ..."
35867,Height: 32 Width: 32,dog,"[0.305321395397, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[101.0, 93.0, 9.0, 93.0, 88.0, 9.0, 90.0, 86.0, ..."
44603,Height: 32 Width: 32,dog,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 11.2646656036, 0.0, ...","[8.0, 25.0, 9.0, 29.0, 39.0, 22.0, 66.0, 75.0, ..."


In [50]:
knn_model_dog.query(image_test[0:1])

query_label,reference_label,distance,rank
0,16976,37.4642628784,1
0,13387,37.5666832169,2
0,35867,37.6047267079,3
0,44603,37.7065585153,4
0,6094,38.5113254907,5


In [51]:
# calculating mean for nearest dog
mean_sum=sum((knn_model_dog.query(image_test[0:1]))['distance'])
mean = mean_sum/len(knn_model_dog.query(image_test[0:1]))
mean

37.77071136184157

In [52]:
get_images_from_ids(knn_model_cat.query(image_test[0:1]))['image'].show()

In [53]:
get_images_from_ids(knn_model_dog.query(image_test[0:1]))['image'].show()

In [54]:
#reference_number and id are the same.

In [55]:
image_train[251:252]

id,image,label,deep_features,image_array
6041,Height: 32 Width: 32,bird,"[0.285111188889, 0.0, 0.0, 0.675408244133, ...","[150.0, 176.0, 213.0, 145.0, 172.0, 208.0, ..."


In [56]:
# creating the k nearest neighbour classifier
knnc_model = graphlab.nearest_neighbor_classifier.create(image_train,target ='label',features=['deep_features'])

In [57]:
knnc_model.predict(image_test)

dtype: str
Rows: 4000
['cat', 'automobile', 'cat', 'automobile', 'dog', 'dog', 'dog', 'bird', 'dog', 'cat', 'cat', 'automobile', 'cat', 'dog', 'cat', 'cat', 'dog', 'automobile', 'bird', 'automobile', 'bird', 'cat', 'bird', 'bird', 'cat', 'dog', 'automobile', 'automobile', 'dog', 'dog', 'automobile', 'cat', 'dog', 'cat', 'automobile', 'automobile', 'bird', 'bird', 'automobile', 'cat', 'cat', 'cat', 'automobile', 'bird', 'dog', 'cat', 'bird', 'automobile', 'automobile', 'cat', 'bird', 'dog', 'cat', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'bird', 'automobile', 'dog', 'dog', 'dog', 'dog', 'bird', 'cat', 'dog', 'cat', 'dog', 'automobile', 'bird', 'dog', 'automobile', 'automobile', 'cat', 'dog', 'dog', 'dog', 'cat', 'dog', 'automobile', 'cat', 'dog', 'dog', 'automobile', 'automobile', 'cat', 'automobile', 'bird', 'cat', 'dog', 'cat', 'cat', 'automobile', 'dog', 'cat', 'dog', 'cat', 'cat', ... ]

In [58]:
knnc_model.evaluate(image_test)



{'accuracy': 0.75075, 'confusion_matrix': Columns:
 	target_label	str
 	predicted_label	str
 	count	int
 
 Rows: 16
 
 Data:
 +--------------+-----------------+-------+
 | target_label | predicted_label | count |
 +--------------+-----------------+-------+
 |  automobile  |       dog       |   2   |
 |     dog      |    automobile   |   11  |
 |     dog      |       dog       |  722  |
 |     cat      |       dog       |  268  |
 |  automobile  |    automobile   |  975  |
 |     bird     |    automobile   |   21  |
 |  automobile  |       cat       |   18  |
 |     bird     |       cat       |  228  |
 |  automobile  |       bird      |   5   |
 |     bird     |       bird      |  647  |
 +--------------+-----------------+-------+
 [16 rows x 3 columns]
 Note: Only the head of the SFrame is printed.
 You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.}

In [59]:
# creating subset of image_test dataset and category-specific image retrieval models

In [60]:
image_test_cat = image_test.filter_by('cat','label')
image_test_dog = image_test.filter_by('dog','label')
image_test_automobile = image_test.filter_by('automobile','label')
image_test_bird = image_test.filter_by('bird','label')

In [61]:
print  len(image_test_dog)
print  len(image_test_cat)
print  len(image_test_bird)
print  len(image_test_automobile)
print  len(image_test)


1000
1000
1000
1000
4000


In [62]:
dog_cat_neighbors = knn_model_cat.query(image_test_dog, k=1)
dog_dog_neighbors = knn_model_dog.query(image_test_dog, k=1)
dog_bird_neighbors = knn_model_bird.query(image_test_dog, k=1)
dog_automobile_neighbors = knn_model_automobile.query(image_test_dog, k=1)

In [63]:
dog_cat_neighbors

query_label,reference_label,distance,rank
0,33,36.4196077068,1
1,30606,38.8353268874,1
2,5545,36.9763410854,1
3,19631,34.5750072914,1
4,7493,34.778824791,1
5,47044,35.1171578292,1
6,13918,40.6095830913,1
7,10981,39.9036867306,1
8,45456,38.0674700168,1
9,44673,42.7258732951,1


In [64]:
get_images_from_ids(dog_cat_neighbors)['image'].show()

In [65]:
dog_dog_neighbors

query_label,reference_label,distance,rank
0,49803,33.4773590373,1
1,5755,32.8458495684,1
2,20715,35.0397073189,1
3,13387,33.9010327697,1
4,12089,37.4849250909,1
5,6094,34.945165344,1
6,3431,39.0957278345,1
7,6184,37.7696131032,1
8,2167,35.1089144603,1
9,7776,43.2422832585,1


In [66]:
dog_bird_neighbors

query_label,reference_label,distance,rank
0,44658,41.7538647304,1
1,9215,41.3382958925,1
2,36675,38.6157590853,1
3,12582,37.0892269954,1
4,36122,38.272288694,1
5,8736,39.1462089236,1
6,38991,40.523040106,1
7,44177,38.1947918393,1
8,4549,40.1567131661,1
9,40225,45.5597962603,1


In [67]:
dog_automobile_neighbors

query_label,reference_label,distance,rank
0,33859,41.9579761457,1
1,2046,46.0021331807,1
2,19594,42.9462290692,1
3,11000,41.6866060048,1
4,19594,39.2269664935,1
5,49314,40.5845117698,1
6,40822,45.1067352961,1
7,44997,41.3221140974,1
8,33859,41.8244654995,1
9,33859,45.4976929401,1


In [68]:
dog_distance_sframe = graphlab.SFrame({'dog-automobile': dog_automobile_neighbors['distance'],
                                       'dog-bird': dog_bird_neighbors['distance'],
                                       'dog-cat': dog_cat_neighbors['distance'],
                                       'dog-dog': dog_dog_neighbors['distance']})

In [69]:
dog_distance_sframe

dog-automobile,dog-bird,dog-cat,dog-dog
41.9579761457,41.7538647304,36.4196077068,33.4773590373
46.0021331807,41.3382958925,38.8353268874,32.8458495684
42.9462290692,38.6157590853,36.9763410854,35.0397073189
41.6866060048,37.0892269954,34.5750072914,33.9010327697
39.2269664935,38.272288694,34.778824791,37.4849250909
40.5845117698,39.1462089236,35.1171578292,34.945165344
45.1067352961,40.523040106,40.6095830913,39.0957278345
41.3221140974,38.1947918393,39.9036867306,37.7696131032
41.8244654995,40.1567131661,38.0674700168,35.1089144603
45.4976929401,45.5597962603,42.7258732951,43.2422832585


In [70]:
def is_dog_correct(row):
    if((row['dog-dog']<row['dog-automobile']) and (row['dog-dog']<row['dog-bird']) and (row['dog-dog']<row['dog-cat'])):
        return 1
    else:
        return 0

In [71]:
row = dog_distance_sframe[1]

In [72]:
row

{'dog-automobile': 46.002133180677895,
 'dog-bird': 41.3382958924861,
 'dog-cat': 38.83532688735544,
 'dog-dog': 32.845849568405555}

In [73]:
is_dog_correct(row)

1

In [74]:
dog_distance_sframe.apply(is_dog_correct)

dtype: int
Rows: 1000
[1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, ... ]

In [75]:
total_correct = dog_distance_sframe.apply(is_dog_correct).sum()
print total_correct

678


In [76]:
accuracy = total_correct/10

In [77]:
accuracy

67

In [78]:
# doing for the cat model:
cat_cat_neighbors = knn_model_cat.query(image_test_cat, k=1)
cat_dog_neighbors = knn_model_dog.query(image_test_cat, k=1)
cat_bird_neighbors = knn_model_bird.query(image_test_cat, k=1)
cat_automobile_neighbors = knn_model_automobile.query(image_test_cat, k=1)

In [79]:
cat_distance_sframe = graphlab.SFrame({'cat-automobile': cat_automobile_neighbors['distance'],
                                       'cat-bird': cat_bird_neighbors['distance'],
                                       'cat-cat': cat_cat_neighbors['distance'],
                                       'cat-dog': cat_dog_neighbors['distance']})

In [80]:
cat_distance_sframe

cat-automobile,cat-bird,cat-cat,cat-dog
39.6710582792,38.074265869,34.623719208,37.4642628784
43.0089056688,36.3674024138,33.8680579302,29.3472319585
38.6010006604,35.3039394947,32.4615168902,32.2599640475
39.3566307091,38.8944029601,35.7708210254,35.3852085188
38.3572372618,34.2820409875,31.1577686417,30.0442985088
42.0904793181,44.5352170178,41.3986035847,35.4741000424
39.0520251253,34.0290595084,30.9894594959,32.5845275226
39.3058645069,39.0236924983,37.0814607387,37.6502852614
43.0248129799,40.8334054297,39.9883863688,36.9801353512
45.6749176426,40.1258835601,39.7076633097,41.1259410707


In [81]:
def is_cat_correct(row):
    if((row['cat-cat']<row['cat-automobile']) and (row['cat-cat']<row['cat-bird']) and (row['cat-cat']<row['cat-dog'])):
        return 1
    else:
        return 0

In [82]:
cat_distance_sframe.apply(is_cat_correct)

dtype: int
Rows: 1000
[1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, ... ]

In [83]:
cat_accu_total = cat_distance_sframe.apply(is_cat_correct).sum()

In [84]:
accu = cat_accu_total/10