In [21]:
import operator
import graphlab
image_train = graphlab.SFrame('image_train_data/')
image_test = graphlab.SFrame('image_test_data/')
graphlab.canvas.set_target('ipynb')

# Computing summary statistics of the data

In [25]:
frequency = image_train['label'].sketch_summary().frequent_items()
answer1 = sorted(frequency.items(), key=operator.itemgetter(1))[0][0]

# Creating category-specific image retrieval models

In [55]:
dog_model = graphlab.nearest_neighbors.create(image_train[image_train['label'] == 'dog'], features=['deep_features'], label='id')
cat_model = graphlab.nearest_neighbors.create(image_train[image_train['label'] == 'cat'], features=['deep_features'], label='id')
automobile_model = graphlab.nearest_neighbors.create(image_train[image_train['label'] == 'automobile'], features=['deep_features'], label='id')
bird_model = graphlab.nearest_neighbors.create(image_train[image_train['label'] == 'bird'], features=['deep_features'], label='id')

In [56]:
answer2 = [cat_model.query(image_test[0:1])['reference_label'][0], dog_model.query(image_test[0:1])['reference_label'][0]]

# A simple example of nearest-neighbors classification

In [94]:
answer3 = [sum(cat_model.query(image_test[0:1])['distance'])/5,
           sum(dog_model.query(image_test[0:1])['distance'])/5]

# Computing nearest neighbors accuracy using SFrame operations

In [61]:
image_test_cat = image_test[image_test['label'] == 'cat']
image_test_dog = image_test[image_test['label'] == 'dog']
image_test_automobile = image_test[image_test['label'] == 'automobile']
image_test_bird = image_test[image_test['label'] == 'bird']

In [64]:
dog_cat_neighbors = cat_model.query(image_test_dog, k=1)
dog_dog_neighbours = dog_model.query(image_test_dog, k=1)
dog_automobile_neighbours = automobile_model.query(image_test_dog, k=1)
dog_bird_neighbours = bird_model.query(image_test_dog, k=1)

In [69]:
dog_distances = graphlab.SFrame({'dog-cat': dog_cat_neighbors['distance'], 'dog-dog': dog_dog_neighbours['distance'],
                                 'dog-automobile': dog_automobile_neighbours['distance'],
                                 'dog-bird': dog_bird_neighbours['distance']})

In [80]:
def is_dog_correct(row):
    dog_dog = row['dog-dog']
    if (dog_dog > row['dog-cat']) or (dog_dog > row['dog-automobile']) or (dog_dog > row['dog-bird']):
        return 0
    return 1

In [85]:
answer4 = (dog_distances.apply(is_dog_correct).sum() * 100) / len(dog_distances)

# Print the results

In [98]:
print 'Least common category in training data:', answer1
print ''

print 'ID of the nearest \'cat\' labeled image in the training data to the given cat image:', answer2[0]
print 'ID of the nearest \'dog\' labeled image in the training data to the given cat image:', answer2[1]
print ''

print 'Mean distance between the given image and its 5 nearest neighbors that were labeled \'cat\' in the training data'
print '    =', answer3[0]
print 'Mean distance between the given image and its 5 nearest neighbors that were labeled \'dog\' in the training data'
print '    =', answer3[1]
print ''

print 'Accuracy of the 1-nearest neighbor classifier at classifying \'dog\' images from the test set (in percent):', answer4

Least common category in training data: bird

ID of the nearest 'cat' labeled image in the training data to the given cat image: 16289
ID of the nearest 'dog' labeled image in the training data to the given cat image: 16976

Mean distance between the given image and its 5 nearest neighbors that were labeled 'cat' in the training data
    = 36.1557307098
Mean distance between the given image and its 5 nearest neighbors that were labeled 'dog' in the training data
    = 37.7707113618

Accuracy of the 1-nearest neighbor classifier at classifying 'dog' images from the test set (in percent): 67
