In [1]:
import graphlab

In [2]:
# Load the training and test image SFrames from their directories
# (both paths are relative to the notebook's working directory).
train_dir, test_dir = 'image_train_data/', 'image_test_data/'
image_train = graphlab.SFrame(train_dir)
image_test = graphlab.SFrame(test_dir)

[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1538731498.log


This non-commercial license of GraphLab Create for academic use is assigned to <redacted email> and will expire on August 31, 2019.


In [3]:
# Preview the first two training rows to see which columns are available.
image_train.print_rows(num_rows=2)

+----+----------------------+-------+-------------------------------+
| id |        image         | label |         deep_features         |
+----+----------------------+-------+-------------------------------+
| 24 | Height: 32 Width: 32 |  bird | [0.242871761322, 1.0954537... |
| 33 | Height: 32 Width: 32 |  cat  | [0.525087952614, 0.0, 0.0,... |
+----+----------------------+-------+-------------------------------+
+-------------------------------+
|          image_array          |
+-------------------------------+
| [73.0, 77.0, 58.0, 71.0, 6... |
| [7.0, 5.0, 8.0, 7.0, 5.0, ... |
+-------------------------------+
[2005 rows x 5 columns]



In [4]:
# Summarize the 'label' column: length, missing values, (approximate) number
# of unique values, and the most frequent labels with their counts.
image_train['label'].sketch_summary()


+------------------+-------+----------+
|       item       | value | is exact |
+------------------+-------+----------+
|      Length      |  2005 |   Yes    |
| # Missing Values |   0   |   Yes    |
| # unique values  |   4   |    No    |
+------------------+-------+----------+

Most frequent items:
+-------+------------+-----+-----+------+
| value | automobile | cat | dog | bird |
+-------+------------+-----+-----+------+
| count |    509     | 509 | 509 | 478  |
+-------+------------+-----+-----+------+


# Question 1:
# The 'label' column of the training data contains 4 unique labels:
# automobile: 509, cat: 509, dog: 509, bird: 478 (total = 2005),
# so 'bird' is the least common label.

In [5]:
# Collect the distinct class labels; the per-category loops in this notebook
# iterate over this array. The order returned by unique() is not sorted
# (see the displayed output).
image_label = image_train['label'].unique()
image_label

dtype: str
Rows: 4
['bird', 'dog', 'cat', 'automobile']

In [None]:
## split the data into 4 categories

In [6]:
# Partition the training set by class: each '<label>_model' key maps to the
# rows of image_train whose 'label' column equals that label.
image_group = {
    label + '_model': image_train.filter_by(label, 'label')
    for label in image_label
}

In [None]:
# Inspect the per-label subsets built from the training data.
image_group

In [8]:
# Confirm there is one '<label>_model' key per category.
image_group.keys()

['bird_model', 'cat_model', 'dog_model', 'automobile_model']

In [12]:
# Direct GraphLab Canvas output to the notebook ('ipynb' target) so that
# .show() calls render inside this document.
graphlab.canvas.set_target('ipynb')

# Display the first test image. The bare `image_test[0:1]` expression that used
# to open this cell was removed: it was not the last expression of the cell,
# so it was never displayed and had no effect.
image_test[0:1]['image'].show()

In [13]:
# Keep the first test row as the query image for the nearest-neighbor searches.
# The cell that displays cat_image shows this row has id 0 and label 'cat'.
cat_image = image_test[0:1]

In [14]:
## build 4 knn models

In [15]:
# Fit one nearest-neighbors model per category, each trained only on that
# category's rows, using the 'deep_features' column and labelling rows by 'id'.
knn_image_models = {}
for category in image_label:
    model_label = category + '_model'
    reference_rows = image_group[model_label]
    knn_image_models[model_label] = graphlab.nearest_neighbors.create(
        reference_rows,
        features=['deep_features'],
        label='id',
    )

In [16]:
# Confirm that a model was stored under every '<label>_model' key.
knn_image_models.keys()

['bird_model', 'cat_model', 'dog_model', 'automobile_model']

In [None]:
# Inspect the fitted per-category nearest-neighbors models.
knn_image_models

In [18]:
# Query every per-category model with the same test image; results are stored
# under the same '<label>_model' keys as the models themselves.
model_labels = [label + '_model' for label in image_label]
nearest_query = {
    name: knn_image_models[name].query(cat_image)
    for name in model_labels
}

In [19]:
# Show the query result of every per-category model (query_label,
# reference_label, distance, rank; 5 rows each in the recorded output).
nearest_query

{'automobile_model': Columns:
 	query_label	int
 	reference_label	int
 	distance	float
 	rank	int
 
 Rows: 5
 
 Data:
 +-------------+-----------------+---------------+------+
 | query_label | reference_label |    distance   | rank |
 +-------------+-----------------+---------------+------+
 |      0      |      49314      | 39.6710582792 |  1   |
 |      0      |      12389      | 40.2107393099 |  2   |
 |      0      |      15299      | 41.2103305258 |  3   |
 |      0      |      40118      | 41.3292164653 |  4   |
 |      0      |      17111      |  41.352687775 |  5   |
 +-------------+-----------------+---------------+------+
 [5 rows x 4 columns], 'bird_model': Columns:
 	query_label	int
 	reference_label	int
 	distance	float
 	rank	int
 
 Rows: 5
 
 Data:
 +-------------+-----------------+---------------+------+
 | query_label | reference_label |    distance   | rank |
 +-------------+-----------------+---------------+------+
 |      0      |      49573      |  38.074265869 |  

In [20]:
# Display the query row to check its id and label.
cat_image

id,image,label,deep_features,image_array
0,Height: 32 Width: 32,cat,"[1.13469004631, 0.0, 0.0, 0.0, 0.0366497635841, ...","[158.0, 112.0, 49.0, 159.0, 111.0, 47.0, ..."


# Question 2:
# For the query image image_test[0:1]:
# - its nearest neighbor in the dog_model has id 16976, at distance 37.4642
# - its nearest neighbor in the cat_model has id 16289, at distance 34.6237

In [23]:
# Look up the training image that the cat model returns first for the query
# (the first row is rank 1 in the recorded query output) and display it.
# The variable is named `nearest_cat_id` rather than `id` so that the built-in
# id() is not shadowed for the rest of the kernel session.
nearest_cat_id = nearest_query['cat_model']['reference_label'][0]
image_train[image_train['id'] == nearest_cat_id]['image'].show()

In [None]:
## display those nearest neighbors
# For each category, take the first row of that model's query result and show
# the matching training image. `nearest_id` is used instead of `id` so the
# built-in id() is not shadowed.
for label in image_label:
    model_label = label + '_model'
    nearest_id = nearest_query[model_label]['reference_label'][0]
    image_train[image_train['id'] == nearest_id]['image'].show()