In [1]:
import warnings
# Install a blanket 'ignore' filter: every warning category is silenced for
# the remainder of the session, including deprecation notices from libraries.
warnings.simplefilter('ignore')

import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors

%matplotlib inline

# Load Features and Labels

In [2]:
# Metadata tables for the train and test splits.
train_df = pd.read_csv('./data/all/train.csv')
test_df = pd.read_csv('./data/all/test.csv')

# Saved image-id arrays from ./result. test_imgs_id is used further down as
# row labels into test_df.
# NOTE(review): presumably these list only the images for which features
# were extracted - confirm against the step that wrote them.
train_imgs_id = np.load('./result/train_imgs_id.npy')
test_imgs_id = np.load('./result/test_imgs_id.npy')

# Report table / array shapes and the number of distinct landmark ids.
print('Train:\t\t', train_df.shape, train_imgs_id.shape)
print('Test:\t\t', test_df.shape, test_imgs_id.shape)
print('Landmarks:\t', len(pd.unique(train_df['landmark_id'])))

Train:		 (1225029, 3) (1192931,)
Test:		 (117703, 2) (108383,)
Landmarks:	 14951


In [5]:
# Load the saved feature arrays for the train and test splits.
train_x = np.load('./data/all/train_features.npy')
test_x = np.load('./data/all/test_features.npy')

# One target value per training feature row.
# NOTE(review): the file is named train_id.npy, but its values are written
# out as the predicted landmark later on - confirm it holds landmark ids.
train_y = np.load('./data/all/train_id.npy')

# Shapes of what was loaded.
print('Train:\t', train_x.shape, train_y.shape)
print('Test:\t', test_x.shape)

Train:	 (1192931, 2048) (1192931,)
Test:	 (108383, 2048)


# Implement KNN Model

In [7]:
# Nearest-neighbour index over the training features: one neighbour per
# query, Minkowski metric with p=2, n_jobs=-1 for the neighbour search.
knn = NearestNeighbors(
    n_neighbors=1,
    algorithm='auto',
    leaf_size=30,
    metric='minkowski',
    p=2,
    n_jobs=-1,
)
# Kept as the cell's last expression so the fitted estimator is displayed.
knn.fit(train_x)

NearestNeighbors(algorithm='auto', leaf_size=30, metric='minkowski',
         metric_params=None, n_jobs=-1, n_neighbors=1, p=2, radius=1.0)

In [8]:
# For every test feature vector, look up the index of its nearest training
# vector; only indices are requested, not distances.
neighbor_index = knn.kneighbors(X=test_x, return_distance=False)

In [9]:
# Persist the neighbour indices to ./result, then report the array's shape.
np.save('./result/knn_all_neighbor_index.npy', arr=neighbor_index)
print('KNN Neighbor:\t\t', neighbor_index.shape)

KNN Neighbor:		 (108383, 1)


In [12]:
# Build the prediction for each query image.
#
# Vectorised replacement for a per-row Python loop: one label-based selection
# on test_df and one fancy-index on train_y instead of one scalar `.loc`
# lookup and list append per query.

# Image ids of the queries: test_imgs_id holds the row labels into test_df,
# position-aligned with the rows of neighbor_index. The slice keeps only as
# many labels as there are neighbour rows.
ids = test_df.loc[test_imgs_id[:len(neighbor_index)], 'id'].tolist()

# Target value of the nearest training row for each query (column 0 is the
# single neighbour requested). list() keeps the NumPy scalars so str() below
# formats them exactly as element-wise indexing would.
# NOTE(review): assumes train_y holds landmark ids - confirm.
landmarks = list(train_y[neighbor_index[:, 0]])

# Both lists feed the same submission frame, so they must line up one-to-one.
assert len(ids) == len(landmarks), 'test_imgs_id and neighbor_index are misaligned'

# Submission format: "<landmark> <confidence>", with a fixed confidence of 1.0.
prediction_tuple = [str(landmark) + ' ' + '1.0' for landmark in landmarks]

In [14]:
# Write the submission file.
# Only the 'id' column of the sample submission is read; it supplies the
# full list of ids and their order.
sample_submission = pd.read_csv('./data/all/sample_submission.csv', usecols=['id'])

# Left-merge the predictions onto the sample ids: every sample id is kept,
# and ids without a prediction end up with a missing 'landmarks' value.
submission = sample_submission.merge(
    pd.DataFrame({'id': ids, 'landmarks': prediction_tuple}),
    how='left',
    on='id',
)
submission.to_csv(
    './result/knn_all_submission.csv',
    index=False,
    columns=['id', 'landmarks'],
)