In [None]:
import warnings
warnings.simplefilter('ignore')

import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors

%matplotlib inline

# Load Features and Labels

In [None]:
# Read the metadata tables first, then the image-id arrays stored under ./result.
# NOTE(review): test_imgs_id is later used as row labels into test_df — confirm
# that is what the .npy files contain.
train_df = pd.read_csv('./data/all/train.csv')
test_df = pd.read_csv('./data/all/test.csv')

train_imgs_id = np.load('./result/train_imgs_id.npy')
test_imgs_id = np.load('./result/test_imgs_id.npy')

# Quick sanity report: table/array shapes and the number of distinct landmark ids.
print('Train:\t\t', train_df.shape, train_imgs_id.shape)
print('Test:\t\t', test_df.shape, test_imgs_id.shape)
print('Landmarks:\t', len(pd.unique(train_df['landmark_id'])))

In [None]:
# Feature arrays for both splits; train_x is what the KNN index is fitted on
# and test_x is what gets queried against it.
train_x = np.load('./data/all/train_feature.npy')
test_x = np.load('./data/all/test_feature.npy')

# train_y is indexed by nearest-neighbour row to obtain the predicted landmark.
# NOTE(review): presumably one label per row of train_x — confirm against the
# script that wrote train_id.npy.
train_y = np.load('./data/all/train_id.npy')

print('Train:\t', train_x.shape, train_y.shape)
print('Test:\t', test_x.shape)

# Implement KNN Model

In [None]:
# Build a 1-nearest-neighbour index over the training features.
knn = NearestNeighbors(
    n_neighbors=1,       # only the single closest training sample is needed
    algorithm='auto',    # let scikit-learn pick the search structure
    leaf_size=30,
    metric='minkowski',
    p=2,                 # Minkowski with p=2 is the Euclidean distance
    n_jobs=-1,           # use every available core for neighbour queries
)
knn.fit(train_x)

In [None]:
# Find the nearest training sample for every test feature vector.
# return_distance=True makes kneighbors return a (distances, indices) pair;
# the distances are needed because the following cell saves and prints
# `distance` (with return_distance=False that name was never defined).
distance, neighbor_index = knn.kneighbors(test_x, return_distance=True)

In [None]:
# Persist the raw KNN output so later steps can reuse it without re-querying.
# Requires both `distance` and `neighbor_index` from the neighbour query
# (kneighbors only yields distances when called with return_distance=True).
np.save('./result/knn_all_distance.npy', distance)
np.save('./result/knn_all_neighbor_index.npy', neighbor_index)

# Report the shapes of what was written.
print('KNN Distance:\t\t', np.shape(distance))
print('KNN Neighbor:\t\t', np.shape(neighbor_index))

In [None]:
# Get the prediction for each query image: the label of its nearest training
# sample, paired with the query image's id from test_df.
landmarks = []
ids = []

for i in range(len(neighbor_index)):
    # NOTE(review): assumes test_imgs_id[i] is a valid row label of test_df
    # for the i-th row of test_x — confirm against the feature extraction step.
    idx = test_imgs_id[i]
    ids.append(test_df.loc[idx, 'id'])
    # neighbor_index[i] is a length-1 array (n_neighbors=1); take its scalar
    # entry so the label is a plain value rather than a 1-element array, which
    # would otherwise be formatted as '[123]' instead of '123' below.
    landmarks.append(train_y[neighbor_index[i][0]])

# Submission format is '<landmark_id> <confidence>'; confidence is fixed at 1.0.
prediction_tuple = [str(landmark) + ' ' + '1.0' for landmark in landmarks]

In [None]:
# Build the submission file. The sample submission supplies the full list of
# ids; the left merge keeps every one of them, so ids without a prediction
# end up with an empty 'landmarks' field.
sample_submission = pd.read_csv('./data/all/sample_submission.csv', usecols=['id'])

submission = sample_submission.merge(
    pd.DataFrame({'id': ids, 'landmarks': prediction_tuple}),
    how='left',
    on='id',
)
submission.to_csv(
    './result/knn_all_submission.csv',
    index=False,
    columns=['id', 'landmarks'],
)