## About

In this notebook, I create a submission using the models trained in [GLRet21: EfficientNetB0 Baseline Training](https://www.kaggle.com/hidehisaarai1213/glret21-efficientnetb0-baseline-training).

This notebook is based on [DELG_global_baseline](https://www.kaggle.com/wowfattie/delg-global-baseline).

In [1]:
#!pip install ../input/kerasapplications/ > /dev/null
#!pip install ../input/efficientnet-keras-source-code/ > /dev/null

In [4]:
import gc
import os
import math
import random
import re
import warnings
from pathlib import Path
from PIL import Image
from typing import Optional, Tuple

import efficientnet.tfkeras as efn
import numpy as np
import pandas as pd
import tensorflow as tf
from scipy import spatial
from sklearn.preprocessing import normalize
from tqdm import tqdm

In [5]:
# Show the TensorFlow version in use (rendered as this cell's output).
tf.__version__

In [6]:
import h5py

# Peek at the top-level keys stored in fold2.h5. The context manager closes
# the HDF5 handle as soon as the keys are printed instead of leaving the file
# open for the rest of the kernel session.
with h5py.File('../input/glret21-efficientnetb0-baseline-training/fold2.h5', 'r') as hf:
    print(hf.keys())

In [7]:
# Report how many GPU devices TensorFlow detects.
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

## Settings

In [8]:
# Root folder of the competition data; the image folders below live under it.
DATADIR = Path("../input/landmark-retrieval-2021/")
# Query images for which predictions are produced.
TEST_IMG_DIR = DATADIR / "test"
# Index images that are ranked against each query image.
TRAIN_IMG_DIR = DATADIR / "index"

# Number of nearest index images kept per query image.
TOPK = 100
N_CLASSES = 81313 # presumably the number of distinct landmark classes (not images) - TODO confirm; unused in the code shown here

## Utilities

In [9]:
import time
from contextlib import contextmanager


@contextmanager
def timer(name):
    """Print a start banner for ``name`` and, on normal exit, the elapsed time.

    Args:
        name: Label shown in square brackets on both the start line and the
            completion line.

    Yields:
        None. The timed work is the body of the ``with`` statement.

    If the body raises, the exception propagates and no completion line is
    printed.
    """
    # perf_counter() is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    started = time.perf_counter()
    print(f"[{name}]")
    yield
    elapsed = time.perf_counter() - started
    print(f"[{name}] done in {elapsed:.0f}s")

## Model

## Feature Extraction

In [10]:
def extract_global_features(image_root_dir, n_models=4):
    """Placeholder for the global-feature extraction step (not implemented).

    Args:
        image_root_dir: Directory containing the images to embed.
        n_models: Number of models to use; presumably the size of the model
            ensemble - TODO confirm once the model code is added.

    Returns:
        Nothing yet. The callers in this notebook expect a pair
        ``(ids, embeddings)``: ``embeddings`` is indexed as a 2-D array with
        one row per image, and ``ids[i]`` is the string id of row ``i``.

    Raises:
        NotImplementedError: Always. Previously the stub returned ``None``,
            which made callers fail with an opaque unpacking ``TypeError``.
    """
    raise NotImplementedError(
        "extract_global_features is a stub: load the trained models and "
        "return (ids, embeddings) for the images under image_root_dir"
    )

## Main

In [11]:
def final_predictions():
    """Rank index images by cosine distance for every test image.

    Embeds the images under ``TEST_IMG_DIR`` and ``TRAIN_IMG_DIR`` with
    ``extract_global_features``, then, for each test embedding, selects the
    ``TOPK`` index embeddings with the smallest cosine distance.

    Returns:
        tuple: ``(test_ids, Prediction_string_list)`` where
        ``Prediction_string_list[i]`` holds up to ``TOPK`` index image ids for
        ``test_ids[i]``, nearest first, separated by single spaces.
    """
    with timer("test embadding images"):
        test_ids, test_lmimages = extract_global_features(str(TEST_IMG_DIR))

    with timer("train embadding images"):
        train_ids, train_lmimages = extract_global_features(str(TRAIN_IMG_DIR))

    n_index = train_lmimages.shape[0]
    # np.argpartition needs kth < n; with TOPK or fewer index images every
    # index image is a candidate and no partitioning is required.
    k = min(TOPK, n_index)

    Prediction_string_list = []
    with timer("Images_compare"):
        # shape[0] is the number of rows, i.e. the number of test embeddings.
        for test_index in range(test_lmimages.shape[0]):
            # Keep the query 2-D (1, dim) because cdist expects two matrices;
            # [0] then extracts the single row of distances.
            query = test_lmimages[np.newaxis, test_index, :]
            distances = spatial.distance.cdist(query, train_lmimages, 'cosine')[0]
            if k < n_index:
                # Partial selection of the k smallest distances (unordered).
                candidates = np.argpartition(distances, k)[:k]
            else:
                candidates = range(n_index)
            # Order the selected candidates from nearest to farthest.
            nearest = sorted(
                ((train_ids[p], distances[p]) for p in candidates),
                key=lambda pair: pair[1],
            )
            Prediction_string_list.append(
                " ".join(train_id for train_id, _distance in nearest)
            )

    return test_ids, Prediction_string_list

            
def main():
    """Write ``submission.csv`` for the competition.

    Counts the ``.jpg`` files under ``TEST_IMG_DIR``. If there are exactly
    1129 of them, the sample submission is copied unchanged and retrieval is
    skipped; otherwise ``final_predictions`` is run and its results are
    written with the columns ``id`` and ``images``.
    """
    # NOTE(review): 1129 is presumably the size of the publicly visible test
    # set, so the expensive pipeline only runs on the hidden set - confirm.
    public_test_image_count = 1129

    # Only the count is needed, so the paths themselves are not collected.
    n_test_images = sum(
        1
        for _root, _dirs, files in os.walk(str(TEST_IMG_DIR))
        for file in files
        if file.endswith('.jpg')  # all images are in JPEG format
    )

    if n_test_images == public_test_image_count:
        # Derive the path from DATADIR so it follows the configured data root.
        sub_df = pd.read_csv(DATADIR / "sample_submission.csv")
        sub_df.to_csv('submission.csv', index=False)
        return

    test_ids, Prediction_string_list = final_predictions()
    sub_df = pd.DataFrame(data={'id': test_ids, 'images': Prediction_string_list})
    sub_df.to_csv('submission.csv', index=False)


        
    

In [12]:
# Entry point: running this cell writes submission.csv.
main()