In [None]:
# Install extra libraries from wheel files shipped in the attached
# "foursquare-libraries" dataset (local paths under /kaggle/input), rather
# than by package name.
# NOTE(review): requests_file is installed before tldextract, presumably
# because tldextract depends on it -- confirm before reordering these lines.
!pip install /kaggle/input/foursquare-libraries/requests_file-1.5.1-py2.py3-none-any.whl
!pip install /kaggle/input/foursquare-libraries/tldextract-3.3.0-py3-none-any.whl
!pip install /kaggle/input/foursquare-libraries/thefuzz-0.19.0-py2.py3-none-any.whl

In [None]:
import os

# The pipeline stages of this notebook run in `%%python` subprocesses, so a
# setting applied only to the kernel's TensorFlow instance never reaches the
# code that actually uses the GPU. Child processes inherit the kernel's
# environment, and TensorFlow reads TF_FORCE_GPU_ALLOW_GROWTH at start-up, so
# export it here, before TensorFlow is imported anywhere.
# setdefault() keeps a value that was already chosen outside the notebook.
os.environ.setdefault("TF_FORCE_GPU_ALLOW_GROWTH", "true")

import tensorflow as tf

# Enable on-demand GPU memory allocation for the notebook kernel itself.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as error:
    # set_memory_growth must be called before the GPUs are initialised; when
    # the cell is re-run too late, report it instead of aborting the notebook.
    print(f"could not enable GPU memory growth: {error}")

In [None]:
%%python

# extract embeddings

import sys
sys.path.extend(["/kaggle/input/foursquare-src", "../.."])

import os
import pathlib

from foursquare.utils_io import load_config
from foursquare.pipeline_test import extract_embeddings


# define paths
dir_kaggle_input = pathlib.Path("/kaggle/input")
dir_kaggle_working = pathlib.Path("/kaggle/working")

dir_dataset_test = dir_kaggle_input / "foursquare-location-matching"
dir_dataset_models_arcface = dir_kaggle_input / "foursquare-models/arcface"

path_test = dir_dataset_test / "test.csv"
dir_embeddings = dir_kaggle_working / "embeddings"

# extract embeddings
os.makedirs(dir_embeddings, exist_ok=True)

paths_embeddings = list()
for dir_model in dir_dataset_models_arcface.glob("*"):
    for dir_fold_model in dir_model.glob("*"):
        dir_fold_embeddings = dir_embeddings / dir_fold_model.name
        os.makedirs(dir_fold_embeddings, exist_ok=True)
        
        name_model = dir_model.stem
        path_emb = dir_fold_embeddings / f"{name_model}.npy"

        config_model = load_config(dir_fold_model / "config.json")
        print(f"{name_model} model config: {config_model}")

        extract_embeddings(
            config=config_model,
            path_locations=path_test,
            dir_model=dir_fold_model,
            path_embeddings_output=path_emb)

In [None]:
%%python

# blend embeddings

import sys
sys.path.extend(["/kaggle/input/foursquare-src", "../.."])

import os
import pathlib

from foursquare.utils_io import load_config
from foursquare.pipeline_test import blend_embeddings_neighbors


# define paths
dir_src = "/kaggle/input/foursquare-src"
dir_kaggle_input = pathlib.Path("/kaggle/input")
dir_kaggle_working = pathlib.Path("/kaggle/working")

dir_embeddings = dir_kaggle_working / "embeddings"
path_config = dir_kaggle_input / "foursquare-src/config.json"

config = load_config(path_config)
print(f"nearest neighbors config: {config}")

for path_embedding in dir_embeddings.rglob("*.npy"):
    blend_embeddings_neighbors(config, path_embedding)

In [None]:
%%python

# build pairs

import sys
sys.path.extend(["/kaggle/input/foursquare-src", "../.."])

import os
import pathlib

from foursquare.utils_io import load_config
from foursquare.pipeline_test import make_pairs


# define paths
dir_src = "/kaggle/input/foursquare-src"
dir_kaggle_input = pathlib.Path("/kaggle/input")
dir_kaggle_working = pathlib.Path("/kaggle/working")

dir_dataset_test = dir_kaggle_input / "foursquare-location-matching"
dir_dataset_models = dir_kaggle_input / "foursquare-models"

path_config = dir_kaggle_input / "foursquare-src/config.json"
path_test = dir_dataset_test / "test.csv"
dir_embeddings = dir_kaggle_working / "embeddings"
dir_pairs_output = dir_kaggle_working / "pairs"
os.makedirs(dir_pairs_output, exist_ok=True)

# nearest neigbors prediction
config = load_config(path_config)
print(f"nearest neighbors config: {config}")

for dir_fold_embeddings in dir_embeddings.glob("*"):
    make_pairs(
        config=config,
        path_locations=path_test,
        dir_embeddings=dir_fold_embeddings,
        path_pairs_output=dir_pairs_output / f"{dir_fold_embeddings.name}.csv")

In [None]:
%%python

# merge pairs

import sys
sys.path.extend(["/kaggle/input/foursquare-src", "../.."])

import os
import pathlib

from foursquare.utils_io import load_config
from foursquare.pipeline_test import merge_pairs_dataset


# define paths
dir_kaggle_input = pathlib.Path("/kaggle/input")
dir_kaggle_working = pathlib.Path("/kaggle/working")

dir_dataset_test = dir_kaggle_input / "foursquare-location-matching"
dir_dataset_models = dir_kaggle_input / "foursquare-models"

dir_embeddings = dir_kaggle_working / "embeddings"
dir_pairs = dir_kaggle_working / "pairs"
path_pairs_output = dir_kaggle_working / "pairs.csv"

merge_pairs_dataset(dir_pairs, path_pairs_output)

In [None]:
%%python

# add cosine distance

import sys
sys.path.extend(["/kaggle/input/foursquare-src", "../.."])

import os
import pathlib

from foursquare.utils_io import load_config
from foursquare.pipeline_test import add_cosine_distances


# define paths
dir_kaggle_input = pathlib.Path("/kaggle/input")
dir_kaggle_working = pathlib.Path("/kaggle/working")

dir_dataset_test = dir_kaggle_input / "foursquare-location-matching"
dir_dataset_models = dir_kaggle_input / "foursquare-models"

path_config = dir_kaggle_input / "foursquare-src/config.json"
dir_embeddings = dir_kaggle_working / "embeddings"
path_pairs = dir_kaggle_working / "pairs.csv"
path_pairs_output = dir_kaggle_working / "pairs_cosine.csv"

config = load_config(path_config)
print(f"nearest neighbors config: {config}")

add_cosine_distances(config, path_pairs, dir_embeddings, path_pairs_output)

In [None]:
%%python

# add cosine distances extra

import sys
sys.path.extend(["/kaggle/input/foursquare-src", "../.."])

import os
import pathlib

from foursquare.utils_io import load_config
from foursquare.pipeline_test import add_cosine_distances_extra


# define paths
dir_kaggle_input = pathlib.Path("/kaggle/input")
dir_kaggle_working = pathlib.Path("/kaggle/working")

dir_dataset_test = dir_kaggle_input / "foursquare-location-matching"
dir_dataset_models = dir_kaggle_input / "foursquare-models"

path_config = dir_kaggle_input / "foursquare-src/config.json"
dir_embeddings = dir_kaggle_working / "embeddings"
path_pairs = dir_kaggle_working / "pairs_cosine.csv"
path_pairs_output = dir_kaggle_working / "pairs_cosine.csv"

config = load_config(path_config)
print(f"nearest neighbors config: {config}")

for name_embedding in config["lgb"]["embeddings_extra"]:
    add_cosine_distances_extra(
        path_pairs=path_pairs, 
        path_pairs_output=path_pairs_output,
        dir_embeddings=dir_embeddings, 
        name_embedding=name_embedding)

In [None]:
%%python

# build pairs dataset

import sys
sys.path.extend(["/kaggle/input/foursquare-src", "../.."])

import os
import pathlib

from foursquare.utils_io import load_config
from foursquare.pipeline_test import make_pairs_dataset


# define paths
dir_kaggle_input = pathlib.Path("/kaggle/input")
dir_kaggle_working = pathlib.Path("/kaggle/working")

dir_dataset_test = dir_kaggle_input / "foursquare-location-matching"
dir_dataset_models = dir_kaggle_input / "foursquare-models"
dir_dataset_models_lgb = dir_dataset_models / "lgb"

path_config = dir_kaggle_input / "foursquare-src/config.json"
path_test = dir_dataset_test / "test.csv"
path_pairs = dir_kaggle_working / "pairs_cosine.csv"
path_pairs_output = dir_kaggle_working / "pairs_dataset.csv"

# nearest neigbors prediction
config = load_config(path_config)
print(f"nearest neighbors config: {config}")

make_pairs_dataset(
        config=config,
        path_pairs=path_pairs,
        path_locations=path_test,
        path_pairs_output=path_pairs_output)

In [None]:
%%python

# lgb prediction

import sys
sys.path.extend(["/kaggle/input/foursquare-src", "../.."])

import os
import pathlib

from foursquare.utils_io import load_config
from foursquare.pipeline_test import make_submission


# define paths
dir_kaggle_input = pathlib.Path("/kaggle/input")
dir_kaggle_working = pathlib.Path("/kaggle/working")

dir_dataset_test = dir_kaggle_input / "foursquare-location-matching"
dir_dataset_models = dir_kaggle_input / "foursquare-models"
dir_dataset_models_lgb = dir_dataset_models / "lgb"

path_config = dir_kaggle_input / "foursquare-src/config.json"
path_test = dir_dataset_test / "test.csv"
path_submission = dir_kaggle_working / "submission.csv"
path_pairs_dataset = dir_kaggle_working / "pairs_dataset.csv"

# nearest neigbors prediction
config = load_config(path_config)
print(f"nearest neighbors config: {config}")

make_submission(
    config=config, 
    path_locations=path_test,
    path_pairs_dataset=path_pairs_dataset,
    dir_lgb=dir_dataset_models_lgb,
    path_submission_output=path_submission)