# Inference notebook

In [1]:
import sys


def in_kaggle():
    """Tell whether the ``kaggle_web_client`` module has been loaded.

    The notebook uses this as its signal for choosing Kaggle-specific
    paths and setup steps over the local ones.

    Returns:
        bool: True if ``"kaggle_web_client"`` is a key of ``sys.modules``.
    """
    kaggle_marker = "kaggle_web_client"
    return kaggle_marker in sys.modules

In [2]:
##### INSERT SOURCE CODE HERE FOR SUBMISSION #####

# Outside Kaggle, add the parent directory and the shared inputs directory
# to the module search path (in that order).
if not in_kaggle():
    sys.path.extend(["..", "../../inputs"])

In [3]:
# Environment setup: on Kaggle, install packages from an attached dataset
# (no internet access is assumed by this recipe) and choose the location of
# the main config file for the current environment.
if in_kaggle():
    # Offline install recipe taken from:
    # https://www.kaggle.com/speeddemon/install-hydra-offline-from-dataset

    # Copy the hydracore105 dataset into /kaggle/working, rename the antlr4
    # runtime archive so it loses its ".tmp" suffix, then list the directory.
    !cp -r /kaggle/input/hydracore105 /kaggle/working
    !mv /kaggle/working/hydracore105/antlr4-python3-runtime-4.8.tar.gz.tmp /kaggle/working/hydracore105/antlr4-python3-runtime-4.8.tar.gz
    !ls /kaggle/working/hydracore105

    # Quietly install every file in the copied directory plus PyYAML;
    # --ignore-installed makes pip disregard already-installed packages.
    !pip install -qq /kaggle/working/hydracore105/* --ignore-installed PyYAML
    
    # Make the omegaconf sources shipped in an attached dataset importable.
    sys.path.append("../input/omegaconf/omegaconf-master")
    omega_conf_path = "config/main.yaml"
    
else:
    # Outside Kaggle the config file lives one directory up.
    omega_conf_path = "../config/main.yaml"

In [4]:
from omegaconf import OmegaConf

# Load the base configuration, then switch off debug mode and wandb for
# this inference run.
c = OmegaConf.load(omega_conf_path)
c.wandb.enabled = False
c.settings.debug = False

# Directory layout differs between a local checkout and Kaggle; the Kaggle
# branch additionally sets the GPU selection to "0".
if not in_kaggle():
    pretraind_dir = "../../datasets/trainings"

    c.settings.dirs.input = "../../inputs/"
    c.settings.dirs.working = ".."

else:
    pretraind_dir = "../input/ump-models"

    c.settings.dirs.input = "../input/ubiquant-market-prediction/"
    c.settings.dirs.working = "."

    c.settings.gpus = "0"

# YAML text listing the pretrained entries used for this run: 15 items
# (fold0 through fold14), each with a `dir` below `pretraind_dir` and
# `model: ump_1`. It is parsed with OmegaConf.create and stored at
# c.params.pretrained further down in this cell.
pretrained = f"""
- dir: {pretraind_dir}/2022-02-01_20-23-49/fold0/
  model: ump_1
- dir: {pretraind_dir}/2022-02-01_20-23-51/fold1/
  model: ump_1
- dir: {pretraind_dir}/2022-02-01_20-23-53/fold2/
  model: ump_1
- dir: {pretraind_dir}/2022-02-01_20-23-55/fold3/
  model: ump_1
- dir: {pretraind_dir}/2022-02-01_20-23-58/fold4/
  model: ump_1
- dir: {pretraind_dir}/2022-02-01_20-24-00/fold5/
  model: ump_1
- dir: {pretraind_dir}/2022-02-01_20-24-02/fold6/
  model: ump_1
- dir: {pretraind_dir}/2022-02-01_20-24-04/fold7/
  model: ump_1
- dir: {pretraind_dir}/2022-02-02_00-43-02/fold8/
  model: ump_1
- dir: {pretraind_dir}/2022-02-02_00-43-04/fold9/
  model: ump_1
- dir: {pretraind_dir}/2022-02-02_00-43-06/fold10/
  model: ump_1
- dir: {pretraind_dir}/2022-02-02_00-43-08/fold11/
  model: ump_1
- dir: {pretraind_dir}/2022-02-02_00-43-11/fold12/
  model: ump_1
- dir: {pretraind_dir}/2022-02-02_00-43-13/fold13/
  model: ump_1
- dir: {pretraind_dir}/2022-02-02_00-43-15/fold14/
  model: ump_1

"""

# Alternative YAML list in the same format: 7 entries from the 2022-01-26
# runs (fold0-fold6) followed by 5 entries from the 2022-01-30 runs
# (fold0-fold4). Nothing in the visible cells reads `_pretrained`; only
# `pretrained` is installed on the config.
# NOTE(review): looks like an earlier model set kept for reference —
# confirm it is unused before removing it.
_pretrained = f"""
- dir: {pretraind_dir}/2022-01-26_07-47-01/fold0/
  model: ump_1
- dir: {pretraind_dir}/2022-01-26_07-47-03/fold1/
  model: ump_1
- dir: {pretraind_dir}/2022-01-26_07-47-05/fold2/
  model: ump_1
- dir: {pretraind_dir}/2022-01-26_07-47-07/fold3/
  model: ump_1
- dir: {pretraind_dir}/2022-01-26_07-47-10/fold4/
  model: ump_1
- dir: {pretraind_dir}/2022-01-26_07-47-12/fold5/
  model: ump_1
- dir: {pretraind_dir}/2022-01-26_07-47-14/fold6/
  model: ump_1

- dir: {pretraind_dir}/2022-01-30_22-12-14/fold0/
  model: ump_1
- dir: {pretraind_dir}/2022-01-30_22-12-16/fold1/
  model: ump_1
- dir: {pretraind_dir}/2022-01-30_22-12-19/fold2/
  model: ump_1
- dir: {pretraind_dir}/2022-01-30_22-12-21/fold3/
  model: ump_1
- dir: {pretraind_dir}/2022-01-30_22-12-23/fold4/
  model: ump_1


"""

# Parse the YAML list and install it as params.pretrained on the config.
pretrained_entries = OmegaConf.create(pretrained)
c.params.pretrained = pretrained_entries

# Print the final configuration as YAML so the run settings are recorded
# in the cell output.
config_yaml = OmegaConf.to_yaml(c)
print(config_yaml)

defaults:
- _self_
hydra:
  run:
    dir: ../outputs/${now:%Y-%m-%d_%H-%M-%S}
  job_logging:
    formatters:
      simple:
        format: '%(asctime)s [%(levelname)s][%(module)s] %(message)s'
wandb:
  enabled: false
  entity: imokuri
  project: ump
  dir: ${hydra:runtime.cwd}/../cache
  group: default
settings:
  print_freq: 100
  gpus: 6,7
  dirs:
    working: ..
    input: ../../inputs/
  inputs:
  - train.csv
  - example_test.csv
  - example_sample_submission.csv
  debug: false
  n_debug_data: 100000
  amp: true
  multi_gpu: true
params:
  seed: 440
  n_class: 1
  n_fold: 5
  skip_training: false
  epoch: 20
  es_patience: 0
  batch_size: 640
  gradient_acc_step: 1
  max_grad_norm: 1000
  fold: simple_cpcv
  group_name: investment_id
  time_name: time_id
  label_name: target
  feature_set:
  - f000
  dataset: ump_1
  model: ump_1dcnn
  pretrained:
  - dir: ../../datasets/trainings/2022-02-01_20-23-49/fold0/
    model: ump_1
  - dir: ../../datasets/trainings/2022-02-01_20-23-51/fold

In [5]:
import gc
import traceback

import src.utils as utils
import ubiquant
from src.feature_store import Store
from src.features.base import get_feature
from src.make_feature import make_feature
from src.make_model import load_model
from src.run_loop import inference

In [6]:
# Pass the configured seed (params.seed) to utils.fix_seed.
utils.fix_seed(c.params.seed)
# NOTE(review): debug_settings receives the whole config; presumably it only
# changes anything when settings.debug is on (it was set to False when the
# config was loaded) — confirm in src.utils.
utils.debug_settings(c)
# The value returned by gpu_settings is the `device` later handed to
# load_model and inference.
device = utils.gpu_settings(c)

In [7]:
# Feature identifiers used for inference.
feature_set = ["f000"]

# De-duplicate and order the identifiers. sorted() accepts any iterable and
# always returns a new list, so no extra list() wrappers are needed.
feature_set = sorted(set(feature_set))
print(f"feature set: {feature_set}")

# Resolve each identifier through get_feature, in the same sorted order.
feature_func = [get_feature(feature_name) for feature_name in feature_set]

feature set: ['f000']


In [8]:
# Build the feature store through Store.empty(); the inference loop below
# appends every incoming test row to it.
store = Store.empty()

In [9]:
# Load the models used by the inference loop.
# NOTE(review): presumably one model per entry in c.params.pretrained —
# confirm in src.make_model.
models = load_model(c, device)

In [10]:
# Create the competition environment and the test iterator that the
# inference loop consumes; predictions are handed back through env.predict.
env = ubiquant.make_env()  # initialize the environment
iter_test = env.iter_test()  # an iterator which loops over the test set and sample submission

In [11]:
%%time

# Each iteration yields one batch of test rows together with the sample
# submission frame whose "target" column is to be filled in.
for test_df, sample_prediction_df in iter_test:
    try:
        gc.collect()

        # Record every raw test row in the store before building features.
        for row in test_df.values:
            store.append(row)

        pred_df = make_feature(
            test_df,
            store,
            feature_set,
            load_from_store=False,
            save_to_store=False,
            debug=True,
        )
        df = inference(c, pred_df, device, models)

        sample_prediction_df["target"] = df["target"]

    except Exception:
        # Best effort: report the traceback and fall through, so that
        # sample_prediction_df is still submitted as it currently stands.
        print("ERROR!!!")
        print(traceback.format_exc())

    env.predict(sample_prediction_df)  # register your predictions

This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.
CPU times: user 5.74 s, sys: 18.2 s, total: 23.9 s
Wall time: 22.7 s
