In [None]:
# Reload imported modules automatically so edits to the library are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Code-formatting extensions (presumably nb_black for the classic Notebook UI and lab_black for JupyterLab — confirm which front end is in use).
%load_ext nb_black
%load_ext lab_black

<IPython.core.display.Javascript object>

In [None]:
# default_exp submission

<IPython.core.display.Javascript object>

# Submission

In [None]:
#hide
from nbdev.showdoc import *

<IPython.core.display.Javascript object>

In [None]:
#export
import uuid
import numpy as np
import pandas as pd
from pathlib import Path
from typeguard import typechecked
from abc import ABC, abstractmethod
from rich import print as rich_print
from numerapi import NumerAPI, SignalsAPI
from transparentpath import TransparentPath as GCSPath

from numerai_blocks.key import Key

<IPython.core.display.Javascript object>

## 1. Base

In [None]:
#export
@typechecked
class BaseSubmittor(ABC):
    """
    Abstract base class for saving prediction CSVs and uploading them through an API.
    :param directory_path: Base directory to save and read prediction files from. Created if it does not exist.
    :param key: Key object holding the credentials used by the concrete submittor.
    """
    def __init__(self, directory_path: str, key: Key):
        self.dir = Path(directory_path)
        self._create_directory()
        self.key = key

    @abstractmethod
    def save_csv(self, dataf: pd.DataFrame, file_name: str, target_columns: list, *args, **kwargs):
        """ Save DataFrame with 'id' + 'target_columns'. """
        ...

    @abstractmethod
    def upload_predictions(self, file_name: str, model_name: str, *args, **kwargs):
        """ Upload file for given model name through API. """
        ...

    def full_submission(self, dataf: pd.DataFrame, file_name: str, model_name: str, target_columns: list, *args, **kwargs):
        """
        Save DataFrame and upload predictions through API.
        :param dataf: DataFrame containing the prediction columns.
        :param file_name: Name of the CSV file, relative to the base directory.
        :param model_name: Model name to upload the predictions for.
        :param target_columns: Columns of dataf to write to the CSV.

        NOTE: the same *args, **kwargs are forwarded to both save_csv and upload_predictions.
        """
        # The named arguments are passed positionally on purpose: with `f(name=value, *args)`
        # the unpacked args are bound to the leading parameters first and collide with the
        # keywords ("got multiple values for argument"), so any extra positional arg would fail.
        self.save_csv(dataf, file_name, target_columns, *args, **kwargs)
        self.upload_predictions(file_name, model_name, *args, **kwargs)

    def configure_gcs_path(self, bucket_name: str):
        """
        Connect to Google Cloud Storage (GCS) bucket.
        :param bucket_name: Valid GCS bucket that you have access to.

        Credentials are detected automatically with the following process:
        1. The environment variable `GOOGLE_APPLICATION_CREDENTIALS` is set and points to a valid `.json` file.
        2. You have a valid Cloud SDK installation. In that case you might see the warning : UserWarning: Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. It is up to you to decide what to do with it.
        3. The machine running the code is itself a GCP machine.
        """
        GCSPath.set_global_fs("gcs", bucket=bucket_name)
        # From here on self.dir is a GCSPath instead of a local pathlib.Path.
        self.dir = GCSPath(self.dir)
        self._create_directory()
        rich_print(f":cloud: Path {self.dir} configured for Google Cloud Storage. :cloud:")

    def _create_directory(self):
        """ Create base directory if it does not exist. """
        if not self.dir.is_dir():
            rich_print(f"No existing directory found at '[blue]{self.dir}[/blue]'. Creating directory...")
            self.dir.mkdir(parents=True, exist_ok=True)

    def __call__(self, dataf: pd.DataFrame, file_name: str, model_name: str, target_columns: list, *args, **kwargs):
        """
        The most common use case will be to create a CSV and submit it immediately after that.
        full_submission handles this.
        """
        # Positional forwarding for the same reason as in full_submission:
        # keyword arguments followed by *args would clash as soon as args is non-empty.
        self.full_submission(dataf, file_name, model_name, target_columns, *args, **kwargs)

<IPython.core.display.Javascript object>

## 2. Numerai Classic

In [None]:
#export
class NumeraiClassicSubmittor(BaseSubmittor):
    """
    Submit for Numerai Classic through NumerAPI
    :param directory_path: Base directory to save and read prediction files from.
    :param key: Key object (numerai-blocks.key.Key) containing valid credentials for Numerai Classic.
    *args, **kwargs will be passed to NumerAPI initialization.
    """
    def __init__(self, directory_path: str, key: Key, *args, **kwargs):
        super().__init__(directory_path=directory_path, key=key)
        # Credentials are passed positionally (public_id, secret_key) so that extra *args
        # follow them instead of colliding with keyword-bound credentials.
        self.napi = NumerAPI(self.key.pub_id, self.key.secret_key, *args, **kwargs)

    def save_csv(self, dataf: pd.DataFrame, file_name: str, target_columns: list, *args, **kwargs):
        """
        Write the target columns of dataf to a CSV file in the base directory.
        :param dataf: DataFrame holding the prediction columns.
        :param file_name: Name of the CSV file, relative to the base directory.
        :param target_columns: Columns of dataf to write.
        *args, **kwargs will be passed to DataFrame.to_csv.
        """
        full_path = str(self.dir / file_name)
        rich_print(f":page_facing_up: Saving predictions CSV to '{full_path}'. :page_facing_up:")
        dataf[target_columns].to_csv(full_path, *args, **kwargs)

    def upload_predictions(self, file_name: str, model_name: str, *args, **kwargs):
        """
        Upload a prediction file from the base directory for the given model.
        :param file_name: Name of the CSV file, relative to the base directory.
        :param model_name: Model name; resolved to a model ID through get_model_mapping.
        *args, **kwargs will be passed to NumerAPI.upload_predictions.
        :raises KeyError: If model_name is not present in the model mapping.
        """
        full_path = str(self.dir / file_name)
        model_id = self._get_model_id(model_name=model_name)
        rich_print(f":airplane: Uploading predictions from '{full_path}' for model [bold blue]'{model_name}'[/bold blue] (model_id='{model_id}') :airplane:")
        # file_path goes first positionally and model_id stays a keyword, so extra *args
        # are bound to the parameters after file_path rather than clashing with it.
        self.napi.upload_predictions(full_path, *args, model_id=model_id, **kwargs)
        rich_print(f":thumbs_up: Submission of '{full_path}' for [bold blue]{model_name}[/bold blue] is successful! :thumbs_up:")

    def _get_model_id(self, model_name: str) -> str:
        """ Get ID needed for prediction uploading. """
        # get_model_mapping is a property: it must be read, not called.
        model_mapping = self.get_model_mapping
        if model_name not in model_mapping:
            raise KeyError(f"Model '{model_name}' not found in model mapping. Available models: {list(model_mapping)}")
        return model_mapping[model_name]

    @property
    def get_model_mapping(self) -> dict:
        """ Mapping between raw model names and model IDs. """
        return self.napi.get_models()

<IPython.core.display.Javascript object>

In [None]:
# Initialization (Random credentials)
key = Key(pub_id="UFVCTElDX0lE", secret_key="U1VQRVJfU0VDUkVUX0tFWQ==")
num_sub = NumeraiClassicSubmittor(directory_path="test_sub", key=key)
# The constructor is expected to have created the base directory.
assert num_sub.dir.is_dir()

# Create random dataframe: uniform values in `n_columns` target columns, indexed by a random UUID named 'id'
n_rows, n_columns = 100, 20
targets = [f"target_{i}" for i in range(n_columns)]
dataf = pd.DataFrame(
    np.random.uniform(size=(n_rows, n_columns)),
    columns=targets,
    index=pd.Index([uuid.uuid4() for _ in range(n_rows)], name="id"),
)
dataf.head(1)

Unnamed: 0_level_0,target_0,target_1,target_2,target_3,target_4,target_5,target_6,target_7,target_8,target_9,target_10,target_11,target_12,target_13,target_14,target_15,target_16,target_17,target_18,target_19
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
e7ea82dd-411c-4a5f-8f2f-b3ece34377f3,0.278266,0.57115,0.960328,0.674253,0.09427,0.984954,0.855347,0.935789,0.327705,0.213176,0.524148,0.189792,0.262066,0.360628,0.146035,0.567181,0.764313,0.353695,0.519514,0.146709


<IPython.core.display.Javascript object>

In [None]:
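# Save CSV and upload in one call.
# Left commented out: the upload step needs valid Numerai credentials, and `key` above holds random placeholder values.
# num_sub.full_submission(dataf=dataf, file_name='test.csv', target_columns=targets, model_name="test")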

<IPython.core.display.Javascript object>

In [None]:
# Write the target columns to test_sub/test.csv, then read the file back to inspect what was saved.
num_sub.save_csv(
    dataf=dataf,
    file_name='test.csv',
    target_columns=targets,
)
pd.read_csv(Path("test_sub") / "test.csv").head(2)

Unnamed: 0,id,target_0,target_1,target_2,target_3,target_4,target_5,target_6,target_7,target_8,...,target_10,target_11,target_12,target_13,target_14,target_15,target_16,target_17,target_18,target_19
0,e7ea82dd-411c-4a5f-8f2f-b3ece34377f3,0.278266,0.57115,0.960328,0.674253,0.09427,0.984954,0.855347,0.935789,0.327705,...,0.524148,0.189792,0.262066,0.360628,0.146035,0.567181,0.764313,0.353695,0.519514,0.146709
1,bfb1ebed-0ed9-4ea5-9479-cbbee2cb02ca,0.810822,0.234707,0.529869,0.763643,0.145,0.155458,0.846352,0.658247,0.225318,...,0.866812,0.158678,0.136907,0.800027,0.57291,0.192533,0.096786,0.038681,0.437323,0.454955


<IPython.core.display.Javascript object>

## 3. Numerai Signals

<IPython.core.display.Javascript object>

In [None]:
#hide
# Run this cell to sync all changes with library
# (notebook2script converts the project's notebooks; each one is reported as "Converted <name>.ipynb.")
from nbdev.export import notebook2script; notebook2script()

Converted 00_download.ipynb.
Converted 01_dataloaders.ipynb.
Converted 02_dataset.ipynb.
Converted 03_preprocessing.ipynb.
Converted 04a_model.ipynb.
Converted 04b_modelpipeline.ipynb.
Converted 05_postprocessing.ipynb.
Converted 06_prediction_dataset.ipynb.
Converted 07_evaluation.ipynb.
Converted 08_key.ipynb.
Converted 09_submission.ipynb.
Converted index.ipynb.


<IPython.core.display.Javascript object>