In [None]:
# Pick up edits to imported modules without restarting the kernel.
%load_ext autoreload
%autoreload 2
# NOTE(review): nb_black and lab_black look like code-formatting extensions for the classic
# Notebook and JupyterLab front ends respectively — confirm both are needed in one kernel.
%load_ext nb_black
%load_ext lab_black

<IPython.core.display.Javascript object>

In [None]:
# default_exp submission

<IPython.core.display.Javascript object>

# Submission

In [None]:
#hide
from nbdev.showdoc import *

<IPython.core.display.Javascript object>

In [None]:
#export
import os
import uuid
import numpy as np
import pandas as pd
from typing import Union
from pathlib import Path
from copy import deepcopy
from random import choices
from datetime import datetime
from abc import abstractmethod
from typeguard import typechecked
from string import ascii_uppercase
from rich import print as rich_print
from numerapi import NumerAPI, SignalsAPI
from dateutil.relativedelta import relativedelta, FR

from numerai_blocks.download import BaseIO
from numerai_blocks.key import Key

<IPython.core.display.Javascript object>

## 1. Base

In [None]:
#export
@typechecked
class BaseSubmittor(BaseIO):
    """
    Common logic for writing prediction CSVs and uploading them through a Numerai API client.

    :param directory_path: Base directory to save and read prediction files from.
    :param api: NumerAPI or SignalsAPI instance used to look up models and upload predictions.
    """
    def __init__(self, directory_path: str, api: Union[NumerAPI, SignalsAPI]):
        super(BaseSubmittor, self).__init__(directory_path)
        self.dir = Path(directory_path)
        # Provided by BaseIO; presumably makes sure 'directory_path' exists on disk.
        self._create_directory()
        self.api = api

    @abstractmethod
    def save_csv(self, dataf: pd.DataFrame, file_name: str, cols: list, *args, **kwargs):
        """
        For Numerai Classic: Save index column + 'cols' (targets) to CSV.
        For Numerai Signals: Save ticker, friday_date, data_type and signal columns to CSV.
        """
        ...

    def upload_predictions(self, file_name: str, model_name: str, *args, **kwargs):
        """
        Upload CSV file to Numerai for given model name.
        :param file_name: File name/path relative to directory_path.
        :param model_name: Lowercase raw model name (For example, 'integration_test').
        *args, **kwargs will be passed to the API's upload_predictions call.
        :raises KeyError: If model_name is not present in the model mapping.
        """
        full_path = str(self.dir / file_name)
        model_id = self._get_model_id(model_name=model_name)
        api_type = str(self.api.__class__.__name__)
        rich_print(f":airplane: {api_type}: Uploading predictions from '{full_path}' for model [bold blue]'{model_name}'[/bold blue] (model_id='{model_id}') :airplane:")
        self.api.upload_predictions(file_path=full_path,
                                    model_id=model_id,
                                    *args, **kwargs)
        rich_print(f":thumbs_up: {api_type} submission of '{full_path}' for [bold blue]{model_name}[/bold blue] is successful! :thumbs_up:")

    def full_submission(self, dataf: pd.DataFrame, file_name: str, model_name: str, cols: list, *args, **kwargs):
        """
        Save DataFrame and upload predictions through API.
        Note that the same *args, **kwargs are forwarded to both save_csv and upload_predictions.
        """
        self.save_csv(dataf=dataf, file_name=file_name, cols=cols, *args, **kwargs)
        self.upload_predictions(file_name=file_name, model_name=model_name, *args, **kwargs)

    def __call__(self, dataf: pd.DataFrame, file_name: str, model_name: str, cols: list, *args, **kwargs):
        """
        The most common use case will be to create a CSV and submit it immediately after that.
        full_submission handles this.
        """
        self.full_submission(dataf=dataf,
                             file_name=file_name,
                             model_name=model_name,
                             cols=cols,
                             *args, **kwargs)

    def _get_model_id(self, model_name: str) -> str:
        """
        Get ID needed for prediction uploading.
        :param model_name: Raw model name to look up in the model mapping.
        :raises KeyError: If model_name is not a key of the model mapping.
        """
        # get_model_mapping is a property: it must be read, not called.
        # Calling it would invoke the returned dict and raise TypeError.
        model_mapping = self.get_model_mapping
        try:
            return model_mapping[model_name]
        except KeyError:
            raise KeyError(
                f"Model name '{model_name}' not found. Available models: {list(model_mapping)}"
            ) from None

    @property
    def get_model_mapping(self) -> dict:
        """ Mapping between raw model names and model IDs. """
        return self.api.get_models()

<IPython.core.display.Javascript object>

## 2. Numerai Classic

In [None]:
#export
class NumeraiClassicSubmittor(BaseSubmittor):
    """
    Submittor for the Numerai Classic tournament.
    :param directory_path: Base directory to save and read prediction files from.
    :param key: Key object (numerai-blocks.key.Key) containing valid credentials for Numerai Classic.
    *args, **kwargs will be passed to NumerAPI initialization.
    """
    def __init__(self, directory_path: str, key: Key, *args, **kwargs):
        # Build the Classic API client from the credentials stored on the Key object.
        classic_api = NumerAPI(*args,
                               public_id=key.pub_id,
                               secret_key=key.secret_key,
                               **kwargs)
        super().__init__(directory_path=directory_path, api=classic_api)

    def save_csv(self, dataf: pd.DataFrame, file_name: str, cols: list, *args, **kwargs):
        """
        Write the selected prediction columns (plus the index) to a CSV in the base directory.
        :param dataf: DataFrame which should have the following columns:
        1. id (as index column)
        2. cols (for example ['target'] or [20_NUMERAI_TARGETS]).
        :param file_name: File name/path relative to the base directory.
        :param cols: Columns of dataf to write.
        *args, **kwargs will be passed to DataFrame.to_csv.
        """
        full_path = str(self.dir / file_name)
        rich_print(f":page_facing_up: Saving predictions CSV to '{full_path}'. :page_facing_up:")
        predictions = dataf[cols]
        predictions.to_csv(full_path, *args, **kwargs)


<IPython.core.display.Javascript object>

### NumeraiClassicSubmittor tests

In [None]:
# Initialization (Random credentials)
test_dir = "test_sub"
classic_key = Key(pub_id="UFVCTElDX0lE", secret_key="U1VQRVJfU0VDUkVUX0tFWQ==")
num_sub = NumeraiClassicSubmittor(directory_path=test_dir, key=classic_key)
# Constructing the submittor should have created the base directory.
assert num_sub.dir.is_dir()

# Create random dataframe: uniform targets indexed by a random UUID 'id'
n_rows = 100
n_columns = 20
targets = [f"target_{i}" for i in range(n_columns)]
test_dataf = pd.DataFrame(
    np.random.uniform(size=(n_rows, n_columns)),
    columns=targets,
    index=pd.Index([uuid.uuid4() for _ in range(n_rows)], name='id'),
)
test_dataf.head(2)

Unnamed: 0_level_0,target_0,target_1,target_2,target_3,target_4,target_5,target_6,target_7,target_8,target_9,target_10,target_11,target_12,target_13,target_14,target_15,target_16,target_17,target_18,target_19
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
858d4d34-feb6-4019-8e11-818cb1d1afc5,0.822536,0.097015,0.979799,0.189535,0.250934,0.686003,0.835909,0.041455,0.240206,0.482361,0.754157,0.135181,0.151749,0.404956,0.884712,0.840387,0.582667,0.359673,0.595743,0.383371
d8e1f149-e1cb-431c-a964-d5e1df213e8a,0.819038,0.010192,0.214979,0.576635,0.290451,0.067624,0.715359,0.044471,0.626935,0.790519,0.269919,0.909983,0.519294,0.376481,0.191359,0.016604,0.978589,0.90324,0.761912,0.880896


<IPython.core.display.Javascript object>

In [None]:
# Write the random targets to disk, then read the file back to inspect what was saved.
file_name = 'test.csv'
num_sub.save_csv(test_dataf, file_name, targets)
pd.read_csv(f"{test_dir}/{file_name}").head(n=2)

Unnamed: 0,id,target_0,target_1,target_2,target_3,target_4,target_5,target_6,target_7,target_8,...,target_10,target_11,target_12,target_13,target_14,target_15,target_16,target_17,target_18,target_19
0,858d4d34-feb6-4019-8e11-818cb1d1afc5,0.822536,0.097015,0.979799,0.189535,0.250934,0.686003,0.835909,0.041455,0.240206,...,0.754157,0.135181,0.151749,0.404956,0.884712,0.840387,0.582667,0.359673,0.595743,0.383371
1,d8e1f149-e1cb-431c-a964-d5e1df213e8a,0.819038,0.010192,0.214979,0.576635,0.290451,0.067624,0.715359,0.044471,0.626935,...,0.269919,0.909983,0.519294,0.376481,0.191359,0.016604,0.978589,0.90324,0.761912,0.880896


<IPython.core.display.Javascript object>

In [None]:
# TODO Test full submission with dummy credentials
# Save CSV and upload
# num_sub.full_submission(dataf=test_dataf, file_name='test.csv', cols=targets, model_name="test")

<IPython.core.display.Javascript object>

In [None]:
# Remove contents
num_sub.remove_base_directory()
assert not os.path.exists(test_dir)

<IPython.core.display.Javascript object>

## 3. Numerai Signals

In [None]:
#export
class NumeraiSignalsSubmittor(BaseSubmittor):
    """
    Submit for Numerai Signals
    :param directory_path: Base directory to save and read prediction files from.
    :param key: Key object (numerai-blocks.key.Key) containing valid credentials for Numerai Signals.
    *args, **kwargs will be passed to SignalsAPI initialization.
    """
    def __init__(self, directory_path: str, key: Key, *args, **kwargs):
        api = SignalsAPI(public_id=key.pub_id, secret_key=key.secret_key, *args, **kwargs)
        super(NumeraiSignalsSubmittor, self).__init__(directory_path=directory_path, api=api)
        # Column names accepted as the ticker identifier in a Signals submission.
        self.supported_ticker_formats = ['ticker', 'cusip', 'sedol', 'numerai_ticker', 'bloomberg_ticker']

    def save_csv(self, dataf: pd.DataFrame, file_name: str, cols: list = None,
                 *args, **kwargs):
        """
        Validate a Signals prediction DataFrame and write the selected columns to CSV.
        :param dataf: DataFrame which should have at least the following columns:
         1. ticker [cusip, sedol, numerai_ticker or bloomberg_ticker]
         2. friday_date
         3. data_type
         4. signal (Values between 0 and 1 (exclusive))
        :param file_name: Name for file (For example 'sub_<model_name>_round<n>.csv')
        :param cols: All cols that should be passed to CSV. Defaults to 4 standard columns.
         ('ticker', 'friday_date', 'data_type' and 'signal')
        *args, **kwargs will be passed to DataFrame.to_csv (the index is never written).
        :raises NotImplementedError: If 'cols' contains none of the supported ticker formats.
        :raises ValueError: If any value in 'signal' is not strictly between 0 and 1.
        """
        if not cols:
            cols = ['ticker', 'friday_date', 'data_type', 'signal']

        # Check for valid ticker format
        valid_tickers = set(cols).intersection(set(self.supported_ticker_formats))
        if not valid_tickers:
            raise NotImplementedError(f"Ticker format used in given 'cols' ({cols}) is not supported.")

        # signal must be strictly inside (0, 1). Series.between is inclusive by default,
        # which would let exact 0 and 1 through, so compare with strict inequalities instead.
        signal = dataf['signal']
        if not (signal.gt(0) & signal.lt(1)).all():
            min_val, max_val = signal.min(), signal.max()
            raise ValueError(f"Values in 'signal' must be between 0 and 1 (exclusive). Found min value of '{min_val}' and max value of '{max_val}'")

        full_path = str(self.dir / file_name)
        rich_print(f":page_facing_up: Saving Signals predictions CSV to '{full_path}'. :page_facing_up:")
        dataf[cols].reset_index(drop=True).to_csv(full_path, index=False, *args, **kwargs)

<IPython.core.display.Javascript object>

### NumeraiSignalsSubmittor tests

In [None]:
# Initialization (Random credentials)
test_dir_signals = "test_sub_signals"
signals_key = Key(pub_id="UFVCTElDX0lE", secret_key="U1VQRVJfU0VDUkVUX0tFWQ==")
signals_sub = NumeraiSignalsSubmittor(directory_path=test_dir_signals,
                                  key=signals_key)
assert signals_sub.dir.is_dir()

<IPython.core.display.Javascript object>

In [None]:
# Create random dataframe
n_rows, n_columns = 5000, 4
signals_test_dataf = pd.DataFrame(np.random.uniform(size=(n_rows, 1)), columns=['signal'])
signals_test_dataf['ticker'] = [''.join(choices(ascii_uppercase, k=4)) for _ in range(n_rows)]
last_friday = int(str((datetime.now() + relativedelta(weekday=FR(-1))).date()).replace("-", ""))
signals_test_dataf['friday_date'] = last_friday
signals_test_dataf['data_type'] = 'live'
signals_test_dataf['aux_column'] = "metadata"
signals_test_dataf.head(2)

Unnamed: 0,signal,ticker,friday_date,data_type,aux_column
0,0.04903,NFRM,20211231,live,metadata
1,0.561162,PKLX,20211231,live,metadata


<IPython.core.display.Javascript object>

In [None]:
# Save only the four standard Signals columns, then read the file back to inspect it.
signals_cols = ['signal', 'ticker', 'friday_date', 'data_type']
file_name = 'signals_test.csv'
signals_sub.save_csv(signals_test_dataf, file_name, signals_cols)
pd.read_csv(f"{test_dir_signals}/{file_name}").head(n=2)

Unnamed: 0,signal,ticker,friday_date,data_type
0,0.04903,NFRM,20211231,live
1,0.561162,PKLX,20211231,live


<IPython.core.display.Javascript object>

Saving a Signals CSV should fail if `cols` contains no supported ticker column or if `signal` has values outside the range $(0, 1)$.

In [None]:
def test_signal_validity(submittor: NumeraiSignalsSubmittor, signals_dataf: pd.DataFrame):
    """
    Check that saving a frame whose 'signal' values were shifted by +10 is rejected.
    :return: True if a ValueError was raised, False otherwise.
    """
    raised = False
    try:
        # Work on a copy so the caller's DataFrame is left untouched.
        shifted = deepcopy(signals_dataf)
        shifted['signal'] = shifted['signal'] + 10
        all_columns = list(shifted.columns)
        submittor.save_csv(shifted, file_name="should_not_save.csv", cols=all_columns)
    except ValueError:
        raised = True
    return raised

def test_ticker_validity(submittor: NumeraiSignalsSubmittor, signals_dataf: pd.DataFrame):
    """
    Check that saving a frame whose 'ticker' column was renamed to an unsupported name is rejected.
    :return: True if a NotImplementedError was raised, False otherwise.
    """
    raised = False
    try:
        # Work on a copy so the caller's DataFrame is left untouched.
        renamed = deepcopy(signals_dataf)
        renamed = renamed.rename(columns={'ticker': 'not_a_valid_ticker_format'})
        all_columns = list(renamed.columns)
        submittor.save_csv(renamed, file_name="should_not_save.csv", cols=all_columns)
    except NotImplementedError:
        raised = True
    return raised

# Both invalid inputs must be rejected by save_csv.
assert test_signal_validity(submittor=signals_sub, signals_dataf=signals_test_dataf)
assert test_ticker_validity(submittor=signals_sub, signals_dataf=signals_test_dataf)

<IPython.core.display.Javascript object>

In [None]:
# TODO Test full submission with dummy credentials
# Save CSV and upload
# signals_sub.full_submission(dataf=signals_test_dataf, file_name='signals_test.csv', cols=signals_cols, model_name="test")

<IPython.core.display.Javascript object>

In [None]:
# Remove contents
signals_sub.remove_base_directory()
assert not os.path.exists(test_dir_signals)

<IPython.core.display.Javascript object>

------------------------------------------------------------

In [None]:
#hide
# Run this cell to sync all changes with library
from nbdev.export import notebook2script

notebook2script()

Converted 00_download.ipynb.
Converted 01_dataloaders.ipynb.
Converted 02_dataset.ipynb.
Converted 03_preprocessing.ipynb.
Converted 04a_model.ipynb.
Converted 04b_modelpipeline.ipynb.
Converted 05_postprocessing.ipynb.
Converted 06_prediction_dataset.ipynb.
Converted 07_evaluation.ipynb.
Converted 08_key.ipynb.
Converted 09_submission.ipynb.
Converted index.ipynb.


<IPython.core.display.Javascript object>