In [1]:
# hide
%load_ext autoreload
%autoreload 2

<IPython.core.display.Javascript object>

In [2]:
# hide
from numerai_blocks.download import NumeraiClassicDownloader
from numerai_blocks.numerframe import create_numerframe
from numerai_blocks.postprocessing import FeatureNeutralizer
from numerai_blocks.model import SingleModel
from numerai_blocks.model_pipeline import ModelPipeline
from numerai_blocks.key import load_key_from_json
from numerai_blocks.submission import NumeraiClassicSubmittor

<IPython.core.display.Javascript object>

# Numerai Blocks

> Tools for solid Numerai pipelines

## 1. Install

`pip install numerai-blocks`

## 2. How to use

### 2.1. Contents

Example and educational notebooks can be found in the `edu_nbs` directory. Development notebooks are in the `nbs` directory.

The library features the following tools to build your Numerai pipelines:

1. Downloaders
2. NumerFrame
3. Preprocessing
4. Model
5. Postprocessing
6. ModelPipeline (and ModelPipelineCollection)
7. Evaluators
8. Key (containing authentication info)
9. Submittors
10. Staking functionality

### 2.2. Examples

Below we illustrate a few basic use cases for inference pipelines. To learn about the features of the framework in more depth, check out the notebooks in the `edu_nbs` directory.

#### 2.2.1. Numerai Classic

In [3]:
#other

# End-to-end inference example for Numerai Classic:
# download -> wrap in a NumerFrame -> run a model pipeline -> (optionally) submit -> clean up.

# --- 1. Download version 2 data ---
# "data" is the downloader's base directory; the inference data is stored in
# its "current_round" subfolder (see the file path used in step 2).
downloader = NumeraiClassicDownloader("data")
downloader.download_inference_data("current_round")

# --- 2. Initialize NumerFrame ---
# The metadata travels with the frame and is read back below through
# `dataf.meta.<key>`, so paths and names are defined in one place only.
metadata = {"version": 2,
            "joblib_model_name": "test",
            "joblib_model_path": "test_assets/joblib_v2_example_model.joblib",
            "numerai_model_name": "test_model1",
            "key_path": "test_assets/test_credentials.json"
            }
dataf = create_numerframe(file_path="data/current_round/numerai_tournament_data.parquet",
                          metadata=metadata)

# --- 3. Define and run pipeline ---
# Single model loaded from the joblib file referenced in the metadata.
model1 = SingleModel(dataf.meta.joblib_model_path,
                     model_name=dataf.meta.joblib_model_name)
# No preprocessing and 0.5 feature neutralization.
# `pred_name` presumably must match the prediction column written by `model1`
# ("prediction_<model_name>") - verify against SingleModel.
pipeline = ModelPipeline(preprocessors=[],
                         models=[model1],
                         postprocessors=[FeatureNeutralizer(
                             pred_name=f"prediction_{dataf.meta.joblib_model_name}",
                             proportion=0.5
                         )]
                         )
# NOTE(review): `dataset` (the pipeline output) is not used again in this cell,
# while the commented-out submission below passes `dataf`. Confirm whether the
# pipeline adds its columns to `dataf` in place; if not, submit `dataset` instead.
dataset = pipeline(dataf)

# --- 4. Submit ---
# Random credentials
key = load_key_from_json(dataf.meta.key_path)
submittor = NumeraiClassicSubmittor(directory_path="sub_current_round", key=key)
# Only works with valid key credentials, hence left commented out here.
# submittor.full_submission(dataf=dataf,
#                           cols=f"prediction_{dataf.meta.joblib_model_name}_neutralized_0.5",
#                           file_name=f"{dataf.meta.numerai_model_name}.csv",
#                           model_name=dataf.meta.numerai_model_name,
#                           version=dataf.meta.version
#                           )

# --- 5. Clean up environment (optional) ---
# Removes the base directories passed to the downloader ("data") and the
# submittor ("sub_current_round") above.
downloader.remove_base_directory()
submittor.remove_base_directory()

2022-02-14 16:28:05,183 INFO numerapi.utils: starting download
data/current_round/numerai_tournament_data.parquet: 582MB [02:11, 4.43MB/s]                             


f4103c3494664d32956928d93a256863 Preprocessing:: 0it [00:00, ?it/s]

f4103c3494664d32956928d93a256863 Model prediction:   0%|          | 0/1 [00:00<?, ?it/s]

2022-02-14 16:30:47,215 INFO numexpr.utils: Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
2022-02-14 16:30:47,217 INFO numexpr.utils: NumExpr defaulting to 8 threads.


f4103c3494664d32956928d93a256863 Postprocessing:   0%|          | 0/1 [00:00<?, ?it/s]

<IPython.core.display.Javascript object>

In [4]:
# hide_input
from rich.console import Console
from rich.tree import Tree

console = Console(record=True, width=100)

# Directory layout before the example runs: only the bundled test assets.
tree = Tree(":computer: Structure before starting", guide_style="bold bright_black")
model_tree = tree.add(":file_folder: test_assets")
model_tree.add(":page_facing_up: joblib_v2_example_model.joblib")
model_tree.add(":page_facing_up: test_credentials.json")

console.print(tree)

# Directory layout after submitting: the downloaded data and the submission
# folder now sit next to the (unchanged) test assets.
tree2 = Tree(":computer: Structure after submitting", guide_style="bold bright_black")
data_tree = tree2.add(":file_folder: data")
current_tree = data_tree.add(":file_folder: current_round")
current_tree.add(":page_facing_up: numerai_tournament_data.parquet")
sub_tree = tree2.add(":file_folder: sub_current_round")
sub_tree.add(":page_facing_up: test_model1.csv")
# The assets folder must be attached to `tree2`. Attaching it to `tree` (the
# already-printed "before" tree) would leave it out of the "after" rendering.
model_tree2 = tree2.add(":file_folder: test_assets")
model_tree2.add(":page_facing_up: joblib_v2_example_model.joblib")
model_tree2.add(":page_facing_up: test_credentials.json")

console.print(tree2)

<IPython.core.display.Javascript object>

## Contributing

After you clone this repository, please run `nbdev_install_git_hooks` in your terminal. This sets up git hooks that strip extraneous metadata from the notebooks (e.g. which cells you ran), which would otherwise cause unnecessary merge conflicts.

### Branch structure


Every new feature should be implemented in a branch that branches off from `dev` and follows the naming convention `feature/{FEATURE_DESCRIPTION}`.

In [5]:
# hide_input
console = Console(record=True, width=100)

# Visualize the branching model: main (release) -> dev -> feature branches.
tree = Tree("Branch structure", guide_style="bold bright_black")

main_tree = tree.add("📦 main (release)", guide_style="bright_black")
dev_tree = main_tree.add("👨‍💻 dev")
# Feature branches are siblings under `dev`. The returned nodes are not needed
# because nothing is nested below them.
for feature_number in (1, 2, 3):
    dev_tree.add(f":sparkles: feature/{feature_number}")

console.print(tree)

<IPython.core.display.Javascript object>

In [6]:
# hide
# Run this cell to sync all changes with the library.
# The recorded output below lists every notebook that was converted.
from nbdev.export import notebook2script

notebook2script()

Converted 01_download.ipynb.
Converted 02_numerframe.ipynb.
Converted 03_preprocessing.ipynb.
Converted 04_model.ipynb.
Converted 05_postprocessing.ipynb.
Converted 06_modelpipeline.ipynb.
Converted 07_evaluation.ipynb.
Converted 08_key.ipynb.
Converted 09_submission.ipynb.
Converted 10_staking.ipynb.
Converted index.ipynb.


<IPython.core.display.Javascript object>