In [8]:
# hide
%load_ext autoreload
%autoreload 2
%load_ext nb_black
%load_ext lab_black

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The nb_black extension is already loaded. To reload it, use:
  %reload_ext nb_black
The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


<IPython.core.display.Javascript object>

In [9]:
# hide
from numerai_blocks import *
from numerai_blocks.download import NumeraiClassicDownloader
from numerai_blocks.dataset import create_dataset
from numerai_blocks.postprocessing import FeatureNeutralizer
from numerai_blocks.model import JoblibModel
from numerai_blocks.model_pipeline import ModelPipeline
from numerai_blocks.key import load_key_from_json
from numerai_blocks.submission import NumeraiClassicSubmittor

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Numerai Blocks

> Tools for solid Numerai pipelines

## 1. Install

`pip install numerai-blocks`

## 2. How to use

### 2.1. Contents

Example and educational notebooks can be found in the `edu_nbs` directory. Development notebooks are in the `nbs` directory.

The library features the following tools to build your Numerai pipelines:

- Downloaders
- Dataloaders
- Dataset objects (with arbitrary metadata)
- Preprocessing
- Model
- ModelPipeline and ModelPipelineCollection
- Postprocessing
- Prediction dataset (with arbitrary metadata)
- Evaluators
- Key (containing authentication info)
- Submittors
- Staker

### 2.2. Quick Examples

#### 2.2.1. Numerai Classic

In [10]:
# slow
# # Download version 2 data
# downloader = NumeraiClassicDownloader("data")
# downloader.download_inference_data("current_round")
#
# # Initialize Dataset
# metadata = {"version": 2, "model_name": "MY_MODEL"}
# dataset = create_dataset(file_path="data/current_round/numerai_tournament_data.parquet", **metadata)
#
# # Define and run pipeline
# model1 = JoblibModel(model_directory="dir_with_joblib_models",
#                      model_name="test_model")
# pipeline = ModelPipeline(pipeline_name=dataset.base_model_name,
#                          preprocessors=[],
#                          models=[model1],
#                          postprocessors=[FeatureNeutralizer(proportion=0.5)])
# dataset = pipeline(dataset)
#
# # Submit
# key = load_key_from_json("my_key.json")
# submittor = NumeraiClassicSubmittor(directory_path="sub_current_round", key=key)
# submittor.full_submission(dataf=dataset.dataf,
#                           cols="prediction_test_model_neutralized_0.5",
#                           file_name=f"{dataset.model_name}.csv",
#                           model_name=dataset.model_name,
#                           version=dataset.version
#                           )
#
# # Remove data and subs
# downloader.remove_base_directory()
# submittor.remove_base_directory()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [11]:
# hide_input
from rich.console import Console
from rich.tree import Tree

# Labels for the joblib model files shown under dir_with_joblib_models in
# both the "before" and the "after" tree.
model_labels = [f":page_facing_up: model{number}.joblib" for number in range(1, 6)]

console = Console(record=True, width=100)

# Directory layout before the quick example is run.
tree = Tree(":computer: Structure before starting", guide_style="bold bright_black")
tree.add(":page_facing_up: my_key.json")
model_tree = tree.add(":file_folder: dir_with_joblib_models")
for label in model_labels:
    model_tree.add(label)

console.print(tree)

# Directory layout after submitting: the downloaded data and the submission
# directory appear next to the key file and the model directory.
tree2 = Tree(":computer: Structure after submitting", guide_style="bold bright_black")
current_tree = tree2.add(":file_folder: data").add(":file_folder: current_round")
current_tree.add(":page_facing_up: numerai_tournament_data.parquet")
tree2.add(":file_folder: sub_current_round").add(":page_facing_up: MY_MODEL.csv")
tree2.add(":page_facing_up: my_key.json")
model_tree = tree2.add(":file_folder: dir_with_joblib_models")
for label in model_labels:
    model_tree.add(label)

console.print(tree2)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

#### 2.2.2. Numerai Signals

In [12]:
# slow

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Contributing

After you clone this repository, please run `nbdev_install_git_hooks` in your terminal. This sets up git hooks that strip extraneous metadata from the notebooks (e.g. which cells you ran), which would otherwise cause unnecessary merge conflicts.

### Branch structure


In [13]:
# hide_input
# Render the repository's branch layout as a tree: main (release) -> dev ->
# feature branches. Console and Tree are the rich classes imported in the
# earlier directory-structure cell.
console = Console(record=True, width=100)

tree = Tree("Branch structure", guide_style="bold bright_black")

main_tree = tree.add("📦 main (release)", guide_style="bright_black")
dev_tree = main_tree.add("👨‍💻 dev")
# All feature branches hang off dev; the returned nodes are not needed because
# nothing is nested underneath them.
for feature_number in (1, 2, 3):
    dev_tree.add(f":sparkles: feature/{feature_number}")

console.print(tree)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [14]:
# hide
# Run this cell to sync all changes with the library.
# notebook2script() reports each notebook it converts (e.g. "Converted index.ipynb.").
from nbdev.export import notebook2script

notebook2script()

Converted 01_download.ipynb.
Converted 02_dataset.ipynb.
Converted 03_preprocessing.ipynb.
Converted 04_model.ipynb.
Converted 05_postprocessing.ipynb.
Converted 06_modelpipeline.ipynb.
Converted 07_evaluation.ipynb.
Converted 08_key.ipynb.
Converted 09_submission.ipynb.
Converted 10_staking.ipynb.
Converted index.ipynb.


<IPython.core.display.Javascript object>

Converted 01_download.ipynb.
Converted 02_dataset.ipynb.
Converted 03_preprocessing.ipynb.
Converted 04_model.ipynb.
Converted 05_postprocessing.ipynb.
Converted 06_modelpipeline.ipynb.
Converted 07_evaluation.ipynb.
Converted 08_key.ipynb.
Converted 09_submission.ipynb.
Converted 10_staking.ipynb.
Converted index.ipynb.


<IPython.core.display.Javascript object>