In [9]:
from pathlib import Path
from pydantic import BaseModel, root_validator
from typing import Optional
import ray
from ray import tune


We could just make a config like this

In [10]:
config = {"input_size": 3, "output_size": 20, "data_dir": Path(".")}


In [11]:
config["input_size"]

3

But we get a config on steroids with pydantic

In [12]:
class SearchSpace(BaseModel):
    """Typed configuration: pydantic validates and casts the fields on creation."""

    input_size: int
    output_size: int
    # Optional: may be left out, in which case the value is None.
    tune_dir: Optional[Path]
    # A string such as "." is converted to a Path.
    data_dir: Path


Note how the `"."` data_dir becomes a `PosixPath`
Note how `Optional` allows for leaving the argument out, and the value defaults to `None`.

In [15]:
config = SearchSpace(input_size=3.0, output_size=20, data_dir=".")
config


SearchSpace(input_size=3, output_size=20, tune_dir=None, data_dir=PosixPath('.'))

If possible, it will cast elements, e.g. `input_size="3"` becomes an integer

In [14]:
config = SearchSpace(input_size="3", output_size=20, data_dir=".")
config


SearchSpace(input_size=3, output_size=20, tune_dir=None, data_dir=PosixPath('.'))

And if you try to give `data_dir` something that can't be cast to a `Path`, you will get an error.
The advantage is that you get your errors at the place where you make them, and not 10 steps later when running the trainloop...

In [17]:
try:
    config = SearchSpace(input_size="3", output_size=20, data_dir=3.4)
except ValueError as e:
    print(e)


1 validation error for SearchSpace
data_dir
  value is not a valid path (type=type_error.path)


Let's add the ray.tune ranges. To find out what the type is, we simply call the built-in `type()` function.

In [18]:
type(1.0)

float

In [19]:
type(tune.uniform(0.0, 10.0))


ray.tune.sample.Float

But if we simply add that...

In [20]:
from typing import Union, Optional, Dict

# Alias for the class of the object that `tune.randint` returns.
SAMPLE_INT = ray.tune.sample.Integer

try:
    # The class definition itself fails here: pydantic has no validator for
    # SAMPLE_INT and raises a RuntimeError while the model is being created.

    class SearchSpace(BaseModel):
        input_size: int
        hidden_size: Union[int, SAMPLE_INT] = tune.randint(16, 128)
        output_size: int
        tune_dir: Optional[Path]
        data_dir: Path

except RuntimeError as e:
    print(e)


no validator found for <class 'ray.tune.sample.Integer'>, see `arbitrary_types_allowed` in Config


Pydantic complains that it does not know how to validate the type. A simple solution is to add `arbitrary_types_allowed`

In [21]:
class SearchSpace(BaseModel):
    """Search space whose `hidden_size` is a fixed int or a ray.tune integer sampler."""

    input_size: int
    hidden_size: Union[int, SAMPLE_INT]
    output_size: int = 20
    # NOTE(review): this default is a plain str and appears as '.' (not a Path)
    # in the repr of an instance, unlike data_dir; consider Path(".") instead.
    tune_dir: Path = "."
    data_dir: Path

    class Config:
        # Lets pydantic accept SAMPLE_INT, for which it has no built-in validator.
        arbitrary_types_allowed = True


config = SearchSpace(input_size=3, hidden_size=32, data_dir=".")
config


SearchSpace(input_size=3, hidden_size=32, output_size=20, tune_dir='.', data_dir=PosixPath('.'))

Because of the `Union`, an integer will work

In [22]:
config = SearchSpace(input_size=3, hidden_size=tune.randint(16, 128), data_dir=".")
config


SearchSpace(input_size=3, hidden_size=<ray.tune.sample.Integer object at 0x11b42a580>, output_size=20, tune_dir='.', data_dir=PosixPath('.'))

And a `tune.randint` will work.

In [23]:
# A float sampler matches neither `int` nor SAMPLE_INT, so validation fails.
# pydantic's ValidationError is a ValueError subclass; catching only that keeps
# unrelated errors (typos, import problems, ...) from being silently printed.
try:
    config = SearchSpace(input_size=3, hidden_size=tune.uniform(0.0, 0.5), data_dir=".")
except ValueError as e:
    print(e)


2 validation errors for SearchSpace
hidden_size
  value is not a valid integer (type=type_error.integer)
hidden_size
  instance of Integer expected (type=type_error.arbitrary_type; expected_arbitrary_type=Integer)


But a `tune.uniform` fails!

Also, pydantic won't know how to check for `SAMPLE_INT`.
You can write your own validator for a class. Implement a `__get_validators__` function,
which will yield one or more validators. You can find more on that in the [documentation](https://pydantic-docs.helpmanual.io/usage/types/#custom-data-types)


In [24]:
class SampleFloat:
    """Custom pydantic type that accepts only `ray.tune.sample.Float` objects."""

    @classmethod
    def __get_validators__(cls):
        """Yield the validator(s) pydantic should run for this type."""
        yield cls.validate

    @classmethod
    def validate(cls, v):
        """Return `v` unchanged if it is a ray.tune Float sampler.

        Raises:
            TypeError: if `v` is any other kind of object.
        """
        expected = ray.tune.sample.Float
        if isinstance(v, expected):
            return v
        # Show the rejected value before failing.
        print(f"testing {v}")
        raise TypeError(f"{expected} required, found {type(v)}")


We just ran a simple check. But you can imagine more complex checks (e.g. for phone numbers etc)

In [25]:
class SearchSpace(BaseModel):
    # Checked by SampleFloat.validate, which accepts only ray.tune Float samplers.
    dropout: SampleFloat


# An integer sampler is rejected by the SampleFloat validator. pydantic wraps
# the validator's TypeError in a ValidationError, which is a ValueError
# subclass, so catching ValueError is enough and does not hide unrelated bugs.
try:
    config = SearchSpace(dropout=tune.randint(16, 32))
except ValueError as e:
    print(e)


testing <ray.tune.sample.Integer object at 0x11b3dc7c0>
1 validation error for SearchSpace
dropout
  <class 'ray.tune.sample.Float'> required, found <class 'ray.tune.sample.Integer'> (type=type_error)


However, in our case, it does not add anything more than we already had with arbitrary types.

In [26]:
SAMPLE_INT = ray.tune.sample.Integer
SAMPLE_FLOAT = ray.tune.sample.Float


class SearchSpace(BaseModel):
    """Model hyperparameters; tunable ones take a fixed value or a ray.tune sampler."""

    input_size: int
    # Fixed int or a ray.tune integer sampler (e.g. tune.randint).
    hidden_size: Union[int, SAMPLE_INT]
    # Fixed float or a ray.tune float sampler (e.g. tune.uniform).
    dropout: Union[float, SAMPLE_FLOAT]
    num_layers: Union[int, SAMPLE_INT]
    output_size: int
    # Optional: None when not supplied.
    tune_dir: Optional[Path]
    data_dir: Path

    class Config:
        # Needed because pydantic has no built-in validator for the sampler types.
        arbitrary_types_allowed = True


config = SearchSpace(
    input_size=3,
    hidden_size=tune.randint(16, 128),
    dropout=tune.uniform(0.0, 0.3),
    num_layers=2,
    output_size=20,
    data_dir=".",
)
config


SearchSpace(input_size=3, hidden_size=<ray.tune.sample.Integer object at 0x11b24ccd0>, dropout=<ray.tune.sample.Float object at 0x11b24cd60>, num_layers=2, output_size=20, tune_dir=None, data_dir=PosixPath('.'))

But what if we want to protect against adding non-existing paths?

In [27]:
data_dir = Path("data/a/b").absolute()
data_dir.exists(), data_dir


(False,
 PosixPath('/Users/rgrouls/code/deep_learning/notebooks/4_tuning_networks/data/a/b'))

In [28]:
config = SearchSpace(
    input_size=3,
    hidden_size=32,
    dropout=0.1,
    num_layers=2,
    output_size=20,
    data_dir=data_dir,
)
config


SearchSpace(input_size=3, hidden_size=32, dropout=0.1, num_layers=2, output_size=20, tune_dir=None, data_dir=PosixPath('/Users/rgrouls/code/deep_learning/notebooks/4_tuning_networks/data/a/b'))

We can add a `root_validator` to run an additional check before creation.

In [29]:
class SearchSpace(BaseModel):
    """Search space with default samplers and an existence check on `data_dir`."""

    input_size: int
    hidden_size: Union[int, SAMPLE_INT] = tune.randint(16, 128)
    dropout: Union[float, SAMPLE_FLOAT] = tune.uniform(0.0, 0.3)
    num_layers: Union[int, SAMPLE_INT] = tune.randint(2, 5)
    output_size: int
    tune_dir: Optional[Path]
    data_dir: Path

    class Config:
        # Needed because pydantic has no built-in validator for the sampler types.
        arbitrary_types_allowed = True

    @root_validator
    def check_path(cls, values: Dict) -> Dict:  # noqa: N805
        """Ensure that `data_dir` points to an existing location.

        Args:
            values: the fields that passed validation so far.

        Returns:
            The unchanged `values` dict.

        Raises:
            FileNotFoundError: if `data_dir` was validated but does not exist.
        """
        datadir = values.get("data_dir")
        # `data_dir` is missing from `values` when it was not supplied or failed
        # its own validation. Skip the check in that case so pydantic reports
        # the real problem instead of an AttributeError on None.
        if datadir is not None and not datadir.exists():
            raise FileNotFoundError(
                f"Make sure the datadir exists.\n Found {datadir} to be non-existing."
            )
        return values


try:
    config = SearchSpace(
        input_size=3,
        hidden_size=32,
        dropout=0.1,
        num_layers=2,
        output_size=20,
        data_dir=data_dir,
    )
except FileNotFoundError as e:
    print(e)


Make sure the datadir exists.
 Found /Users/rgrouls/code/deep_learning/notebooks/4_tuning_networks/data/a/b to be non-existing.


This can really save you a lot of headaches!

A last trick is to use inheritance. We can make a baseclass, and inherit all the validators etc, and just add the additional stuff specific to our model.

In [165]:
class BaseSearchSpace(BaseModel):
    """Base configuration with the shared fields and the `data_dir` existence check."""

    input_size: int
    output_size: int
    tune_dir: Optional[Path]
    data_dir: Path

    class Config:
        # Allows annotations with types pydantic has no built-in validator for.
        arbitrary_types_allowed = True

    @root_validator
    def check_path(cls, values: Dict) -> Dict:  # noqa: N805
        """Ensure that `data_dir` points to an existing location.

        Args:
            values: the fields that passed validation so far.

        Returns:
            The unchanged `values` dict.

        Raises:
            FileNotFoundError: if `data_dir` was validated but does not exist.
        """
        datadir = values.get("data_dir")
        # `data_dir` is missing from `values` when it was not supplied or failed
        # its own validation. Skip the check in that case so pydantic reports
        # the real problem instead of an AttributeError on None.
        if datadir is not None and not datadir.exists():
            raise FileNotFoundError(
                f"Make sure the datadir exists.\n Found {datadir} to be non-existing."
            )
        return values


class SearchSpace(BaseSearchSpace):
    """Model-specific search space built on top of BaseSearchSpace."""

    # Each tunable field defaults to a ray.tune sampler; a fixed value is also accepted.
    hidden_size: Union[int, SAMPLE_INT] = tune.randint(16, 128)
    dropout: Union[float, SAMPLE_FLOAT] = tune.uniform(0.0, 0.3)
    num_layers: Union[int, SAMPLE_INT] = tune.randint(2, 5)


In [166]:
data_dir = Path("../../data/external/gestures-dataset").absolute()
config = SearchSpace(
    input_size=3,
    hidden_size=tune.randint(16, 128),
    dropout=0.1,
    num_layers=2,
    output_size=20,
    data_dir=data_dir,
)
config


SearchSpace(input_size=3, output_size=20, tune_dir=None, data_dir=PosixPath('/Users/rgrouls/code/deep_learning/notebooks/4_tuning_networks/../../data/external/gestures-dataset'), hidden_size=<ray.tune.sample.Integer object at 0x11e47cd00>, dropout=0.1, num_layers=2)

We can access items like this:

In [167]:
config.data_dir


PosixPath('/Users/rgrouls/code/deep_learning/notebooks/4_tuning_networks/../../data/external/gestures-dataset')

We also get transformation into a dictionary for free:

In [168]:
config.dict()


{'input_size': 3,
 'output_size': 20,
 'tune_dir': None,
 'data_dir': PosixPath('/Users/rgrouls/code/deep_learning/notebooks/4_tuning_networks/../../data/external/gestures-dataset'),
 'hidden_size': <ray.tune.sample.Integer at 0x11e47cd00>,
 'dropout': 0.1,
 'num_layers': 2}