TODO: add basic description

## Basic setup

### imports

In [None]:
import numpy as np
from sklearn.datasets import make_classification
import torch
from torch import nn
import torch.nn.functional as F

### constants

In [1]:
np.random.seed(0)
torch.manual_seed(0)
torch.cuda.manual_seed(0);

In [2]:
DEVICE = 'cpu'  # change to 'cuda' if so desired

### A toy binary classification task

In [3]:
X, y = make_classification(10000, 20, n_informative=10, random_state=0)
X = X.astype(np.float32)

In [4]:
X.shape, y.shape, y.mean()

((10000, 20), (10000,), 0.5003)

### Definition of the `pytorch` classification `module`

We define a vanilla neural network with two hidden layers. The output layer should have 2 output units since there are two classes. In addition, it should have a softmax nonlinearity, because later, when calling `predict_proba`, the output from the `forward` call will be used.

In [5]:
class MyModule(nn.Module):
    def __init__(self, num_units=10, dropout=0.5):
        super().__init__()

        self.dense = nn.Linear(20, num_units)
        self.dropout = nn.Dropout(dropout)
        self.output = nn.Linear(num_units, 2)

    def forward(self, X, **kwargs):
        X = F.relu(self.dense(X))
        X = self.dropout(X)
        X = F.softmax(self.output(X), dim=-1)
        return X

## Reduction of boilerplate code

### Pure PyTorch implementation

In [6]:
import time
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [7]:
X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=0)

In [9]:
ds_train = torch.utils.data.TensorDataset(torch.from_numpy(X_train), torch.from_numpy(y_train))
loader_train = torch.utils.data.DataLoader(ds_train, batch_size=256, shuffle=True)
ds_valid = torch.utils.data.TensorDataset(torch.from_numpy(X_valid), torch.from_numpy(y_valid))
loader_valid = torch.utils.data.DataLoader(ds_valid, batch_size=256)
module = MyModule()
optimizer = torch.optim.SGD(module.parameters(), lr=0.02)
criterion = nn.NLLLoss()
template = "epoch: {} | loss train: {:.4f} | loss valid: {:.4f} | acc valid: {:.4f} | dur: {:.3f}"

In [10]:
for epoch in range(20):
    tic = time.time()
    losses_train = []
    for Xb, yb in loader_train:
        y_proba = module(Xb)
        loss = criterion(torch.log(y_proba), yb)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        losses_train.append(loss.item())
        
    losses_valid = []
    accuracy_valid = []
    for Xb, yb in loader_valid:
        y_proba = module(Xb)
        loss = criterion(torch.log(y_proba), yb)
        optimizer.step()
        optimizer.zero_grad()
        losses_valid.append(loss.item())
        accuracy_valid.append(accuracy_score(yb.numpy(), y_proba.argmax(1).numpy()))
        
    toc = time.time() - tic
    print(template.format(
        epoch + 1, np.mean(losses_train), np.mean(losses_valid), np.mean(accuracy_valid), toc))

epoch: 1 | loss train: 0.6806 | loss valid: 0.6352 | acc valid: 0.6173 | dur: 0.130
epoch: 2 | loss train: 0.6395 | loss valid: 0.6256 | acc valid: 0.6316 | dur: 0.076
epoch: 3 | loss train: 0.6225 | loss valid: 0.6095 | acc valid: 0.6558 | dur: 0.105
epoch: 4 | loss train: 0.6078 | loss valid: 0.6011 | acc valid: 0.6606 | dur: 0.080
epoch: 5 | loss train: 0.5979 | loss valid: 0.5895 | acc valid: 0.6721 | dur: 0.075
epoch: 6 | loss train: 0.5802 | loss valid: 0.5760 | acc valid: 0.6757 | dur: 0.076
epoch: 7 | loss train: 0.5689 | loss valid: 0.5670 | acc valid: 0.6865 | dur: 0.084
epoch: 8 | loss train: 0.5572 | loss valid: 0.5520 | acc valid: 0.7170 | dur: 0.078
epoch: 9 | loss train: 0.5446 | loss valid: 0.5493 | acc valid: 0.7095 | dur: 0.076
epoch: 10 | loss train: 0.5452 | loss valid: 0.5328 | acc valid: 0.7179 | dur: 0.082
epoch: 11 | loss train: 0.5286 | loss valid: 0.5331 | acc valid: 0.7279 | dur: 0.075
epoch: 12 | loss train: 0.5234 | loss valid: 0.5055 | acc valid: 0.7342 | 

### The same with skorch

In [11]:
from skorch import NeuralNetClassifier

In [12]:
net = NeuralNetClassifier(
    MyModule,
    module__num_units=50,
    max_epochs=20,
    lr=0.02,
    batch_size=256,
    iterator_train__shuffle=True,
    device=DEVICE,
)

In [13]:
net.fit(X, y)

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m0.7074[0m       [32m0.7066[0m        [35m0.6111[0m  0.1349
      2        [36m0.6085[0m       [32m0.7671[0m        [35m0.5597[0m  0.1339
      3        [36m0.5686[0m       [32m0.7881[0m        [35m0.5253[0m  0.1347
      4        [36m0.5526[0m       [32m0.8046[0m        [35m0.5001[0m  0.1355
      5        [36m0.5281[0m       [32m0.8121[0m        [35m0.4778[0m  0.1277
      6        [36m0.5121[0m       [32m0.8276[0m        [35m0.4586[0m  0.1389
      7        [36m0.4927[0m       [32m0.8306[0m        [35m0.4411[0m  0.1315
      8        [36m0.4814[0m       [32m0.8396[0m        [35m0.4276[0m  0.1286
      9        [36m0.4763[0m       [32m0.8421[0m        [35m0.4153[0m  0.1386
     10        [36m0.4639[0m       [32m0.8491[0m        [35m0.4027[0m  0.1392
     11        [36m0.4609[0m       [32m0.85

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=MyModule(
    (dense): Linear(in_features=20, out_features=50, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
    (output): Linear(in_features=50, out_features=2, bias=True)
  ),
)

## Compatibility with sklearn API

### Support for the basic methods

In [23]:
from sklearn.base import clone
from sklearn.model_selection import cross_validate

In [14]:
y_pred = net.predict(X[:5])
y_pred

array([0, 1, 1, 0, 1])

In [15]:
y_proba = net.predict_proba(X[:5])
y_proba

array([[0.7972707 , 0.20272928],
       [0.16107567, 0.8389244 ],
       [0.17463814, 0.82536185],
       [0.5755565 , 0.42444348],
       [0.3575625 , 0.64243746]], dtype=float32)

In [17]:
net.get_params();

In [22]:
net.set_params(verbose=0)

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=MyModule(
    (dense): Linear(in_features=20, out_features=50, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
    (output): Linear(in_features=50, out_features=2, bias=True)
  ),
)

In [24]:
_ = clone(net)

In [26]:
cross_validate(net, X, y, cv=3)

{'fit_time': array([5.89048505, 6.69337392, 4.49859238]),
 'score_time': array([0.05821371, 0.10425282, 0.17354083]),
 'test_score': array([0.86562687, 0.85572885, 0.88505402])}

### Pipeline

In [27]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [28]:
pipe = Pipeline([
    ('scale', StandardScaler()),
    ('net', net),
])

pipe.fit(X, y)

Pipeline(memory=None,
         steps=[('scale',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('net',
                 <class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=MyModule(
    (dense): Linear(in_features=20, out_features=50, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
    (output): Linear(in_features=50, out_features=2, bias=True)
  ),
))],
         verbose=False)

In [31]:
pipe.predict(X[:5])

array([0, 1, 1, 0, 1])

In [32]:
pipe.predict_proba(X[:5])

array([[0.56465125, 0.43534878],
       [0.2613952 , 0.7386048 ],
       [0.26589844, 0.7341016 ],
       [0.5664597 , 0.43354028],
       [0.32822582, 0.67177415]], dtype=float32)

### pickle

In [35]:
import pickle

Saves the whole pipeline, including preprocessing and the neural net.

In [36]:
with open('my_pipeline.pickle', 'wb') as f:
    pickle.dump(pipe, f)

  "type " + obj.__name__ + ". It won't be checked "


### GridSearchCV

In [40]:
from sklearn.model_selection import GridSearchCV

In [41]:
params = {
    'lr': [0.05, 0.1],
    'optimizer__momentum': [0.0, 0.9],
    'module__num_units': [10, 20, 50],
    'module__dropout': [0, 0.5],
}

In [42]:
%time search = GridSearchCV(net, params, verbose=2, cv=3).fit(X, y)

Fitting 3 folds for each of 24 candidates, totalling 72 fits
[CV] lr=0.05, module__dropout=0, module__num_units=10, optimizer__momentum=0.0 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  lr=0.05, module__dropout=0, module__num_units=10, optimizer__momentum=0.0, total=   7.1s
[CV] lr=0.05, module__dropout=0, module__num_units=10, optimizer__momentum=0.0 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    7.1s remaining:    0.0s


[CV]  lr=0.05, module__dropout=0, module__num_units=10, optimizer__momentum=0.0, total=   3.9s
[CV] lr=0.05, module__dropout=0, module__num_units=10, optimizer__momentum=0.0 
[CV]  lr=0.05, module__dropout=0, module__num_units=10, optimizer__momentum=0.0, total=   4.6s
[CV] lr=0.05, module__dropout=0, module__num_units=10, optimizer__momentum=0.9 
[CV]  lr=0.05, module__dropout=0, module__num_units=10, optimizer__momentum=0.9, total=   4.2s
[CV] lr=0.05, module__dropout=0, module__num_units=10, optimizer__momentum=0.9 
[CV]  lr=0.05, module__dropout=0, module__num_units=10, optimizer__momentum=0.9, total=   5.7s
[CV] lr=0.05, module__dropout=0, module__num_units=10, optimizer__momentum=0.9 
[CV]  lr=0.05, module__dropout=0, module__num_units=10, optimizer__momentum=0.9, total=   3.7s
[CV] lr=0.05, module__dropout=0, module__num_units=20, optimizer__momentum=0.0 
[CV]  lr=0.05, module__dropout=0, module__num_units=20, optimizer__momentum=0.0, total=   5.2s
[CV] lr=0.05, module__dropout=

[CV]  lr=0.1, module__dropout=0, module__num_units=50, optimizer__momentum=0.0, total=   6.5s
[CV] lr=0.1, module__dropout=0, module__num_units=50, optimizer__momentum=0.0 
[CV]  lr=0.1, module__dropout=0, module__num_units=50, optimizer__momentum=0.0, total=   6.2s
[CV] lr=0.1, module__dropout=0, module__num_units=50, optimizer__momentum=0.0 
[CV]  lr=0.1, module__dropout=0, module__num_units=50, optimizer__momentum=0.0, total=   7.0s
[CV] lr=0.1, module__dropout=0, module__num_units=50, optimizer__momentum=0.9 
[CV]  lr=0.1, module__dropout=0, module__num_units=50, optimizer__momentum=0.9, total=   4.3s
[CV] lr=0.1, module__dropout=0, module__num_units=50, optimizer__momentum=0.9 
[CV]  lr=0.1, module__dropout=0, module__num_units=50, optimizer__momentum=0.9, total=   7.4s
[CV] lr=0.1, module__dropout=0, module__num_units=50, optimizer__momentum=0.9 
[CV]  lr=0.1, module__dropout=0, module__num_units=50, optimizer__momentum=0.9, total=   7.4s
[CV] lr=0.1, module__dropout=0.5, module_

[Parallel(n_jobs=1)]: Done  72 out of  72 | elapsed:  6.3min finished


CPU times: user 29min 54s, sys: 25.6 s, total: 30min 20s
Wall time: 6min 26s


In [44]:
search.best_score_, search.best_params_

(0.96,
 {'lr': 0.1,
  'module__dropout': 0,
  'module__num_units': 50,
  'optimizer__momentum': 0.9})

### swap skorch net for any other sklearn estimator

In [45]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

In [46]:
pipe = Pipeline([
    ('scale', StandardScaler()),
    ('model', net),
])
params = {'model': [net, KNeighborsClassifier(), LogisticRegression()]}
search = GridSearchCV(pipe, params, verbose=2, cv=3)

In [None]:
%time search.fit(X, y)

Fitting 3 folds for each of 3 candidates, totalling 9 fits
[CV] model=<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=MyModule(
    (dense): Linear(in_features=20, out_features=50, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
    (output): Linear(in_features=50, out_features=2, bias=True)
  ),
) 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  model=<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=MyModule(
    (dense): Linear(in_features=20, out_features=50, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
    (output): Linear(in_features=50, out_features=2, bias=True)
  ),
), total=   5.7s
[CV] model=<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=MyModule(
    (dense): Linear(in_features=20, out_features=50, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
    (output): Linear(in_features=50, out_features=2, bias=True)
  ),
) 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    5.7s remaining:    0.0s


In [None]:
search.best_score_, search.best_params_

## Saving and loading a model

Save and load either the whole model by using pickle or just the learned model parameters by calling `save_params` and `load_params`.

### Saving the whole model

In [21]:
import pickle

In [22]:
file_name = '/tmp/mymodel.pkl'

In [23]:
with open(file_name, 'wb') as f:
    pickle.dump(net, f)

  "type " + obj.__name__ + ". It won't be checked "


In [24]:
with open(file_name, 'rb') as f:
    new_net = pickle.load(f)

### Saving only the model parameters

This only saves and loads the proper `module` parameters, meaning that hyperparameters such as `lr` and `max_epochs` are not saved. Therefore, to load the model, we have to re-initialize it beforehand.

In [25]:
net.save_params(f_params=file_name)  # a file handler also works

In [26]:
# first initialize the model
new_net = NeuralNetClassifier(
    ClassifierModule,
    max_epochs=20,
    lr=0.1,
).initialize()

In [27]:
new_net.load_params(file_name)

## Usage with an `sklearn Pipeline`

It is possible to put the `NeuralNetClassifier` inside an `sklearn Pipeline`, as you would with any `sklearn` classifier.

In [28]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [29]:
pipe = Pipeline([
    ('scale', StandardScaler()),
    ('net', net),
])

In [30]:
pipe.fit(X, y)

Re-initializing module!
  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m0.7243[0m       [32m0.5000[0m        [35m0.7105[0m  0.0184
      2        [36m0.7057[0m       0.5000        [35m0.6996[0m  0.0207
      3        [36m0.6971[0m       0.5000        [35m0.6949[0m  0.0192
      4        [36m0.6936[0m       [32m0.5050[0m        [35m0.6929[0m  0.0224
      5        [36m0.6923[0m       [32m0.5400[0m        [35m0.6916[0m  0.0210
      6        [36m0.6905[0m       0.5000        [35m0.6906[0m  0.0189
      7        [36m0.6894[0m       0.5100        [35m0.6899[0m  0.0194
      8        [36m0.6891[0m       0.5150        [35m0.6892[0m  0.0186
      9        0.6899       0.5250        [35m0.6885[0m  0.0202
     10        [36m0.6844[0m       0.5300        [35m0.6876[0m  0.0189
     11        0.6853       [32m0.5650[0m        [35m0.6865[0m  0.0199
     12        [36

Pipeline(memory=None,
     steps=[('scale', StandardScaler(copy=True, with_mean=True, with_std=True)), ('net', <class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=ClassifierModule(
    (dense0): Linear(in_features=20, out_features=10, bias=True)
    (dropout): Dropout(p=0.5)
    (dense1): Linear(in_features=10, out_features=10, bias=True)
    (output): Linear(in_features=10, out_features=2, bias=True)
  ),
))])

In [31]:
y_proba = pipe.predict_proba(X[:5])
y_proba

array([[0.5064775 , 0.49352255],
       [0.53243965, 0.46756038],
       [0.57306874, 0.42693123],
       [0.54179883, 0.45820117],
       [0.5528906 , 0.44710937]], dtype=float32)

To save the whole pipeline, including the pytorch module, use `pickle`.

## Callbacks

Adding a new callback to the model is straightforward. Below we show how to add a new callback that determines the area under the ROC (AUC) score.

In [32]:
from skorch.callbacks import EpochScoring

There is a scoring callback in skorch, `EpochScoring`, which we use for this. We have to specify which score to calculate. We have 3 choices:

* Passing a string: This should be a valid `sklearn` metric. For a list of all existing scores, look [here](http://scikit-learn.org/stable/modules/classes.html#sklearn-metrics-metrics).
* Passing `None`: If you implement your own `.score` method on your neural net, passing `scoring=None` will tell `skorch` to use that.
* Passing a function or callable: If we want to define our own scoring function, we pass a function with the signature `func(model, X, y) -> score`, which is then used.

Note that this works exactly the same as scoring in `sklearn` does.

For our case here, since `sklearn` already implements AUC, we just pass the correct string `'roc_auc'`. We should also tell the callback that higher scores are better (to get the correct colors printed below -- by default, lower scores are assumed to be better). Furthermore, we may specify a `name` argument for `EpochScoring`, and whether to use training data (by setting `on_train=True`) or validation data (which is the default).

In [33]:
auc = EpochScoring(scoring='roc_auc', lower_is_better=False)

Finally, we pass the scoring callback to the `callbacks` parameter as a list and then call `fit`. Notice that we get the printed scores and color highlighting for free.

In [34]:
net = NeuralNetClassifier(
    ClassifierModule,
    max_epochs=20,
    lr=0.1,
    callbacks=[auc],
)

In [35]:
net.fit(X, y)

  epoch    roc_auc    train_loss    valid_acc    valid_loss     dur
-------  ---------  ------------  -----------  ------------  ------
      1     [36m0.6112[0m        [32m0.7076[0m       [35m0.5550[0m        [31m0.6802[0m  0.0188
      2     [36m0.6766[0m        [32m0.6750[0m       [35m0.6150[0m        [31m0.6626[0m  0.0204
      3     [36m0.7031[0m        [32m0.6560[0m       [35m0.6500[0m        [31m0.6498[0m  0.0244
      4     [36m0.7201[0m        [32m0.6364[0m       [35m0.6650[0m        [31m0.6381[0m  0.0193
      5     [36m0.7316[0m        [32m0.6176[0m       [35m0.6900[0m        [31m0.6285[0m  0.0203
      6     [36m0.7447[0m        [32m0.6094[0m       [35m0.7200[0m        [31m0.6183[0m  0.0222
      7     [36m0.7522[0m        0.6170       0.7200        [31m0.6090[0m  0.0188
      8     [36m0.7567[0m        [32m0.5786[0m       0.7150        [31m0.6032[0m  0.0197
      9     [36m0.7630[0m        0.5850       0.7100     

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=ClassifierModule(
    (dense0): Linear(in_features=20, out_features=10, bias=True)
    (dropout): Dropout(p=0.5)
    (dense1): Linear(in_features=10, out_features=10, bias=True)
    (output): Linear(in_features=10, out_features=2, bias=True)
  ),
)

For information on how to write custom callbacks, have a look at the [Advanced_Usage](https://nbviewer.jupyter.org/github/skorch-dev/skorch/blob/master/notebooks/Advanced_Usage.ipynb) notebook.

## Usage with sklearn `GridSearchCV`

### Special prefixes

The `NeuralNet` class allows to directly access parameters of the `pytorch module` by using the `module__` prefix. So e.g. if you defined the `module` to have a `num_units` parameter, you can set it via the `module__num_units` argument. This is exactly the same logic that allows to access estimator parameters in `sklearn Pipeline`s and `FeatureUnion`s.

This feature is useful in several ways. For one, it allows to set those parameters in the model definition. Furthermore, it allows you to set parameters in an `sklearn GridSearchCV` as shown below.

In addition to the parameters prefixed by `module__`, you may access a couple of other attributes, such as those of the optimizer by using the `optimizer__` prefix (again, see below). All those special prefixes are stored in the `prefixes_` attribute:

In [36]:
print(', '.join(net.prefixes_))

module, iterator_train, iterator_valid, optimizer, criterion, callbacks, dataset


### Performing a grid search

Below we show how to perform a grid search over the learning rate (`lr`), the module's number of hidden units (`module__num_units`), the module's dropout rate (`module__dropout`), and whether the SGD optimizer should use Nesterov momentum or not (`optimizer__nesterov`).

In [37]:
from sklearn.model_selection import GridSearchCV

In [38]:
net = NeuralNetClassifier(
    ClassifierModule,
    max_epochs=20,
    lr=0.1,
    verbose=0,
    optimizer__momentum=0.9,
)

In [39]:
params = {
    'lr': [0.05, 0.1],
    'module__num_units': [10, 20],
    'module__dropout': [0, 0.5],
    'optimizer__nesterov': [False, True],
}

In [40]:
gs = GridSearchCV(net, params, refit=False, cv=3, scoring='accuracy', verbose=2)

In [41]:
gs.fit(X, y)

Fitting 3 folds for each of 16 candidates, totalling 48 fits
[CV] lr=0.05, module__dropout=0, module__num_units=10, optimizer__nesterov=False 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  lr=0.05, module__dropout=0, module__num_units=10, optimizer__nesterov=False, total=   0.3s
[CV] lr=0.05, module__dropout=0, module__num_units=10, optimizer__nesterov=False 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.3s remaining:    0.0s


[CV]  lr=0.05, module__dropout=0, module__num_units=10, optimizer__nesterov=False, total=   0.3s
[CV] lr=0.05, module__dropout=0, module__num_units=10, optimizer__nesterov=False 
[CV]  lr=0.05, module__dropout=0, module__num_units=10, optimizer__nesterov=False, total=   0.3s
[CV] lr=0.05, module__dropout=0, module__num_units=10, optimizer__nesterov=True 
[CV]  lr=0.05, module__dropout=0, module__num_units=10, optimizer__nesterov=True, total=   0.3s
[CV] lr=0.05, module__dropout=0, module__num_units=10, optimizer__nesterov=True 
[CV]  lr=0.05, module__dropout=0, module__num_units=10, optimizer__nesterov=True, total=   0.3s
[CV] lr=0.05, module__dropout=0, module__num_units=10, optimizer__nesterov=True 
[CV]  lr=0.05, module__dropout=0, module__num_units=10, optimizer__nesterov=True, total=   0.3s
[CV] lr=0.05, module__dropout=0, module__num_units=20, optimizer__nesterov=False 
[CV]  lr=0.05, module__dropout=0, module__num_units=20, optimizer__nesterov=False, total=   0.3s
[CV] lr=0.05, 

[Parallel(n_jobs=1)]: Done  48 out of  48 | elapsed:   15.7s finished


GridSearchCV(cv=3, error_score='raise-deprecating',
       estimator=<class 'skorch.classifier.NeuralNetClassifier'>[uninitialized](
  module=<class '__main__.ClassifierModule'>,
),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'lr': [0.05, 0.1], 'module__num_units': [10, 20], 'module__dropout': [0, 0.5], 'optimizer__nesterov': [False, True]},
       pre_dispatch='2*n_jobs', refit=False, return_train_score='warn',
       scoring='accuracy', verbose=2)

In [42]:
print(gs.best_score_, gs.best_params_)

0.862 {'lr': 0.05, 'module__dropout': 0, 'module__num_units': 20, 'optimizer__nesterov': False}


Of course, we could further nest the `NeuralNetClassifier` within an `sklearn Pipeline`, in which case we just prefix the parameter by the name of the net (e.g. `net__module__num_units`).