# Otto Group Product Classification Challenge using nolearn/lasagne

This short notebook is meant to help you get started with nolearn and lasagne in order to train a neural net and make a submission to the Otto Group Product Classification Challenge.

* [Otto Group Product Classification Challenge](https://www.kaggle.com/c/otto-group-product-classification-challenge)
* [Get the notebook from the Otto Group repository](https://github.com/ottogroup)
* [Nolearn repository](https://github.com/dnouri/nolearn)
* [Lasagne repository](https://github.com/benanne/Lasagne)
* [A nolearn/lasagne tutorial for convolutional nets](http://danielnouri.org/notes/2014/12/17/using-convolutional-neural-nets-to-detect-facial-keypoints-tutorial/)

## Imports

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

In [2]:
from lasagne.layers import DenseLayer
from lasagne.layers import InputLayer
from lasagne.layers import DropoutLayer
from lasagne.nonlinearities import softmax
from lasagne.updates import nesterov_momentum
from nolearn.lasagne import NeuralNet

## Utility functions

In [3]:
def load_train_data(path, seed=None):
    """Load, shuffle, encode and standardise the training set.

    The CSV at ``path`` is read with pandas. The first column is dropped,
    the last column is taken as the class label and every column in between
    is used as a feature.

    Parameters
    ----------
    path : str
        Location of the training CSV file.
    seed : int or None, optional
        Seed for the row shuffle. With the default ``None`` the global NumPy
        random state is used (the original behaviour), so the row order
        differs from run to run. Pass an integer to get a reproducible order.

    Returns
    -------
    X : ndarray of float32
        Feature matrix after ``StandardScaler.fit_transform``.
    y : ndarray of int32
        Labels encoded by ``LabelEncoder``.
    encoder : LabelEncoder
        The fitted label encoder.
    scaler : StandardScaler
        The fitted scaler, to be re-used on the test data.
    """
    df = pd.read_csv(path)
    X = df.values.copy()
    # Shuffle whole rows *before* splitting features from labels so that both
    # stay aligned.
    if seed is None:
        np.random.shuffle(X)
    else:
        # A private RandomState keeps the global RNG untouched.
        np.random.RandomState(seed).shuffle(X)
    X, labels = X[:, 1:-1].astype(np.float32), X[:, -1]
    encoder = LabelEncoder()
    y = encoder.fit_transform(labels).astype(np.int32)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    return X, y, encoder, scaler

In [4]:
def load_test_data(path, scaler):
    """Read the test CSV and scale its features with a fitted scaler.

    The first column of the file is returned as string ids; all remaining
    columns are cast to float32 and passed through ``scaler.transform``.

    Parameters
    ----------
    path : str
        Location of the test CSV file.
    scaler : object
        Fitted transformer; only its ``transform`` method is called.

    Returns
    -------
    X : ndarray
        Whatever ``scaler.transform`` returns for the float32 features.
    ids : ndarray of str
        The first CSV column converted to strings.
    """
    raw = pd.read_csv(path).values.copy()
    sample_ids = raw[:, 0].astype(str)
    features = raw[:, 1:].astype(np.float32)
    return scaler.transform(features), sample_ids

In [5]:
def make_submission(clf, X_test, ids, encoder, name='my_neural_net_submission.csv'):
    """Write class-probability predictions for ``X_test`` to a CSV file.

    The file starts with a header row ``id,<class 1>,...,<class n>`` built
    from ``encoder.classes_``. It is followed by one row per sample: the
    sample id and then the predicted probability of each class.

    Parameters
    ----------
    clf : object
        Fitted classifier; only ``clf.predict_proba(X_test)`` is called.
    X_test : array-like
        Samples handed to ``predict_proba``.
    ids : iterable
        One identifier per row of ``X_test``; each is converted with ``str``.
    encoder : object
        Supplies the column names through its ``classes_`` attribute.
    name : str, optional
        Path of the CSV file to write (overwritten if it exists).
    """
    y_prob = clf.predict_proba(X_test)
    with open(name, 'w') as f:
        f.write('id,')
        f.write(','.join(map(str, encoder.classes_)))
        f.write('\n')
        for sample_id, probs in zip(ids, y_prob):
            # Build a real list: ``[x] + map(...)`` raises TypeError on
            # Python 3, where map() returns an iterator. ``sample_id`` also
            # avoids shadowing the builtin ``id``.
            fields = [str(sample_id)] + [str(p) for p in probs.tolist()]
            f.write(','.join(fields))
            f.write('\n')
    print("Wrote submission to file {}.".format(name))

## Load Data

In [6]:
# Load the training set. The fitted encoder and scaler are kept because they
# are needed again for the test data and for the submission header.
# Note: load_train_data shuffles the rows with NumPy's global RNG, so the row
# order (and therefore the training log) differs between runs.
X, y, encoder, scaler = load_train_data('data/train.csv')

In [7]:
# Load the test set and scale it with the scaler fitted on the training data.
X_test, ids = load_test_data('data/test.csv', scaler)

In [8]:
# Sizes used below for the output layer and the input layer of the network.
num_classes = len(encoder.classes_)  # number of distinct target labels
num_features = X.shape[1]  # number of feature columns

## Train Neural Net

In [10]:
# Network architecture as (name, layer class) pairs, listed from input to
# output. The names are the prefixes of the keyword arguments passed to
# NeuralNet below (e.g. 'dense0' -> dense0_num_units).
layers0 = [('input', InputLayer),
           ('dense0', DenseLayer),
           ('dropout', DropoutLayer),
           ('dense1', DenseLayer),
           ('output', DenseLayer)]

In [11]:
# Build the network described by layers0. Keyword arguments are grouped as:
# per-layer settings, optimiser settings, and training settings.
net0 = NeuralNet(layers=layers0,
                 # --- per-layer settings, prefixed with the layer name from layers0
                 input_shape=(None, num_features),
                 dense0_num_units=275,
                 dropout_p=0.45,
                 dense1_num_units=275,
                 output_num_units=num_classes,
                 output_nonlinearity=softmax,
                 # --- optimiser: Nesterov momentum and its parameters
                 update=nesterov_momentum,
                 update_learning_rate=0.009,
                 update_momentum=0.91,
                 # --- training settings (presumably eval_size is the held-out validation fraction — confirm against nolearn docs)
                 eval_size=0.2,
                 verbose=1,
                 max_epochs=100)


In [None]:
# Train net0 on the full training set; with verbose=1 the layer summary and a
# per-epoch table of train / validation loss are printed below.
net0.fit(X, y)

  InputLayer        	(None, 93)          	produces      93 outputs
  DenseLayer        	(None, 275)         	produces     275 outputs
  DropoutLayer      	(None, 275)         	produces     275 outputs
  DenseLayer        	(None, 275)         	produces     275 outputs
  DenseLayer        	(None, 9)           	produces       9 outputs

 Epoch  |  Train loss  |  Valid loss  |  Train / Val  |  Valid acc  |  Dur
--------|--------------|--------------|---------------|-------------|-------
     1  |  [94m  0.891391[0m  |  [32m  0.653195[0m  |     1.364664  |     75.10%  |  9.4s
     2  |  [94m  0.681409[0m  |  [32m  0.612420[0m  |     1.112650  |     76.39%  |  10.1s
     3  |  [94m  0.640595[0m  |  [32m  0.585888[0m  |     1.093374  |     77.42%  |  9.4s
     4  |  [94m  0.617803[0m  |  [32m  0.573120[0m  |     1.077964  |     77.74%  |  9.7s

In [12]:
# `net1` is never constructed in this notebook, so on a fresh kernel this
# cell raised NameError. It is rebuilt here from the repr printed below this
# cell (dense0=250, dense1=200, dropout_p=0.5, learning rate 0.01).
# NOTE(review): the saved repr is cut off after update_learning_rate, so
# update_momentum=0.9 and verbose=1 are assumptions — confirm.
net1 = NeuralNet(layers=layers0,
                 input_shape=(None, num_features),
                 dense0_num_units=250,
                 dropout_p=0.5,
                 dense1_num_units=200,
                 output_num_units=num_classes,
                 output_nonlinearity=softmax,
                 update=nesterov_momentum,
                 update_learning_rate=0.01,
                 update_momentum=0.9,
                 eval_size=0.2,
                 verbose=1,
                 max_epochs=100)
net1.fit(X, y)

  InputLayer        	(None, 93)          	produces      93 outputs
  DenseLayer        	(None, 250)         	produces     250 outputs
  DropoutLayer      	(None, 250)         	produces     250 outputs
  DenseLayer        	(None, 200)         	produces     200 outputs
  DenseLayer        	(None, 9)           	produces       9 outputs

 Epoch  |  Train loss  |  Valid loss  |  Train / Val  |  Valid acc  |  Dur
--------|--------------|--------------|---------------|-------------|-------
     1  |  [94m  0.902314[0m  |  [32m  0.665007[0m  |     1.356849  |     74.67%  |  6.7s
     2  |  [94m  0.697292[0m  |  [32m  0.621140[0m  |     1.122600  |     75.96%  |  6.8s
     3  |  [94m  0.659701[0m  |  [32m  0.601324[0m  |     1.097082  |     76.80%  |  6.7s
     4  |  [94m  0.638440[0m  |  [32m  0.585768[0m  |     1.089919  |     77.37%  |  6.6s
     5  |  [94m  0.618017[0m  |  [32m  0.578173[0m  |     1.068914  |     77.52%  |  6.6s
     6  |  [94m  0.602081[0m  |  [32m  

NeuralNet(X_tensor_type=<function matrix at 0x7fc9ed37f140>,
     batch_iterator_test=<nolearn.lasagne.BatchIterator object at 0x7fc9e9f3cd10>,
     batch_iterator_train=<nolearn.lasagne.BatchIterator object at 0x7fc9e9f3ccd0>,
     dense0_num_units=250, dense1_num_units=200, dropout_p=0.5,
     eval_size=0.2, input_shape=(None, 93),
     layers=[('input', <class 'lasagne.layers.input.InputLayer'>), ('dense0', <class 'lasagne.layers.dense.DenseLayer'>), ('dropout', <class 'lasagne.layers.noise.DropoutLayer'>), ('dense1', <class 'lasagne.layers.dense.DenseLayer'>), ('output', <class 'lasagne.layers.dense.DenseLayer'>)],
     loss=<function negative_log_likelihood at 0x7fc9ea861668>,
     max_epochs=100, more_params={}, on_epoch_finished=(),
     on_training_finished=(),
     output_nonlinearity=<theano.tensor.nnet.nnet.Softmax object at 0x7fc9ecfcab90>,
     output_num_units=9, regression=False,
     update=<function nesterov_momentum at 0x7fc9ea861320>,
     update_learning_rate=0.01, 

In [13]:
# `net2` is never constructed in this notebook, so on a fresh kernel this
# cell raised NameError. It is rebuilt here from the repr printed below this
# cell (dense0=300, dense1=300, dropout_p=0.5, learning rate 0.01).
# NOTE(review): the saved repr is cut off after update_learning_rate, so
# update_momentum=0.9 and verbose=1 are assumptions — confirm.
net2 = NeuralNet(layers=layers0,
                 input_shape=(None, num_features),
                 dense0_num_units=300,
                 dropout_p=0.5,
                 dense1_num_units=300,
                 output_num_units=num_classes,
                 output_nonlinearity=softmax,
                 update=nesterov_momentum,
                 update_learning_rate=0.01,
                 update_momentum=0.9,
                 eval_size=0.2,
                 verbose=1,
                 max_epochs=100)
net2.fit(X, y)

  InputLayer        	(None, 93)          	produces      93 outputs
  DenseLayer        	(None, 300)         	produces     300 outputs
  DropoutLayer      	(None, 300)         	produces     300 outputs
  DenseLayer        	(None, 300)         	produces     300 outputs
  DenseLayer        	(None, 9)           	produces       9 outputs

 Epoch  |  Train loss  |  Valid loss  |  Train / Val  |  Valid acc  |  Dur
--------|--------------|--------------|---------------|-------------|-------
     1  |  [94m  0.899507[0m  |  [32m  0.655253[0m  |     1.372762  |     75.40%  |  10.5s
     2  |  [94m  0.684028[0m  |  [32m  0.613025[0m  |     1.115825  |     76.48%  |  10.4s
     3  |  [94m  0.644799[0m  |  [32m  0.591140[0m  |     1.090771  |     76.90%  |  10.4s
     4  |  [94m  0.624222[0m  |  [32m  0.578944[0m  |     1.078209  |     77.35%  |  10.4s
     5  |  [94m  0.608369[0m  |  [32m  0.568021[0m  |     1.071034  |     77.50%  |  10.4s
     6  |  [94m  0.593068[0m  |  [

NeuralNet(X_tensor_type=<function matrix at 0x7fc9ed37f140>,
     batch_iterator_test=<nolearn.lasagne.BatchIterator object at 0x7fc9e9f3cd10>,
     batch_iterator_train=<nolearn.lasagne.BatchIterator object at 0x7fc9e9f3ccd0>,
     dense0_num_units=300, dense1_num_units=300, dropout_p=0.5,
     eval_size=0.2, input_shape=(None, 93),
     layers=[('input', <class 'lasagne.layers.input.InputLayer'>), ('dense0', <class 'lasagne.layers.dense.DenseLayer'>), ('dropout', <class 'lasagne.layers.noise.DropoutLayer'>), ('dense1', <class 'lasagne.layers.dense.DenseLayer'>), ('output', <class 'lasagne.layers.dense.DenseLayer'>)],
     loss=<function negative_log_likelihood at 0x7fc9ea861668>,
     max_epochs=100, more_params={}, on_epoch_finished=(),
     on_training_finished=(),
     output_nonlinearity=<theano.tensor.nnet.nnet.Softmax object at 0x7fc9ecfcab90>,
     output_num_units=9, regression=False,
     update=<function nesterov_momentum at 0x7fc9ea861320>,
     update_learning_rate=0.01, 

In [14]:
# `net3` is never constructed in this notebook, so on a fresh kernel this
# cell raised NameError. It is rebuilt here from the repr printed below this
# cell (dense0=200, dense1=200, dropout_p=0.75, learning rate 0.01).
# NOTE(review): the saved repr is cut off after update_learning_rate, so
# update_momentum=0.9 and verbose=1 are assumptions — confirm.
net3 = NeuralNet(layers=layers0,
                 input_shape=(None, num_features),
                 dense0_num_units=200,
                 dropout_p=0.75,
                 dense1_num_units=200,
                 output_num_units=num_classes,
                 output_nonlinearity=softmax,
                 update=nesterov_momentum,
                 update_learning_rate=0.01,
                 update_momentum=0.9,
                 eval_size=0.2,
                 verbose=1,
                 max_epochs=100)
net3.fit(X, y)

  InputLayer        	(None, 93)          	produces      93 outputs
  DenseLayer        	(None, 200)         	produces     200 outputs
  DropoutLayer      	(None, 200)         	produces     200 outputs
  DenseLayer        	(None, 200)         	produces     200 outputs
  DenseLayer        	(None, 9)           	produces       9 outputs

 Epoch  |  Train loss  |  Valid loss  |  Train / Val  |  Valid acc  |  Dur
--------|--------------|--------------|---------------|-------------|-------
     1  |  [94m  1.056229[0m  |  [32m  0.699174[0m  |     1.510681  |     73.86%  |  4.7s
     2  |  [94m  0.796770[0m  |  [32m  0.658425[0m  |     1.210116  |     74.60%  |  4.7s
     3  |  [94m  0.747434[0m  |  [32m  0.636940[0m  |     1.173476  |     75.16%  |  4.7s
     4  |  [94m  0.721201[0m  |  [32m  0.625434[0m  |     1.153120  |     75.24%  |  4.7s
     5  |  [94m  0.698607[0m  |  [32m  0.614030[0m  |     1.137739  |     75.99%  |  4.7s
     6  |  [94m  0.686315[0m  |  [32m  

NeuralNet(X_tensor_type=<function matrix at 0x7fc9ed37f140>,
     batch_iterator_test=<nolearn.lasagne.BatchIterator object at 0x7fc9e9f3cd10>,
     batch_iterator_train=<nolearn.lasagne.BatchIterator object at 0x7fc9e9f3ccd0>,
     dense0_num_units=200, dense1_num_units=200, dropout_p=0.75,
     eval_size=0.2, input_shape=(None, 93),
     layers=[('input', <class 'lasagne.layers.input.InputLayer'>), ('dense0', <class 'lasagne.layers.dense.DenseLayer'>), ('dropout', <class 'lasagne.layers.noise.DropoutLayer'>), ('dense1', <class 'lasagne.layers.dense.DenseLayer'>), ('output', <class 'lasagne.layers.dense.DenseLayer'>)],
     loss=<function negative_log_likelihood at 0x7fc9ea861668>,
     max_epochs=100, more_params={}, on_epoch_finished=(),
     on_training_finished=(),
     output_nonlinearity=<theano.tensor.nnet.nnet.Softmax object at 0x7fc9ecfcab90>,
     output_num_units=9, regression=False,
     update=<function nesterov_momentum at 0x7fc9ea861320>,
     update_learning_rate=0.01,

In [18]:
# `net4` is never constructed in this notebook, so on a fresh kernel this
# cell raised NameError. It is rebuilt here from the repr printed below this
# cell (dense0=200, dense1=200, dropout_p=0.75, learning rate 0.02).
# NOTE(review): the saved repr is cut off after update_learning_rate, so
# update_momentum=0.9 and verbose=1 are assumptions — confirm.
net4 = NeuralNet(layers=layers0,
                 input_shape=(None, num_features),
                 dense0_num_units=200,
                 dropout_p=0.75,
                 dense1_num_units=200,
                 output_num_units=num_classes,
                 output_nonlinearity=softmax,
                 update=nesterov_momentum,
                 update_learning_rate=0.02,
                 update_momentum=0.9,
                 eval_size=0.2,
                 verbose=1,
                 max_epochs=100)
net4.fit(X, y)

  InputLayer        	(None, 93)          	produces      93 outputs
  DenseLayer        	(None, 200)         	produces     200 outputs
  DropoutLayer      	(None, 200)         	produces     200 outputs
  DenseLayer        	(None, 200)         	produces     200 outputs
  DenseLayer        	(None, 9)           	produces       9 outputs

 Epoch  |  Train loss  |  Valid loss  |  Train / Val  |  Valid acc  |  Dur
--------|--------------|--------------|---------------|-------------|-------
     1  |  [94m  1.056035[0m  |  [32m  0.702061[0m  |     1.504192  |     73.94%  |  4.8s


NeuralNet(X_tensor_type=<function matrix at 0x7fc9ed37f140>,
     batch_iterator_test=<nolearn.lasagne.BatchIterator object at 0x7fc9e9f3cd10>,
     batch_iterator_train=<nolearn.lasagne.BatchIterator object at 0x7fc9e9f3ccd0>,
     dense0_num_units=200, dense1_num_units=200, dropout_p=0.75,
     eval_size=0.2, input_shape=(None, 93),
     layers=[('input', <class 'lasagne.layers.input.InputLayer'>), ('dense0', <class 'lasagne.layers.dense.DenseLayer'>), ('dropout', <class 'lasagne.layers.noise.DropoutLayer'>), ('dense1', <class 'lasagne.layers.dense.DenseLayer'>), ('output', <class 'lasagne.layers.dense.DenseLayer'>)],
     loss=<function negative_log_likelihood at 0x7fc9ea861668>,
     max_epochs=100, more_params={}, on_epoch_finished=(),
     on_training_finished=(),
     output_nonlinearity=<theano.tensor.nnet.nnet.Softmax object at 0x7fc9ecfcab90>,
     output_num_units=9, regression=False,
     update=<function nesterov_momentum at 0x7fc9ea861320>,
     update_learning_rate=0.02,

In [19]:
# `net5` is never constructed in this notebook, so on a fresh kernel this
# cell raised NameError. It is rebuilt here from the repr printed below this
# cell (dense0=400, dense1=400, dropout_p=0.5, learning rate 0.01).
# NOTE(review): the saved repr is cut off after update_learning_rate, so
# update_momentum=0.9 and verbose=1 are assumptions — confirm.
net5 = NeuralNet(layers=layers0,
                 input_shape=(None, num_features),
                 dense0_num_units=400,
                 dropout_p=0.5,
                 dense1_num_units=400,
                 output_num_units=num_classes,
                 output_nonlinearity=softmax,
                 update=nesterov_momentum,
                 update_learning_rate=0.01,
                 update_momentum=0.9,
                 eval_size=0.2,
                 verbose=1,
                 max_epochs=100)
net5.fit(X, y)

  InputLayer        	(None, 93)          	produces      93 outputs
  DenseLayer        	(None, 400)         	produces     400 outputs
  DropoutLayer      	(None, 400)         	produces     400 outputs
  DenseLayer        	(None, 400)         	produces     400 outputs
  DenseLayer        	(None, 9)           	produces       9 outputs

 Epoch  |  Train loss  |  Valid loss  |  Train / Val  |  Valid acc  |  Dur
--------|--------------|--------------|---------------|-------------|-------
     1  |  [94m  0.873218[0m  |  [32m  0.655700[0m  |     1.331735  |     75.26%  |  17.1s
     2  |  [94m  0.671351[0m  |  [32m  0.609322[0m  |     1.101799  |     76.61%  |  17.0s
     3  |  [94m  0.635311[0m  |  [32m  0.588956[0m  |     1.078707  |     77.36%  |  17.2s
     4  |  [94m  0.615298[0m  |  [32m  0.577839[0m  |     1.064826  |     77.39%  |  17.1s
     5  |  [94m  0.594160[0m  |  [32m  0.563531[0m  |     1.054353  |     77.75%  |  16.9s
     6  |  [94m  0.583709[0m  |  [

NeuralNet(X_tensor_type=<function matrix at 0x7fc9ed37f140>,
     batch_iterator_test=<nolearn.lasagne.BatchIterator object at 0x7fc9e9f3cd10>,
     batch_iterator_train=<nolearn.lasagne.BatchIterator object at 0x7fc9e9f3ccd0>,
     dense0_num_units=400, dense1_num_units=400, dropout_p=0.5,
     eval_size=0.2, input_shape=(None, 93),
     layers=[('input', <class 'lasagne.layers.input.InputLayer'>), ('dense0', <class 'lasagne.layers.dense.DenseLayer'>), ('dropout', <class 'lasagne.layers.noise.DropoutLayer'>), ('dense1', <class 'lasagne.layers.dense.DenseLayer'>), ('output', <class 'lasagne.layers.dense.DenseLayer'>)],
     loss=<function negative_log_likelihood at 0x7fc9ea861668>,
     max_epochs=100, more_params={}, on_epoch_finished=(),
     on_training_finished=(),
     output_nonlinearity=<theano.tensor.nnet.nnet.Softmax object at 0x7fc9ecfcab90>,
     output_num_units=9, regression=False,
     update=<function nesterov_momentum at 0x7fc9ea861320>,
     update_learning_rate=0.01, 

In [21]:
# `net6` is never constructed in this notebook, so on a fresh kernel this
# cell raised NameError. It is rebuilt here from the repr printed below this
# cell (dense0=300, dense1=300, dropout_p=0.65, learning rate 0.01).
# NOTE(review): the saved repr is cut off after update_learning_rate, so
# update_momentum=0.9 and verbose=1 are assumptions — confirm.
net6 = NeuralNet(layers=layers0,
                 input_shape=(None, num_features),
                 dense0_num_units=300,
                 dropout_p=0.65,
                 dense1_num_units=300,
                 output_num_units=num_classes,
                 output_nonlinearity=softmax,
                 update=nesterov_momentum,
                 update_learning_rate=0.01,
                 update_momentum=0.9,
                 eval_size=0.2,
                 verbose=1,
                 max_epochs=100)
net6.fit(X, y)

  InputLayer        	(None, 93)          	produces      93 outputs
  DenseLayer        	(None, 300)         	produces     300 outputs
  DropoutLayer      	(None, 300)         	produces     300 outputs
  DenseLayer        	(None, 300)         	produces     300 outputs
  DenseLayer        	(None, 9)           	produces       9 outputs

 Epoch  |  Train loss  |  Valid loss  |  Train / Val  |  Valid acc  |  Dur
--------|--------------|--------------|---------------|-------------|-------
     1  |  [94m  0.946256[0m  |  [32m  0.674697[0m  |     1.402490  |     74.25%  |  9.6s
     2  |  [94m  0.727588[0m  |  [32m  0.636392[0m  |     1.143300  |     75.74%  |  9.5s
     3  |  [94m  0.684885[0m  |  [32m  0.612714[0m  |     1.117788  |     76.27%  |  9.6s
     4  |  [94m  0.658795[0m  |  [32m  0.596843[0m  |     1.103800  |     76.74%  |  9.5s
     5  |  [94m  0.644275[0m  |  [32m  0.586196[0m  |     1.099078  |     76.95%  |  9.5s
     6  |  [94m  0.632103[0m  |  [32m  

NeuralNet(X_tensor_type=<function matrix at 0x7fc9ed37f140>,
     batch_iterator_test=<nolearn.lasagne.BatchIterator object at 0x7fc9e9f3cd10>,
     batch_iterator_train=<nolearn.lasagne.BatchIterator object at 0x7fc9e9f3ccd0>,
     dense0_num_units=300, dense1_num_units=300, dropout_p=0.65,
     eval_size=0.2, input_shape=(None, 93),
     layers=[('input', <class 'lasagne.layers.input.InputLayer'>), ('dense0', <class 'lasagne.layers.dense.DenseLayer'>), ('dropout', <class 'lasagne.layers.noise.DropoutLayer'>), ('dense1', <class 'lasagne.layers.dense.DenseLayer'>), ('output', <class 'lasagne.layers.dense.DenseLayer'>)],
     loss=<function negative_log_likelihood at 0x7fc9ea861668>,
     max_epochs=100, more_params={}, on_epoch_finished=(),
     on_training_finished=(),
     output_nonlinearity=<theano.tensor.nnet.nnet.Softmax object at 0x7fc9ecfcab90>,
     output_num_units=9, regression=False,
     update=<function nesterov_momentum at 0x7fc9ea861320>,
     update_learning_rate=0.01,

In [None]:
# NOTE(review): `net7` is not defined in any cell of this notebook, so on a
# fresh kernel this line raises NameError. The log below shows a
# 93-300-300-9 network, but its dropout and learning-rate settings were not
# saved with the output — TODO restore the NeuralNet definition.
net7.fit(X, y)

  InputLayer        	(None, 93)          	produces      93 outputs
  DenseLayer        	(None, 300)         	produces     300 outputs
  DropoutLayer      	(None, 300)         	produces     300 outputs
  DenseLayer        	(None, 300)         	produces     300 outputs
  DenseLayer        	(None, 9)           	produces       9 outputs

 Epoch  |  Train loss  |  Valid loss  |  Train / Val  |  Valid acc  |  Dur
--------|--------------|--------------|---------------|-------------|-------
     1  |  [94m  0.860121[0m  |  [32m  0.647825[0m  |     1.327706  |     75.10%  |  10.9s
     2  |  [94m  0.662327[0m  |  [32m  0.607538[0m  |     1.090182  |     76.43%  |  11.0s
     3  |  [94m  0.627324[0m  |  [32m  0.587278[0m  |     1.068189  |     76.94%  |  11.0s
     4  |  [94m  0.605131[0m  |  [32m  0.574362[0m  |     1.053571  |     77.47%  |  10.9s
     5  |  [94m  0.588685[0m  |  [32m  0.564029[0m  |     1.043714  |     77.58%  |  10.9s
     6  |  [94m  0.575891[0m  |  [

## Prepare Submission File

In [14]:
# Predict class probabilities for the test set with net0 and write them to
# the default file 'my_neural_net_submission.csv'.
make_submission(net0, X_test, ids, encoder)

Wrote submission to file my_neural_net_submission.csv.
