# Otto Group Product Classification Challenge using nolearn/lasagne

This short notebook is meant to help you get started with nolearn and lasagne in order to train a neural net and make a submission to the Otto Group Product Classification Challenge.

* [Otto Group Product Classification Challenge](https://www.kaggle.com/c/otto-group-product-classification-challenge)
* [Get the notebook from the Otto Group repository](https://github.com/ottogroup)
* [Nolearn repository](https://github.com/dnouri/nolearn)
* [Lasagne repository](https://github.com/benanne/Lasagne)
* [A nolearn/lasagne tutorial for convolutional nets](http://danielnouri.org/notes/2014/12/17/using-convolutional-neural-nets-to-detect-facial-keypoints-tutorial/)

## Imports

In [5]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
import theano

In [6]:
from lasagne.layers import DenseLayer
from lasagne.layers import InputLayer
from lasagne.layers import DropoutLayer
from lasagne.nonlinearities import softmax
from lasagne.updates import nesterov_momentum
from nolearn.lasagne import NeuralNet

## Utility functions

In [7]:
def load_train_data(path, seed=None):
    """Load the training CSV, shuffle its rows and prepare features and labels.

    The first column of the file is skipped, the last column is used as the
    class label and every column in between is treated as a feature.

    Parameters
    ----------
    path : str
        Location of the training CSV file.
    seed : int or None, optional
        Seed for the row shuffle.  With the default ``None`` the global NumPy
        random state is used, exactly as before; pass an integer to get the
        same row order on every run.

    Returns
    -------
    X : array
        Features, cast to float32 and passed through
        ``StandardScaler.fit_transform``.
    y : array of int32
        Labels encoded by ``LabelEncoder.fit_transform``.
    encoder : LabelEncoder
        The fitted label encoder.
    scaler : StandardScaler
        The fitted scaler, to be reused on the test data.
    """
    df = pd.read_csv(path)
    X = df.values.copy()
    # Shuffle whole rows in place so features and labels stay aligned.  A
    # dedicated RandomState is only used when a seed is given, which leaves
    # the global random state untouched in that case.
    rng = np.random if seed is None else np.random.RandomState(seed)
    rng.shuffle(X)
    X, labels = X[:, 1:-1].astype(np.float32), X[:, -1]
    encoder = LabelEncoder()
    y = encoder.fit_transform(labels).astype(np.int32)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    return X, y, encoder, scaler

In [8]:
def load_test_data(path, scaler):
    """Load the test CSV and return scaled features together with the row ids.

    The first column of the file is used as the id, all remaining columns
    are treated as features.

    Parameters
    ----------
    path : str
        Location of the test CSV file.
    scaler : object
        Object with a ``transform`` method, e.g. the scaler returned by
        ``load_train_data``.

    Returns
    -------
    X : array
        Features, cast to float32 and passed through ``scaler.transform``.
    ids : numpy.ndarray of str
        First column of the file converted to strings.
    """
    df = pd.read_csv(path)
    # Split the columns *before* converting to NumPy: converting the whole
    # frame at once forces a single common dtype, so one float feature
    # column would turn integer ids into '1.0', '2.0', ...
    ids = np.asarray(df.iloc[:, 0]).astype(str)
    X = np.asarray(df.iloc[:, 1:]).astype(np.float32)
    X = scaler.transform(X)
    return X, ids

In [9]:
def make_submission(clf, X_test, ids, encoder, name='lasagne-otto-final-25.csv'):
    """Write the predicted class probabilities for ``X_test`` to a CSV file.

    The file starts with a header line ``id,<class 1>,<class 2>,...`` taken
    from ``encoder.classes_``, followed by one line per sample holding the id
    and the probabilities returned by ``clf.predict_proba``.

    Parameters
    ----------
    clf : object
        Fitted classifier providing ``predict_proba``.
    X_test : array
        Samples handed to ``clf.predict_proba``.
    ids : iterable
        One id per row of ``X_test``, written to the first column.
    encoder : object
        Object whose ``classes_`` attribute lists the class labels in the
        column order of the predicted probabilities.
    name : str, optional
        Path of the CSV file to write.
    """
    y_prob = clf.predict_proba(X_test)
    with open(name, 'w') as f:
        f.write('id,')
        f.write(','.join(str(label) for label in encoder.classes_))
        f.write('\n')
        for sample_id, probs in zip(ids, y_prob):
            # Build real lists: on Python 3 ``map`` returns an iterator and
            # ``[id] + map(...)`` raises TypeError.
            fields = [str(sample_id)] + [str(p) for p in probs.tolist()]
            f.write(','.join(fields))
            f.write('\n')
    print("Wrote submission to file {}.".format(name))

## Load Data

In [10]:
# Load the training set; the fitted scaler is reused for the test data and
# the fitted encoder for the header of the submission file.
X, y, encoder, scaler = load_train_data('../data/train25.csv')

In [13]:
# Load the test set and scale it with the scaler fitted on the training data.
X_test, ids = load_test_data('../data/test25_no_classes.csv', scaler)

In [14]:
# Width of the network's input layer: one unit per feature column.
num_features = X.shape[1]
# Width of the network's output layer: one unit per class label.
num_classes = len(encoder.classes_)

In [15]:
def float32(k):
    """Return ``k`` converted to a NumPy array of dtype float32.

    Scalars become 0-d arrays; sequences and arrays keep their shape.
    """
    # ``np.cast`` was removed in NumPy 2.0.  ``asarray(...).astype(...)`` is
    # what ``np.cast['float32']`` did internally and works on every version.
    return np.asarray(k).astype(np.float32)

class AdjustVariable(object):
    """Per-epoch callback that moves a shared variable linearly from ``start`` to ``stop``.

    Parameters
    ----------
    name : str
        Attribute of the net that holds the variable to adjust; the attribute
        must offer ``set_value`` (e.g. ``'update_learning_rate'``).
    start : float, optional
        Value used for the first epoch.
    stop : float, optional
        Value reached at epoch ``nn.max_epochs``.
    """

    def __init__(self, name, start=0.03, stop=0.001):
        self.name = name
        self.start, self.stop = start, stop
        # Schedule of values, built on the first call because it needs
        # ``nn.max_epochs``.
        self.ls = None

    def __call__(self, nn, train_history):
        """Set the variable to the scheduled value for the latest epoch.

        ``train_history[-1]['epoch']`` is read as a 1-based epoch number.
        """
        if self.ls is None:
            self.ls = np.linspace(self.start, self.stop, nn.max_epochs)

        epoch = train_history[-1]['epoch']
        # Clamp to the schedule: an epoch beyond ``max_epochs`` used to raise
        # IndexError and epoch 0 silently wrapped around to the final value.
        # NOTE(review): epochs past max_epochs presumably occur when training
        # is resumed with another ``fit`` call; confirm against nolearn.
        index = min(max(epoch - 1, 0), len(self.ls) - 1)
        new_value = float32(self.ls[index])
        getattr(nn, self.name).set_value(new_value)

class EarlyStopping(object):
    """Per-epoch callback that aborts training once the validation loss stalls.

    The best validation loss seen so far is tracked together with a snapshot
    of the parameter values.  When more than ``patience`` epochs have passed
    since that best epoch, the snapshot is handed to ``nn.load_weights_from``
    and ``StopIteration`` is raised.
    """

    def __init__(self, patience=100):
        self.patience = patience
        self.best_valid = np.inf      # lowest validation loss seen so far
        self.best_valid_epoch = 0     # epoch at which it was recorded
        self.best_weights = None      # parameter values captured at that epoch

    def __call__(self, nn, train_history):
        """Record an improvement, or stop when patience is exhausted."""
        latest = train_history[-1]
        valid_loss = latest['valid_loss']
        epoch = latest['epoch']

        if valid_loss < self.best_valid:
            # New best: remember loss, epoch and the current parameter values.
            self.best_valid = valid_loss
            self.best_valid_epoch = epoch
            self.best_weights = [param.get_value()
                                 for param in nn.get_all_params()]
            return

        deadline = self.best_valid_epoch + self.patience
        if deadline < epoch:
            print("Early stopping.")
            summary = "Best valid loss was {:.6f} at epoch {}.".format(
                self.best_valid, self.best_valid_epoch)
            print(summary)
            nn.load_weights_from(self.best_weights)
            raise StopIteration()

## Train Neural Net

In [16]:
# Network topology as (name, layer class) pairs, listed from input to output.
# The names are the prefixes of the per-layer keyword arguments given to
# NeuralNet (e.g. ``dense0_num_units``).
layers0 = [
    ('input', InputLayer),
    ('dropoutin', DropoutLayer),
    ('dense0', DenseLayer),
    ('dropout0', DropoutLayer),
    ('dense1', DenseLayer),
    ('dropout1', DropoutLayer),
    ('dense2', DenseLayer),
    ('output', DenseLayer),
]

In [17]:
net0 = NeuralNet(
    layers=layers0,

    # Per-layer settings, addressed as <layer name>_<parameter>.
    input_shape=(None, num_features),
    dropoutin_p=0.0002,
    dense0_num_units=1024,
    dropout0_p=0.235,
    dense1_num_units=512,
    dropout1_p=0.29,
    dense2_num_units=256,
    output_num_units=num_classes,
    output_nonlinearity=softmax,

    # Optimizer.  Learning rate and momentum are theano shared variables so
    # that the AdjustVariable callbacks can update them via ``set_value``
    # after every epoch.
    update=nesterov_momentum,
    update_learning_rate=theano.shared(float32(0.03)),
    update_momentum=theano.shared(float32(0.92)),
    on_epoch_finished=[
        AdjustVariable('update_learning_rate', start=0.03, stop=0.0001),
        AdjustVariable('update_momentum', start=0.92, stop=0.98),
    ],

    # Training control.  With eval_size=None the recorded training log shows
    # nan for every validation metric.
    eval_size=None,
    verbose=1,
    max_epochs=100
)

In [18]:
# Train the network; with verbose=1 the layer summary and a per-epoch log are printed.
net0.fit(X, y)

  InputLayer        	(None, 93)          	produces      93 outputs
  DropoutLayer      	(None, 93)          	produces      93 outputs
  DenseLayer        	(None, 1024)        	produces    1024 outputs
  DropoutLayer      	(None, 1024)        	produces    1024 outputs
  DenseLayer        	(None, 512)         	produces     512 outputs
  DropoutLayer      	(None, 512)         	produces     512 outputs
  DenseLayer        	(None, 256)         	produces     256 outputs
  DenseLayer        	(None, 9)           	produces       9 outputs

 Epoch  |  Train loss  |  Valid loss  |  Train / Val  |  Valid acc  |  Dur
--------|--------------|--------------|---------------|-------------|-------
     1  |  [94m  0.965493[0m  |         nan  |          nan  |       nan%  |  3.8s
     2  |  [94m  0.666625[0m  |         nan  |          nan  |       nan%  |  3.7s
     3  |  [94m  0.615967[0m  |         nan  |          nan  |       nan%  |  3.6s
     4  |  [94m  0.578388[0m  |         nan  |        



NeuralNet(X_tensor_type=<function matrix at 0x7fca0180e230>,
     batch_iterator_test=<nolearn.lasagne.BatchIterator object at 0x7fc9f31b4890>,
     batch_iterator_train=<nolearn.lasagne.BatchIterator object at 0x7fc9f31b4850>,
     dense0_num_units=1024, dense1_num_units=512, dense2_num_units=256,
     dropout0_p=0.235, dropout1_p=0.29, dropoutin_p=0.0002, eval_size=None,
     input_shape=(None, 93),
     layers=[('input', <class 'lasagne.layers.input.InputLayer'>), ('dropoutin', <class 'lasagne.layers.noise.DropoutLayer'>), ('dense0', <class 'lasagne.layers.dense.DenseLayer'>), ('dropout0', <class 'lasagne.layers.noise.DropoutLayer'>), ('dense1', <class 'lasagne.layers.dense.DenseLayer'>), ('dropout1', <class 'lasagne.layers.noise.DropoutLayer'>), ('dense2', <class 'lasagne.layers.dense.DenseLayer'>), ('output', <class 'lasagne.layers.dense.DenseLayer'>)],
     loss=<function negative_log_likelihood at 0x7fc9f35e7320>,
     max_epochs=100, more_params={},
     on_epoch_finished=[<__m

## Prepare Submission File

In [19]:
# Predict class probabilities for the test set and write them to the default CSV file.
make_submission(net0, X_test, ids, encoder)

Wrote submission to file lasagne-otto-final-25.csv.
