# Otto Group Product Classification Challenge using nolearn/lasagne

This short notebook is meant to help you getting started with nolearn and lasagne in order to train a neural net and make a submission to the Otto Group Product Classification Challenge.

* [Otto Group Product Classification Challenge](https://www.kaggle.com/c/otto-group-product-classification-challenge)
* [Get the notebook from the Otto Group repository](https://github.com/ottogroup)
* [Nolearn repository](https://github.com/dnouri/nolearn)
* [Lasagne repository](https://github.com/benanne/Lasagne)
* [A nolearn/lasagne tutorial for convolutional nets](http://danielnouri.org/notes/2014/12/17/using-convolutional-neural-nets-to-detect-facial-keypoints-tutorial/)

## Imports

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
import os
import theano

Using gpu device 0: GeForce 210


In [2]:
from lasagne.layers import DenseLayer
from lasagne.layers import InputLayer
from lasagne.layers import DropoutLayer
from lasagne.nonlinearities import softmax, leaky_rectify, LeakyRectify
from lasagne.updates import nesterov_momentum
from nolearn.lasagne import NeuralNet

## Utility functions

In [3]:
def load_train_data(path):
    df = pd.read_csv(path)
    X = df.values.copy()
    np.random.shuffle(X)
    X, labels = X[:, 1:-1].astype(np.float32), X[:, -1]
    encoder = LabelEncoder()
    y = encoder.fit_transform(labels).astype(np.int32)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    return X, y, encoder, scaler

In [4]:
def load_test_data(path, scaler):
    df = pd.read_csv(path)
    df = df.iloc[:,:-1]
    X = df.values.copy()
    X, ids = X[:, 1:].astype(np.float32), X[:, 0].astype(str)
    X = scaler.transform(X)
    return X, ids

In [5]:
def make_submission(clf, X_test, ids, encoder):
    y_prob = clf.predict_proba(X_test)
    
    i = 0
    while os.path.exists(os.path.join("submissions", "nn-" + str(i) + ".csv")):
        i += 1
    name = os.path.join("submissions", "nn-" + str(i) + ".csv")
    
    with open(name, 'w') as f:
        f.write('id,')
        f.write(','.join(encoder.classes_))
        f.write('\n')
        for id, probs in zip(ids, y_prob):
            probas = ','.join([id] + map(str, probs.tolist()))
            f.write(probas)
            f.write('\n')
            
    print("Wrote submission to file {}.".format(name))

## Load Data

In [6]:
X, y, encoder, scaler = load_train_data("../data/train80.csv")

In [7]:
X_test, ids = load_test_data('../data/holdout20.csv', scaler)

In [8]:
num_classes = len(encoder.classes_)
num_features = X.shape[1]

# Adjust network parameters over time

In [9]:
def float32(k):
    return np.cast['float32'](k)

class AdjustVariable(object):
    def __init__(self, name, start=0.03, stop=0.001):
        self.name = name
        self.start, self.stop = start, stop
        self.ls = None

    def __call__(self, nn, train_history):
        if self.ls is None:
            self.ls = np.linspace(self.start, self.stop, nn.max_epochs)

        epoch = train_history[-1]['epoch']
        new_value = float32(self.ls[epoch - 1])
        getattr(nn, self.name).set_value(new_value)

class EarlyStopping(object):
    def __init__(self, patience=100):
        self.patience = patience
        self.best_valid = np.inf
        self.best_valid_epoch = 0
        self.best_weights = None

    def __call__(self, nn, train_history):
        current_valid = train_history[-1]['valid_loss']
        current_epoch = train_history[-1]['epoch']
        if current_valid < self.best_valid:
            self.best_valid = current_valid
            self.best_valid_epoch = current_epoch
            self.best_weights = [w.get_value() for w in nn.get_all_params()]
        elif self.best_valid_epoch + self.patience < current_epoch:
            print("Early stopping.")
            print("Best valid loss was {:.6f} at epoch {}.".format(
                self.best_valid, self.best_valid_epoch))
            nn.load_weights_from(self.best_weights)
            raise StopIteration()

## Train Neural Net

In [12]:
layers0 = [('input', InputLayer),
           ('dropoutin', DropoutLayer),
           ('dense0', DenseLayer),
           ('dropout0', DropoutLayer),
           ('dense1', DenseLayer),
           ('dropout1', DropoutLayer),
           ('output', DenseLayer)]

# Log loss

In [10]:
sample_sub = "../submissions/sampleSubmission.csv"
sample_sub_df = pd.read_csv(sample_sub)

def normalize(row, epsilon=1e-15):
    
    row = row / np.sum(row)
    row = np.maximum(epsilon, row)
    row = np.minimum(1 - epsilon, row)
    
    return row
    
def logloss_mc(y_true, y_probs):
    
    # Normalize probability data frame
    y_probs = y_probs.apply(normalize, axis=1)
        
    log_vals = []
        
    for i, y in enumerate(y_true):
        c = int(y.split("_")[1])
        log_vals.append(- np.log(y_probs.iloc[i,c - 1]))
        
    return np.mean(log_vals)

df_holdout = pd.read_csv("../data/holdout20.csv")
y_valid = df_holdout.target

# Define hyperparameter

In [13]:
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK
import functools


def f21(f):
    return theano.shared(float32(f))

space = {  'layers' : layers0,
                'input_shape' : (None, num_features),
                
                'dropoutin_p' : hp.uniform('dropin', 0, 0.2),
                
                'dense0_num_units': hp.quniform('dense0', 200, 600, 30),
                'dense0_nonlinearity' : hp.choice('leaky0', [LeakyRectify(x) for x in np.linspace(0, 1, 6)]) ,
                'dropout0_p': hp.uniform('drop0', 0, 0.5),
                
                'dense1_num_units' : hp.quniform('dense1', 200, 600, 30),
                'dense1_nonlinearity' : hp.choice('leaky1', [LeakyRectify(x) for x in np.linspace(0, 1, 6)]),
                'dropout1_p' : hp.uniform('drop1', 0, 0.6),
                
                'output_num_units' : num_classes,
                'output_nonlinearity' : softmax,

                'update' : nesterov_momentum,
              
                'update_learning_rate' : hp.choice('ulr', [f21(f) for f in np.linspace(0.01, 0.03, 10)]),
                'update_momentum' : hp.choice('um', [f21(f) for f in np.linspace(0.9, 0.99, 5)]),
                'eval_size' : None,
                'verbose' : 1,
                'max_epochs' : 70}

# Different submission method

In [14]:
def make_submission_hyper(clf, X_test, ids, encoder, name='../hypersub.csv'):
    y_prob = clf.predict_proba(X_test)
    
    with open(name, 'w') as f:
        f.write('id,')
        f.write(','.join(encoder.classes_))
        f.write('\n')
        for id, probs in zip(ids, y_prob):
            probas = ','.join([id] + map(str, probs.tolist()))
            f.write(probas)
            f.write('\n')
            
    print("Wrote submission to file {}.".format(name))

In [None]:
# XGBoost polished
subpol = pd.read_csv("../submission.csv").iloc[:, 1:]
logloss_mc(y_valid, subpol)


def objective(hyperparameter):

    mynet = NeuralNet(** hyperparameter)
    mynet.fit(X, y)

    make_submission_hyper(mynet, X_test, ids, encoder)

    sub = pd.read_csv("../hypersub.csv").iloc[:, 1:]
    ll = logloss_mc(y_valid, sub)
    print(ll)
    print(hyperparameter)
    return {'loss' : ll,
            'status' : STATUS_OK}


trials = Trials()

best = fmin(fn=objective,
    space=space,
    algo=tpe.suggest,
    max_evals=50,
    trials=trials)

print best

ERROR (theano.gof.opt): Optimization failure due to: local_gpu_softmax_with_bias
ERROR:theano.gof.opt:Optimization failure due to: local_gpu_softmax_with_bias
ERROR (theano.gof.opt): TRACEBACK:
ERROR:theano.gof.opt:TRACEBACK:
ERROR (theano.gof.opt): Traceback (most recent call last):
  File "/usr/lib/python2.7/site-packages/theano/gof/opt.py", line 1491, in process_node
    replacements = lopt.transform(node)
  File "/usr/lib/python2.7/site-packages/theano/sandbox/cuda/opt.py", line 1119, in local_gpu_softmax_with_bias
    gpu_sm = GpuSoftmaxWithBias()(gpu_from_host(x), gpu_from_host(b))
  File "/usr/lib/python2.7/site-packages/theano/gof/op.py", line 488, in __call__
    node = self.make_node(*inputs, **kwargs)
  File "/usr/lib/python2.7/site-packages/theano/sandbox/cuda/basic_ops.py", line 133, in make_node
    dtype=x.dtype)()])
  File "/usr/lib/python2.7/site-packages/theano/sandbox/cuda/type.py", line 69, in __init__
    (self.__class__.__name__, dtype, name))
TypeError: CudaNdarr

  InputLayer        	(None, 93)          	produces      93 outputs
  DropoutLayer      	(None, 93)          	produces      93 outputs
  DenseLayer        	(None, 510.0)       	produces   510.0 outputs
  DropoutLayer      	(None, 510.0)       	produces   510.0 outputs
  DenseLayer        	(None, 360.0)       	produces   360.0 outputs
  DropoutLayer      	(None, 360.0)       	produces   360.0 outputs
  DenseLayer        	(None, 9)           	produces       9 outputs

 Epoch  |  Train loss  |  Valid loss  |  Train / Val  |  Valid acc  |  Dur
--------|--------------|--------------|---------------|-------------|-------
     1  |  [94m  0.838694[0m  |         nan  |          nan  |       nan%  |  29.2s
     2  |  [94m  0.761991[0m  |         nan  |          nan  |       nan%  |  29.7s
     3  |  [94m  0.749804[0m  |         nan  |          nan  |       nan%  |  29.1s
     4  |  [94m  0.746817[0m  |         nan  |          nan  |       nan%  |  28.9s

In [None]:
print trials.trials()