In [124]:
import pandas as pd
import numpy as np
import sys
sys.path.append('./')
from preprocessing import read_data

train_data, test_data, all_X = read_data()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 496509 entries, 7400724257333664556 to 4931599763172137858
Columns: 16 entries, item_price_level to user_occupation_id
dtypes: float64(9), int64(7)
memory usage: 64.4 MB


In [125]:
from mxnet import ndarray as nd
from mxnet import autograd
from mxnet import gluon
import mxnet as mx

ctx = mx.gpu(0)
# ctx = mx.cpu(0)


num_train = train_data.shape[0]

X_train = all_X[:num_train].as_matrix()
X_test = all_X[num_train:].as_matrix()
y_train = train_data['is_trade'].astype(np.int).as_matrix()

X_train = nd.array(X_train)
y_train = nd.array(y_train)
y_train.reshape((num_train, 1))
X_test = nd.array(X_test)

In [176]:
from mxnet.gluon import nn


def get_net():
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Dense(64, activation="relu"))
        net.add(nn.Dense(2))
    net.initialize(ctx=ctx)
    return net


def get_net_dropout(drop_prob1, drop_prob2):
    net = gluon.nn.Sequential()

    with net.name_scope():
        # 第一层全连接。
        net.add(nn.Dense(64, activation="relu"))
        # 在第一层全连接后添加丢弃层。
        net.add(nn.Dropout(drop_prob1))
        # 第二层全连接。
        net.add(nn.Dense(64, activation="relu"))
        # 在第二层全连接后添加丢弃层。
        net.add(nn.Dropout(drop_prob2))
        net.add(nn.Dense(2))
    net.initialize(ctx=ctx)
    return net


softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()


def evaluate_loss(net, data_iter):
    total_loss = 0.
    for data, label in data_iter:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        output = net(data)
        loss = softmax_cross_entropy(output, label)
        total_loss += nd.mean(loss).asscalar()
    return total_loss / len(data_iter)


def evaluate_accuracy(net, data_iter):
    acc = nd.array([0])
    n = 0.
    for data, label in data_iter:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        output = net(data)
        acc += nd.sum(output.argmax(axis=1) == label).asscalar()
        n += label.size
        acc.wait_to_read()  # don't push too many operators into backend
    return acc.asscalar() / n


def evaluate_recall(net, data_iter):
    tp = nd.array([0])
    p = 0.
    for data, label in data_iter:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        output = net(data)
        yhat = output.argmax(axis=1)
        tp += nd.sum(yhat * label).asscalar()
        p += nd.sum(label).asscalar()
        tp.wait_to_read()  # don't push too many operators into backend
    return tp.asscalar() / p

def evaluate_precision(net, data_iter):
    tp = nd.array([0])
    hat_p = 0.
    for data, label in data_iter:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        output = net(data)
        yhat = output.argmax(axis=1)
        tp += nd.sum(yhat * label).asscalar()
        hat_p += nd.sum(yhat).asscalar()
        tp.wait_to_read()  # don't push too many operators into backend
    print(tp, hat_p)
    return tp.asscalar() / hat_p

# def evaluate_precision(net, data_iter):
#     tf = nd.array([0])
#     f = 0.
#     for data, label in data_iter:
#         data = data.as_in_context(ctx)
#         label = label.as_in_context(ctx)
#         output = net(data)
#         yhat = output.argmax(axis=1)
#         tf += nd.sum(yhat + label == 0).asscalar()
#         f += nd.sum(label == 0).asscalar()
#         tf.wait_to_read()  # don't push too many operators into backend
    
#     return tf.asscalar() / f

In [177]:
%matplotlib inline
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 120
import matplotlib.pyplot as plt
from time import time
import utils

def train(net, X_train, y_train, X_test, y_test, epochs,
          verbose_epoch, batch_size, learning_rate, weight_decay, lr_decay, lr_decay_epoch):
    """Train a network"""
    print("Start training on ", ctx)
    
    train_loss = []

    dataset_train = gluon.data.ArrayDataset(X_train, y_train)
    data_iter_train = utils.DataLoader(
        dataset_train, batch_size, shuffle=True)

    if X_test is not None:
        test_loss = []
        dataset_test = gluon.data.ArrayDataset(X_test, y_test)
        data_iter_test = utils.DataLoader(
            dataset_test, batch_size, shuffle=False)

    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': learning_rate})
    net.collect_params().initialize(force_reinit=True, ctx=ctx)
    
    for epoch in range(epochs):
        start = time()
        for data, label in data_iter_train:
            with autograd.record():
                data = data.as_in_context(ctx)
                label = label.as_in_context(ctx)
                output = net(data)
                loss = softmax_cross_entropy(output, label)

            loss.backward()
            trainer.step(batch_size)
            nd.waitall()

        if epoch > 0 and epoch % lr_decay_epoch == 0:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
            print('change lr to %f' % (trainer.learning_rate))

        if epoch >= verbose_epoch:
            cur_train_loss = evaluate_loss(net, data_iter_train)
            train_loss.append(cur_train_loss)
            train_recall = evaluate_recall(net, data_iter_train)

            if X_test is not None:
                cur_test_loss = evaluate_loss(net, data_iter_test)
                test_loss.append(cur_test_loss)
                test_recall = evaluate_recall(net, data_iter_test)

            if X_test is not None:
                print("Epoch %d, train loss: %f, test loss: %f, Train recall %f, Test recall %f, Time %.1f sec" % (
                    epoch, cur_train_loss, cur_test_loss, train_acc, test_recall, time() - start))
            else:
                print("Epoch %d, train loss: %f, Train acc %f, Time %.1f sec" %
                      (epoch, cur_train_loss, train_acc, time() - start))

    plt.plot(train_loss)
    plt.legend(['train'])
    if X_test is not None:
        plt.plot(test_loss)
        plt.legend(['train', 'test'])
    plt.show()
    if X_test is not None:
        return cur_train_loss, cur_test_loss
    else:
        return cur_train_loss

In [178]:
def k_fold_cross_valid(net, k, epochs, verbose_epoch, X_train, y_train,
                       batch_size, learning_rate, weight_decay, lr_decay, lr_decay_epoch):
    assert k > 1
    fold_size = X_train.shape[0] // k
    train_loss_sum = 0.0
    test_loss_sum = 0.0
    for test_i in range(k):
        X_val_test = X_train[test_i * fold_size: (test_i + 1) * fold_size, :]
        y_val_test = y_train[test_i * fold_size: (test_i + 1) * fold_size]

        val_train_defined = False
        for i in range(k):
            if i != test_i:
                X_cur_fold = X_train[i * fold_size: (i + 1) * fold_size, :]
                y_cur_fold = y_train[i * fold_size: (i + 1) * fold_size]
                if not val_train_defined:
                    X_val_train = X_cur_fold
                    y_val_train = y_cur_fold
                    val_train_defined = True
                else:
                    X_val_train = nd.concat(X_val_train, X_cur_fold, dim=0)
                    y_val_train = nd.concat(y_val_train, y_cur_fold, dim=0)

        train_loss, test_loss = train(
            net, X_val_train, y_val_train, X_val_test, y_val_test,
            epochs, verbose_epoch, batch_size, learning_rate, weight_decay, lr_decay, lr_decay_epoch)
        
        train_loss_sum += train_loss
        print("Test loss: %f" % test_loss)
        test_loss_sum += test_loss
    return train_loss_sum / k, test_loss_sum / k

In [180]:
k = 5
epochs = 30
verbose_epoch = 0
learning_rate = 1
weight_decay = 0.2
batch_size = 100
lr_decay = 0.2
lr_decay_epoch = 10

drop_prob1 = 0.2
drop_prob2 = 0.5

net = get_net_dropout(drop_prob1, drop_prob2)
# net = get_net()

train_loss, test_loss = k_fold_cross_valid(net, k, epochs, verbose_epoch, X_train,
                                           y_train, batch_size, learning_rate, weight_decay, lr_decay, lr_decay_epoch)
print("%d-fold validation: Avg train loss: %f, Avg test loss: %f" %
      (k, train_loss, test_loss))

Start training on  gpu(0)

[ 1725.]
<NDArray 1 @cpu(0)> 90390.0
0.0190839694656

[ 0.]
<NDArray 1 @cpu(0)> 0.0
nan





[ 0.]
<NDArray 1 @cpu(0)> 0.0
nan

[ 0.]
<NDArray 1 @cpu(0)> 0.0
nan

[ 0.]
<NDArray 1 @cpu(0)> 0.0
nan


KeyboardInterrupt: 