In [None]:
import numpy as np
import mxnet as mx
from mxnet import gluon, autograd, nd, init
from mxnet.gluon import nn, loss as gloss, data as gdata
import time
import pickle

## evaluate accuracy

In [None]:
def evaluate_accuracy(data_iter, net):
    """Return the fraction of samples in ``data_iter`` that ``net`` gets right.

    ``data_iter`` yields ``(X, y)`` batches. For every batch the network output
    is flattened to one score per sample, a score strictly greater than 0.5
    counts as a positive prediction, and the prediction is compared
    element-wise with the label cast to float32.

    Raises ZeroDivisionError when ``data_iter`` yields no samples.
    """
    correct = 0.0
    total = 0
    for features, labels in data_iter:
        labels = labels.astype('float32')
        scores = net(features).reshape(-1)
        hits = (scores > 0.5) == labels
        correct += hits.sum().asscalar()
        total += labels.size
    return correct / total

## load data

In [None]:
def load_data(path):
    """Unpickle and return the object stored in the file at ``path``.

    The file is opened in binary mode and closed again before returning.
    NOTE: unpickling can execute arbitrary code, so only point this at
    files from a trusted source.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)

# Two pickled dicts: the first holds arrays under 'X'/'y', the second under
# 'X_'/'y_'. Everything is cast to float32 before use.
my_data = load_data('./MyData1.pkl')
my_data_ = load_data('./MyData2.pkl')

X = my_data['X'].astype('float32')
y = my_data['y'].astype('float32')
X_ = my_data_['X_'].astype('float32')
y_ = my_data_['y_'].astype('float32')

# Show the shapes of the second data set.
X_.shape, y_.shape

In [None]:
# The first data set is used for training, the second for testing.
X_train, y_train = X, y
X_test, y_test = X_, y_
X_train.shape, y_train.shape, X_test.shape, y_test.shape

In [None]:
batch_size = 640

# Wrap the arrays so the loaders yield (features, label) mini-batches.
dataset_train = gdata.ArrayDataset(X_train, y_train)
dataset_test = gdata.ArrayDataset(X_test, y_test)

# Only the training batches are shuffled. Evaluation metrics do not depend on
# sample order, so the test loader keeps a fixed, reproducible order.
train_iter = gdata.DataLoader(dataset_train, batch_size, shuffle=True)
test_iter = gdata.DataLoader(dataset_test, batch_size, shuffle=False)

## MLP

In [None]:
def MyNet():
    """Build the MLP: three ReLU Dense layers (512, 256, 128 units) and a 1-unit output.

    The returned ``nn.Sequential`` is not initialised; call ``initialize()``
    on it before the first forward pass.
    """
    hidden_units = (512, 256, 128)
    net = nn.Sequential()
    for width in hidden_units:
        net.add(nn.Dense(width, activation='relu'))
    # Single linear output unit; callers threshold its value at 0.5.
    net.add(nn.Dense(1))
    return net

from sklearn.metrics import f1_score
def _collect_test_f1(data_iter, net):
    """Return the F1 score of ``net``'s thresholded outputs over ``data_iter``.

    Labels and predictions are gathered batch by batch and scored in a single
    ``f1_score`` call. Outputs strictly greater than 0.5 are predicted as 1.
    """
    y_true, y_pred = [], []
    for X, y in data_iter:
        y_true.append(y.asnumpy().reshape(-1))
        # Threshold the whole batch at once instead of one element at a time.
        y_pred.append((net(X).reshape(-1) > 0.5).asnumpy().astype('int32'))
    return f1_score(np.concatenate(y_true), np.concatenate(y_pred))


def train(net, train_iter, test_iter, num_epochs, batch_size, trainer):
    """Train ``net`` with a squared-error loss and print metrics per epoch.

    Parameters
    ----------
    net : Gluon block mapping a feature batch to one score per sample.
    train_iter : iterable of ``(X, y)`` mini-batches used for optimisation.
    test_iter : iterable of ``(X, y)`` mini-batches used for the reported
        test F1 score and test accuracy.
    num_epochs : number of passes over ``train_iter``.
    batch_size : value forwarded to ``trainer.step``.
    trainer : ``gluon.Trainer`` that updates the parameters of ``net``.

    Returns
    -------
    None. Progress is reported with ``print``; ``time_consumed`` in the report
    is the wall-clock time elapsed since training started.
    """
    loss = gloss.L2Loss()
    show_freq = 1  # print a report every `show_freq` epochs
    report = ('epoch: {}, train_loss: {:.4f}, train_acc: {:.4f}, '
              'test_f1:{:.4f}, test_acc:{:.4f},'
              'time_consumed :{:.2f} s')
    start_ = time.time()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            # Cast the labels before they are used by the loss and the
            # accuracy comparison (same cast as evaluate_accuracy).
            y = y.astype('float32')
            with autograd.record():
                y_hat = net(X).reshape(-1)
                l = loss(y_hat, y).sum()
            l.backward()
            trainer.step(batch_size)
            train_l_sum += l.asscalar()
            train_acc_sum += ((y_hat > 0.5) == y).sum().asscalar()
            n += y.size
        if (epoch + 1) % show_freq == 0:
            # Both test metrics come from the `test_iter` argument, so they
            # always describe the same data and need no notebook globals.
            test_f1 = _collect_test_f1(test_iter, net)
            test_acc = evaluate_accuracy(test_iter, net)
            time_consumed = time.time() - start_
            print(report.format(epoch + 1, train_l_sum / n, train_acc_sum / n,
                                test_f1, test_acc, time_consumed))

In [None]:
# Build the network (not yet initialised) and let the notebook display its layers.
net = MyNet()
net

## training

In [None]:
# Learning rate for plain SGD.
# NOTE(review): the original comment here read "num 100" — presumably an epoch
# count that was tried with this rate; confirm with the author.
lr = 0.01

# Fresh model with default parameter initialisation, plus its optimiser.
net = MyNet()
net.initialize()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})

In [None]:
num_epochs = 70
# Keyword arguments make it explicit which loader and settings are used.
train(net, train_iter, test_iter,
      num_epochs=num_epochs, batch_size=batch_size, trainer=trainer)

In [None]:
# Write the network's current parameters to the file "MyNet.pth".
net.save_parameters("MyNet.pth")

## prediction

In [None]:
# Load the pickled test vectors and view them as (num_samples, 2, 73).
# NOTE(review): assumes the pickle holds an array whose size is a multiple of 2 * 73 — confirm.
vec = load_data('./TestVec.pkl').reshape(-1, 2, 73)
vec.shape

In [None]:
# Forward pass over all test vectors; `a` holds the raw network outputs.
a = net(nd.array(vec))

In [None]:
# Display the raw network outputs.
a

In [None]:
# Turn the raw outputs into 0/1 labels: 1 where the output is strictly greater
# than 0.5. The whole array is thresholded in one operation and copied to the
# host once, rather than calling asscalar() for every sample.
b = (a.reshape(-1) > 0.5).asnumpy().astype(int).tolist()

In [None]:
# Number of samples predicted as class 1.
b.count(1)

In [None]:
import pandas as pd

# Start from the sample submission file and fill in the predicted labels.
test = pd.read_csv('sample_submission.csv')
# Use item assignment, not `test.predictions = b`: attribute assignment only
# updates a column that already exists and would otherwise set a plain Python
# attribute, leaving the saved CSV without predictions.
test['predictions'] = b

In [None]:
# Write the submission to '1.csv' without the DataFrame index column.
test.to_csv('1.csv', index=False)