# 参考
- https://github.com/gaussic/text-classification mxnet(gluon)/pytorch实现
- https://mxnet.incubator.apache.org/tutorials/nlp/cnn.html mxnet(sym scratch) 

# load Data

In [1]:
from __future__ import print_function

from collections import Counter
import itertools
import numpy as np
import re

try:
    # For Python 3.0 and later
    from urllib.request import urlopen
except ImportError:
    # Fall back to Python 2's urllib2
    from urllib2 import urlopen
    
def clean_str(string):
    """
    Tokenization/string cleaning.
    Original from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
    """
    string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
    string = re.sub(r"\'s", " \'s", string)
    string = re.sub(r"\'ve", " \'ve", string)
    string = re.sub(r"n\'t", " n\'t", string)
    string = re.sub(r"\'re", " \'re", string)
    string = re.sub(r"\'d", " \'d", string)
    string = re.sub(r"\'ll", " \'ll", string)
    string = re.sub(r",", " , ", string)
    string = re.sub(r"!", " ! ", string)
    string = re.sub(r"\(", " \( ", string)
    string = re.sub(r"\)", " \) ", string)
    string = re.sub(r"\?", " \? ", string)
    string = re.sub(r"\s{2,}", " ", string)
    
    return string.strip().lower()

def download_sentences(url):
    """
    Download sentences from specified URL. 
    
    Strip trailing newline, convert to Unicode.
    """
    
    remote_file = urlopen(url)
    return [line.decode('Latin1').strip() for line in remote_file.readlines()]
    
def load_data_and_labels():
    """
    Loads polarity data from files, splits the data into words and generates labels.
    Returns split sentences and labels.
    """

    positive_examples = download_sentences('https://raw.githubusercontent.com/yoonkim/CNN_sentence/master/rt-polarity.pos')
    negative_examples = download_sentences('https://raw.githubusercontent.com/yoonkim/CNN_sentence/master/rt-polarity.neg')
    
    # Tokenize
    x_text = positive_examples + negative_examples
    x_text = [clean_str(sent).split(" ") for sent in x_text]

    # Generate labels
    positive_labels = [1 for _ in positive_examples]
    negative_labels = [0 for _ in negative_examples]
    y = np.concatenate([positive_labels, negative_labels], 0)
    return x_text, y


def pad_sentences(sentences, padding_word=""):
    """
    Pads all sentences to be the length of the longest sentence.
    Returns padded sentences.
    """
    sequence_length = max(len(x) for x in sentences)
    padded_sentences = []
    for i in range(len(sentences)):
        sentence = sentences[i]
        num_padding = sequence_length - len(sentence)
        new_sentence = sentence + [padding_word] * num_padding
        padded_sentences.append(new_sentence)
        
    return padded_sentences


def build_vocab(sentences):
    """
    Builds a vocabulary mapping from token to index based on the sentences.
    Returns vocabulary mapping and inverse vocabulary mapping.
    """
    # Build vocabulary
    word_counts = Counter(itertools.chain(*sentences))
    
    # Mapping from index to word
    vocabulary_inv = [x[0] for x in word_counts.most_common()]
    
    # Mapping from word to index
    vocabulary = {x: i for i, x in enumerate(vocabulary_inv)}
    
    return vocabulary, vocabulary_inv


def build_input_data(sentences, labels, vocabulary):
    """
    Maps sentences and labels to vectors based on a vocabulary.
    """
    x = np.array([
            [vocabulary[word] for word in sentence]
            for sentence in sentences])
    y = np.array(labels)
    
    return x, y

"""
Loads and preprocesses data for the MR dataset.
Returns input vectors, labels, vocabulary, and inverse vocabulary.
"""
# Load and preprocess data
sentences, labels = load_data_and_labels()
sentences_padded = pad_sentences(sentences)
vocabulary, vocabulary_inv = build_vocab(sentences_padded)
x, y = build_input_data(sentences_padded, labels, vocabulary)

vocab_size = len(vocabulary)

# randomly shuffle data
np.random.seed(10)
shuffle_indices = np.random.permutation(np.arange(len(y)))
x_shuffled = x[shuffle_indices]
y_shuffled = y[shuffle_indices]

# split train/test set
# there are a total of 10662 labeled examples to train on
x_train, x_test = x_shuffled[:-1000], x_shuffled[-1000:]
y_train, y_test = y_shuffled[:-1000], y_shuffled[-1000:]

sentence_size = x_train.shape[1]

print('Train/Test split: %d/%d' % (len(y_train), len(y_test)))
print('train shape:', x_train.shape)
print('test shape:', x_test.shape)
print('vocab_size', vocab_size)
print('sentence max words', sentence_size)

Train/Test split: 9662/1000
train shape: (9662, 56)
test shape: (1000, 56)
vocab_size 18766
sentence max words 56


# gen train test set 

In [2]:
from mxnet import gluon
batch_size = 50
dataset_train = gluon.data.ArrayDataset(x_train.astype('float32'), y_train.astype('float32'))
train_data = gluon.data.DataLoader(dataset_train, batch_size, shuffle=True)
dataset_test = gluon.data.ArrayDataset(x_test.astype('float32'), y_test.astype('float32'))
test_data = gluon.data.DataLoader(dataset_test, batch_size, shuffle=True)

  Optimizer.opt_registry[name].__name__))


In [3]:
for data, label in test_data:
    print(data, label)
    break


[[  1.70000000e+01   2.70000000e+01   7.00000000e+00 ...,   0.00000000e+00
    0.00000000e+00   0.00000000e+00]
 [  2.75600000e+03   8.00000000e+00   1.38000000e+02 ...,   0.00000000e+00
    0.00000000e+00   0.00000000e+00]
 [  1.31430000e+04   3.40000000e+01   1.00000000e+00 ...,   0.00000000e+00
    0.00000000e+00   0.00000000e+00]
 ..., 
 [  1.00000000e+00   1.40000000e+01   8.30000000e+01 ...,   0.00000000e+00
    0.00000000e+00   0.00000000e+00]
 [  6.00000000e+00   5.68700000e+03   1.91000000e+02 ...,   0.00000000e+00
    0.00000000e+00   0.00000000e+00]
 [  4.50000000e+01   1.41000000e+02   1.88000000e+02 ...,   0.00000000e+00
    0.00000000e+00   0.00000000e+00]]
<NDArray 50x56 @cpu(0)> 
[ 0.  1.  1.  1.  0.  0.  1.  0.  0.  1.  1.  1.  0.  0.  0.  1.  1.  0.
  1.  1.  1.  0.  1.  1.  0.  1.  0.  1.  0.  1.  1.  1.  0.  1.  1.  1.
  0.  0.  0.  1.  1.  1.  1.  1.  1.  0.  1.  0.  0.  0.]
<NDArray 50 @cpu(0)>


In [None]:
from mxnet import gluon
batch_size = 50
dataset_train = gluon.data.ArrayDataset(x_train.reshape(x_train.shape[0], 1, x_train.shape[1]).astype('float32'), y_train.astype('float32'))
train_data = gluon.data.DataLoader(dataset_train, batch_size, shuffle=True)
dataset_test = gluon.data.ArrayDataset(x_test.reshape(x_test.shape[0], 1, x_test.shape[1]).astype('float32'), y_test.astype('float32'))
test_data = gluon.data.DataLoader(dataset_test, batch_size, shuffle=True)

In [9]:
batch_size = batch_size
#     train_data, test_data = utils.load_data_fashion_mnist(batch_size)
ctx = utilstmp.try_gpu()
#     net = net_structure(num_hidden=num_hidden, num_outputs=num_outputs, ctx=ctx)
net = gluon.nn.Sequential()
num_embed = 300 # dimensions to embed words into
filter_size = 3

with net.name_scope():
#     net.add(
#         nn.Embedding(vocab_size, num_embed),
#         nn.Conv1D(num_embed, 3),
#         nn.GlobalMaxPool1D(),
#         nn.Dropout(0.5),
#         nn.Dense(2)
#     )
    net.add(gluon.nn.Embedding(input_dim=vocab_size, output_dim=num_embed))
    # 输入输出数据格式是 batch x channel x height x width
    net.add(gluon.nn.Conv1D(channels=1, kernel_size=filter_size, activation='relu'))
    net.add(gluon.nn.GlobalMaxPool1D())
    net.add(gluon.nn.Dropout(0.5))
    net.add(gluon.nn.Dense(2, activation="relu"))
net.initialize(ctx=ctx)
#     softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()


In [114]:
ctx = utilstmp.try_gpu()
config = TCNNConfig()
config.vocab_size = vocab_size
V = config.vocab_size
E = config.embedding_dim
Nf = config.num_filters
Ks = config.kernel_sizes
C = config.num_classes
Dr = config.dropout_prob
net = nn.Sequential()
with net.name_scope():
    net.add(
        nn.Embedding(V, E),
        nn.Conv1D(Nf, Ks[0]),
        nn.GlobalMaxPool1D(),
        nn.Dropout(Dr),
        nn.Dense(C)
    )
net.initialize(ctx=ctx)

In [109]:
trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': config.learning_rate})
loss = gluon.loss.SoftmaxCrossEntropyLoss()
utilstmp.train(train_data, test_data, net, loss, trainer, ctx, num_epochs=config.num_epochs)

Start training on  gpu(0)
Epoch 0. Loss: 0.694, Train acc 0.50, Test acc 0.48, Time 1.3 sec
Epoch 1. Loss: 0.683, Train acc 0.57, Test acc 0.60, Time 1.1 sec
Epoch 2. Loss: 0.518, Train acc 0.76, Test acc 0.73, Time 1.1 sec
Epoch 3. Loss: 0.256, Train acc 0.91, Test acc 0.74, Time 1.1 sec
Epoch 4. Loss: 0.096, Train acc 0.97, Test acc 0.73, Time 1.1 sec
Epoch 5. Loss: 0.032, Train acc 0.99, Test acc 0.73, Time 1.1 sec
Epoch 6. Loss: 0.013, Train acc 1.00, Test acc 0.73, Time 1.1 sec
Epoch 7. Loss: 0.007, Train acc 1.00, Test acc 0.73, Time 1.1 sec
Epoch 8. Loss: 0.004, Train acc 1.00, Test acc 0.73, Time 1.1 sec
Epoch 9. Loss: 0.003, Train acc 1.00, Test acc 0.73, Time 1.1 sec


In [115]:
loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': config.learning_rate})
#     import pdb
#     pdb.set_trace() 
utilstmp.train(train_data, test_data, net, loss, trainer, ctx, num_epochs=5)

Start training on  gpu(0)
Epoch 0. Loss: 0.694, Train acc 0.49, Test acc 0.51, Time 1.1 sec
Epoch 1. Loss: 0.693, Train acc 0.50, Test acc 0.50, Time 1.0 sec
Epoch 2. Loss: 0.693, Train acc 0.50, Test acc 0.50, Time 1.0 sec
Epoch 3. Loss: 0.693, Train acc 0.50, Test acc 0.49, Time 1.0 sec
Epoch 4. Loss: 0.693, Train acc 0.50, Test acc 0.50, Time 1.1 sec


In [12]:
# %pdb on
mlp(num_epoch=50)

Start training on  gpu(0)
Epoch 0. Loss: 0.693, Train acc 0.49, Test acc 0.52, Time 1.9 sec
Epoch 1. Loss: 0.693, Train acc 0.50, Test acc 0.52, Time 1.2 sec
Epoch 2. Loss: 0.693, Train acc 0.51, Test acc 0.48, Time 1.2 sec
Epoch 3. Loss: 0.693, Train acc 0.50, Test acc 0.48, Time 1.2 sec
Epoch 4. Loss: 0.692, Train acc 0.50, Test acc 0.48, Time 1.2 sec
Epoch 5. Loss: 0.690, Train acc 0.49, Test acc 0.51, Time 1.2 sec
Epoch 6. Loss: 0.681, Train acc 0.51, Test acc 0.56, Time 1.2 sec
Epoch 7. Loss: 0.667, Train acc 0.55, Test acc 0.58, Time 1.2 sec
Epoch 8. Loss: 0.651, Train acc 0.57, Test acc 0.58, Time 1.2 sec
Epoch 9. Loss: 0.631, Train acc 0.60, Test acc 0.60, Time 1.2 sec
Epoch 10. Loss: 0.623, Train acc 0.62, Test acc 0.62, Time 1.2 sec
Epoch 11. Loss: 0.604, Train acc 0.65, Test acc 0.63, Time 1.2 sec
Epoch 12. Loss: 0.592, Train acc 0.66, Test acc 0.63, Time 1.2 sec
Epoch 13. Loss: 0.582, Train acc 0.68, Test acc 0.65, Time 1.2 sec
Epoch 14. Loss: 0.576, Train acc 0.68, Test ac

In [19]:
from mxnet.gluon import nn
class TCNNConfig(object):
    """
    CNN parameters
    """
    embedding_dim = 128  # embedding vector size
    seq_length = 50  # maximum length of sequence
    vocab_size = 8000  # most common words

    num_filters = 100  # number of the convolution filters (feature maps)
    kernel_sizes = [3, 4, 5]  # three kinds of kernels (windows)

    dropout_prob = 0.5  # dropout rate
    learning_rate = 1e-3  # learning rate
    batch_size = 50  # batch size for training
    num_epochs = 10  # total number of epochs

    num_classes = 2  # number of classes

    test_split = 0.1  # percentage of test data


class Conv_Max_Pooling(nn.Block):
    """
    Integration of Conv1D and GlobalMaxPool1D layers
    """

    def __init__(self, channels, kernel_size, **kwargs):
        super(Conv_Max_Pooling, self).__init__(**kwargs)

        with self.name_scope():
            self.conv = nn.Conv1D(channels, kernel_size)
            self.pooling = nn.GlobalMaxPool1D()

    def forward(self, x):
        output = self.pooling(self.conv(x))
        return nd.relu(output).flatten()


class TextCNN(nn.Block):
    """
    CNN text classification model, based on the paper.
    """

    def __init__(self, config, **kwargs):
        super(TextCNN, self).__init__(**kwargs)

        V = config.vocab_size
        E = config.embedding_dim
        Nf = config.num_filters
        Ks = config.kernel_sizes
        C = config.num_classes
        Dr = config.dropout_prob

        with self.name_scope():
            self.embedding = nn.Embedding(V, E)  # embedding layer

            # three different convolutional layers
            self.conv1 = Conv_Max_Pooling(Nf, Ks[0])
            self.conv2 = Conv_Max_Pooling(Nf, Ks[1])
            self.conv3 = Conv_Max_Pooling(Nf, Ks[2])
            self.dropout = nn.Dropout(Dr)  # a dropout layer
            self.fc1 = nn.Dense(C)  # a dense layer for classification

    def forward(self, x):
        x = self.embedding(x).transpose((0, 2, 1))  # Conv1D takes in NCW as input
        o1, o2, o3 = self.conv1(x), self.conv2(x), self.conv3(x)
        outputs = self.fc1(self.dropout(nd.concat(o1, o2, o3)))

        return outputs


In [20]:
from mxnet.gluon import nn

ctx = utilstmp.try_gpu()
config = TCNNConfig()
config.vocab_size = vocab_size
V = config.vocab_size
E = config.embedding_dim
Nf = config.num_filters
Ks = config.kernel_sizes
C = config.num_classes
Dr = config.dropout_prob
net = nn.Sequential()
with net.name_scope():
    net.add(
        nn.Embedding(V, E),
        nn.Conv1D(Nf, Ks[0]),
        nn.GlobalMaxPool1D(),
        nn.Dropout(Dr),
        nn.Dense(C)
    )
net.initialize(ctx=ctx)

In [21]:
trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': config.learning_rate})
loss = gluon.loss.SoftmaxCrossEntropyLoss()
utilstmp.train(train_data, test_data, net, loss, trainer, ctx, num_epochs=config.num_epochs)

Start training on  gpu(0)
Epoch 0. Loss: 0.694, Train acc 0.50, Test acc 0.52, Time 1.9 sec
Epoch 1. Loss: 0.686, Train acc 0.56, Test acc 0.52, Time 1.1 sec
Epoch 2. Loss: 0.564, Train acc 0.72, Test acc 0.73, Time 1.1 sec
Epoch 3. Loss: 0.301, Train acc 0.88, Test acc 0.75, Time 1.1 sec
Epoch 4. Loss: 0.109, Train acc 0.97, Test acc 0.76, Time 1.1 sec
Epoch 5. Loss: 0.032, Train acc 0.99, Test acc 0.75, Time 1.1 sec
Epoch 6. Loss: 0.011, Train acc 1.00, Test acc 0.75, Time 1.1 sec
Epoch 7. Loss: 0.006, Train acc 1.00, Test acc 0.76, Time 1.1 sec
Epoch 8. Loss: 0.003, Train acc 1.00, Test acc 0.76, Time 1.1 sec
Epoch 9. Loss: 0.002, Train acc 1.00, Test acc 0.75, Time 1.1 sec


In [26]:
set(y_train)

{0, 1}

In [11]:
from mxnet import gluon

import sys
sys.path.append('.')
from mxnet import ndarray as nd
from mxnet import autograd
import utilstmp
import time

def net_structure(num_hidden, num_outputs, ctx):
    net = gluon.nn.Sequential()
    num_embed = 300 # dimensions to embed words into
    filter_size = 3
    
    with net.name_scope():
#         net.add(
#             nn.Embedding(vocab_size, num_embed),
#             nn.Conv1D(num_embed, 3),
#             nn.GlobalMaxPool1D(),
#             nn.Dropout(0.5),
#             nn.Dense(num_outputs)
#         )
#         net.add(gluon.nn.Flatten())
        net.add(gluon.nn.Embedding(input_dim=vocab_size, output_dim=num_embed))
        # 输入输出数据格式是 batch x channel x height x width
        net.add(gluon.nn.Conv1D(channels=1, kernel_size=filter_size, activation='relu'))
        net.add(gluon.nn.GlobalMaxPool1D())
        net.add(gluon.nn.Dropout(0.5))
        net.add(gluon.nn.Dense(num_outputs, activation="relu"))
#         net.add(gluon.nn.softmax_cross_entropy)
        print(net)
        print(net.collect_params())
    
    net.initialize(ctx=ctx)
    return net

def mlp(optimizer='sgd', num_outputs=2, num_hidden=256, weight_scale=.01, learning_rate=0.0005, 
        num_epoch=50, batch_size=50):
    batch_size = batch_size
#     train_data, test_data = utils.load_data_fashion_mnist(batch_size)
    ctx = utilstmp.try_gpu()
#     net = net_structure(num_hidden=num_hidden, num_outputs=num_outputs, ctx=ctx)
#     softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': learning_rate})
#     import pdb
#     pdb.set_trace() 
    utilstmp.train(train_data, test_data, net, loss, trainer, ctx, num_epochs=num_epoch)
#     for epoch in range(num_epoch):
#         tic = time.time()
#         train_loss = 0.
#         train_acc = 0.
#         for data, label in train_data:
#             with autograd.record():
#                 output = net(data)
#                 loss = softmax_cross_entropy(output, label)
#             loss.backward()
#             trainer.step(batch_size)
            
#             # End of training loop for this epoch
#             toc = time.time()
#             train_time = toc - tic

#             train_loss += nd.mean(loss).asscalar()
#             train_acc += utilstmp.accuracy(output, label)

#         test_acc = utilstmp.evaluate_accuracy(test_data, net)
#         print("Epoch %d. Training Time: %.3fs, Loss: %f, Train acc %f, Test acc %f" % (
#             epoch, train_time, train_loss/len(train_data), train_acc/len(train_data), test_acc))

In [14]:
from mxnet import ndarray as nd

num_inputs = 56
num_outputs = 2

num_hidden = 256
weight_scale = .01

W1 = nd.random_normal(shape=(num_inputs, num_hidden), scale=weight_scale)
b1 = nd.zeros(num_hidden)

W2 = nd.random_normal(shape=(num_hidden, num_outputs), scale=weight_scale)
b2 = nd.zeros(num_outputs)

params = [W1, b1, W2, b2]

for param in params:
    param.attach_grad()

In [15]:
def relu(X):
    return nd.maximum(X, 0)

In [16]:
def net(X):
#     X = X.reshape((-1, num_inputs))# flatten 矩阵变数组
    h1 = relu(nd.dot(X, W1) + b1)
    output = nd.dot(h1, W2) + b2
    return output

In [17]:
from mxnet import gluon
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

In [18]:
%pdb on
from mxnet import autograd as autograd

learning_rate = .5

for epoch in range(5):
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        utilstmp.SGD(params, learning_rate/batch_size)

        train_loss += nd.mean(loss).asscalar()
        train_acc += utils.accuracy(output, label)

    test_acc = utilstmp.evaluate_accuracy(test_data, net)
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
        epoch, train_loss/len(train_data),
        train_acc/len(train_data), test_acc))

Automatic pdb calling has been turned ON


NameError: name 'utils' is not defined

> [0;32m<ipython-input-18-bca68c232320>[0m(14)[0;36m<module>[0;34m()[0m
[0;32m     12 [0;31m            [0mloss[0m [0;34m=[0m [0msoftmax_cross_entropy[0m[0;34m([0m[0moutput[0m[0;34m,[0m [0mlabel[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m     13 [0;31m        [0mloss[0m[0;34m.[0m[0mbackward[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m---> 14 [0;31m        [0mutils[0m[0;34m.[0m[0mSGD[0m[0;34m([0m[0mparams[0m[0;34m,[0m [0mlearning_rate[0m[0;34m/[0m[0mbatch_size[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m     15 [0;31m[0;34m[0m[0m
[0m[0;32m     16 [0;31m        [0mtrain_loss[0m [0;34m+=[0m [0mnd[0m[0;34m.[0m[0mmean[0m[0;34m([0m[0mloss[0m[0;34m)[0m[0;34m.[0m[0masscalar[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0m
[0m
ipdb> q


In [12]:
idx=0
for l in x_train:
    if(l[0]==44 and l[1]==423 and l[2]==2):
        print(idx)
        break
    idx += 1
print(x_train[idx], y_train[idx]) 

3742
[  44  423    2   44  390    2    4  493 1485    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0] 0


In [4]:
from mxnet import gluon
from mxnet.gluon import nn

import sys
sys.path.append('.')
from mxnet import ndarray as nd
from mxnet import autograd
import utilstmp
import time

def net_structure(num_hidden, num_outputs, ctx, num_embed):
    net = gluon.nn.Sequential()
    num_embed = num_embed # dimensions to embed words into
    filter_size = 3
    
    with net.name_scope():
        net.add(
            nn.Embedding(vocab_size, num_embed),
            nn.Conv1D(num_embed, 3),
            nn.GlobalMaxPool1D(),
            nn.Dropout(0.5),
            nn.Dense(num_outputs)
        )
        print(net)
        print(net.collect_params())
    net.initialize(ctx=ctx)
    return net

def mlp(optimizer='sgd', num_outputs=2, num_hidden=256, weight_scale=.01, learning_rate=0.0005, 
        num_epoch=10, batch_size=50, num_embed=300):
    batch_size = batch_size

    ctx = utilstmp.try_gpu(device_id=1)
    net = net_structure(num_hidden=num_hidden, num_outputs=num_outputs, ctx=ctx, num_embed=num_embed)
    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), optimizer, {'learning_rate': learning_rate})
    utilstmp.train(train_data, test_data, net, loss, trainer, ctx, num_epochs=num_epoch)

In [6]:
  """
    embedding_dim = 128  # embedding vector size
    seq_length = 50  # maximum length of sequence
    vocab_size = 8000  # most common words

    num_filters = 100  # number of the convolution filters (feature maps)
    kernel_sizes = [3, 4, 5]  # three kinds of kernels (windows)

    dropout_prob = 0.5  # dropout rate
    learning_rate = 1e-3  # learning rate
    batch_size = 50  # batch size for training
    num_epochs = 10  # total number of epochs

    num_classes = 2  # number of classes

    test_split = 0.1  # percentage of test data
    """
mlp(learning_rate=0.003, num_epoch=10, batch_size=5, optimizer='adam', num_embed=128)

Sequential(
  (0): Embedding(18766 -> 128, float32)
  (1): Conv1D(None -> 128, kernel_size=(3,), stride=(1,))
  (2): GlobalMaxPool1D(size=(1,), stride=(1,), padding=(0,), ceil_mode=True)
  (3): Dropout(p = 0.5)
  (4): Dense(None -> 2, linear)
)
sequential1_ (
  Parameter sequential1_embedding0_weight (shape=(18766, 128), dtype=<class 'numpy.float32'>)
  Parameter sequential1_conv0_weight (shape=(128, 0, 3), dtype=<class 'numpy.float32'>)
  Parameter sequential1_conv0_bias (shape=(128,), dtype=<class 'numpy.float32'>)
  Parameter sequential1_dense0_weight (shape=(2, 0), dtype=<class 'numpy.float32'>)
  Parameter sequential1_dense0_bias (shape=(2,), dtype=<class 'numpy.float32'>)
)
Start training on  gpu(1)
Epoch 0. Loss: 0.694, Train acc 0.51, Test acc 0.52, Time 1.3 sec
Epoch 1. Loss: 0.529, Train acc 0.73, Test acc 0.75, Time 1.1 sec
Epoch 2. Loss: 0.195, Train acc 0.92, Test acc 0.75, Time 1.1 sec
Epoch 3. Loss: 0.037, Train acc 0.99, Test acc 0.77, Time 1.1 sec
Epoch 4. Loss: 0.006,

In [26]:
from mxnet import gluon
from mxnet.gluon import nn

import sys
sys.path.append('.')
from mxnet import ndarray as nd
from mxnet import autograd
import utilstmp
import time

def net_structure(num_hidden, num_outputs, ctx, num_embed):
#     net = gluon.nn.Sequential()
#     num_embed = num_embed # dimensions to embed words into
#     filter_size = 3
    
#     with net.name_scope():
#         net.add(
#             nn.Embedding(vocab_size, num_embed),
#             nn.Conv1D(num_embed, 3),
#             nn.GlobalMaxPool1D(),
#             nn.Dropout(0.5),
#             nn.Dense(num_outputs)
#         )
#         print(net)
#         print(net.collect_params())
#     net.initialize(ctx=ctx)

    config = TCNNConfig()
    config.vocab_size = vocab_size
    V = config.vocab_size
    E = config.embedding_dim
    Nf = config.num_filters
    Ks = config.kernel_sizes
    C = config.num_classes
    Dr = config.dropout_prob
    net = nn.Sequential()
    with net.name_scope():
        net.add(
            nn.Embedding(V, E),
            nn.Conv1D(Nf, Ks[0]),
            nn.GlobalMaxPool1D(),
            nn.Dropout(Dr),
            nn.Dense(C)
        )
    net.initialize(ctx=ctx)
    return net

def mlp(optimizer='sgd', num_outputs=2, num_hidden=256, weight_scale=.01, learning_rate=0.0005, 
        num_epoch=10, batch_size=50, num_embed=300):
    batch_size = batch_size

    ctx = utilstmp.try_gpu(device_id=1)
    net = net_structure(num_hidden=num_hidden, num_outputs=num_outputs, ctx=ctx, num_embed=num_embed)
    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), optimizer, {'learning_rate': learning_rate})
    utilstmp.train(train_data, test_data, net, loss, trainer, ctx, num_epochs=num_epoch)

In [30]:
mlp(learning_rate=1e-3, num_epoch=10, batch_size=50, optimizer='adam')

Start training on  gpu(1)
Epoch 0. Loss: 0.693, Train acc 0.50, Test acc 0.48, Time 1.3 sec
Epoch 1. Loss: 0.688, Train acc 0.55, Test acc 0.57, Time 1.1 sec
Epoch 2. Loss: 0.578, Train acc 0.72, Test acc 0.72, Time 1.1 sec
Epoch 3. Loss: 0.315, Train acc 0.88, Test acc 0.72, Time 1.1 sec
Epoch 4. Loss: 0.134, Train acc 0.96, Test acc 0.73, Time 1.1 sec
Epoch 5. Loss: 0.053, Train acc 0.99, Test acc 0.73, Time 1.1 sec
Epoch 6. Loss: 0.024, Train acc 1.00, Test acc 0.73, Time 1.1 sec
Epoch 7. Loss: 0.012, Train acc 1.00, Test acc 0.75, Time 1.1 sec
Epoch 8. Loss: 0.007, Train acc 1.00, Test acc 0.73, Time 1.1 sec
Epoch 9. Loss: 0.004, Train acc 1.00, Test acc 0.74, Time 1.1 sec


In [46]:
train_data.shape

AttributeError: 'DataLoader' object has no attribute 'shape'

> [0;32m<ipython-input-46-8852729f0f48>[0m(1)[0;36m<module>[0;34m()[0m
[0;32m----> 1 [0;31m[0mtrain_data[0m[0;34m.[0m[0mshape[0m[0;34m[0m[0m
[0m
ipdb> q


In [60]:
net.collect_params()

NameError: name 'net' is not defined

> [0;32m<ipython-input-60-17bdb091efb0>[0m(1)[0;36m<module>[0;34m()[0m
[0;32m----> 1 [0;31m[0mnet[0m[0;34m.[0m[0mcollect_params[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0m
[0m
ipdb> q


In [48]:
for i in a:
    print(i)
    break

(
[[[[ 0.  0.  0. ...,  0.  0.  0.]
   [ 0.  0.  0. ...,  0.  0.  0.]
   [ 0.  0.  0. ...,  0.  0.  0.]
   ..., 
   [ 0.  0.  0. ...,  0.  0.  0.]
   [ 0.  0.  0. ...,  0.  0.  0.]
   [ 0.  0.  0. ...,  0.  0.  0.]]]


 [[[ 0.  0.  0. ...,  0.  0.  0.]
   [ 0.  0.  0. ...,  0.  0.  0.]
   [ 0.  0.  0. ...,  0.  0.  0.]
   ..., 
   [ 0.  0.  0. ...,  0.  0.  0.]
   [ 0.  0.  0. ...,  0.  0.  0.]
   [ 0.  0.  0. ...,  0.  0.  0.]]]


 [[[ 0.  0.  0. ...,  0.  0.  0.]
   [ 0.  0.  0. ...,  0.  0.  0.]
   [ 0.  0.  0. ...,  0.  0.  0.]
   ..., 
   [ 0.  0.  0. ...,  0.  0.  0.]
   [ 0.  0.  0. ...,  0.  0.  0.]
   [ 0.  0.  0. ...,  0.  0.  0.]]]


 ..., 
 [[[ 0.  0.  0. ...,  0.  0.  0.]
   [ 0.  0.  0. ...,  0.  0.  0.]
   [ 0.  0.  0. ...,  0.  0.  0.]
   ..., 
   [ 0.  0.  0. ...,  0.  0.  0.]
   [ 0.  0.  0. ...,  0.  0.  0.]
   [ 0.  0.  0. ...,  0.  0.  0.]]]


 [[[ 0.  0.  0. ...,  0.  0.  0.]
   [ 0.  0.  0. ...,  0.  0.  0.]
   [ 0.  0.  0. ...,  0.  0.  0.]
   ..., 
   [ 0.  0. 

In [79]:
from mxnet.gluon import nn

net = nn.Sequential()
with net.name_scope():
    net.add(
        nn.Conv2D(channels=20, kernel_size=5, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Conv2D(channels=50, kernel_size=3, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Flatten(),
        nn.Dense(128, activation="relu"),
        nn.Dense(10)
    )
from mxnet import gluon
import sys
sys.path.append('..')
import utilstmp

# 初始化
ctx = utilstmp.try_gpu()
net.initialize(ctx=ctx)
print('initialize weight on', ctx)

# 获取数据
batch_size = 256
train_data, test_data = utilstmp.load_data_fashion_mnist(batch_size)

# 训练
loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(),
                        'sgd', {'learning_rate': 0.5})
utilstmp.train(train_data, test_data, net, loss,
            trainer, ctx, num_epochs=5)

initialize weight on gpu(0)
Start training on  gpu(0)
Epoch 0. Loss: 1.238, Train acc 0.53, Test acc 0.75, Time 2.2 sec
Epoch 1. Loss: 0.535, Train acc 0.79, Test acc 0.79, Time 1.8 sec
Epoch 2. Loss: 0.440, Train acc 0.83, Test acc 0.84, Time 1.8 sec
Epoch 3. Loss: 0.392, Train acc 0.85, Test acc 0.87, Time 1.8 sec
Epoch 4. Loss: 0.354, Train acc 0.87, Test acc 0.88, Time 1.8 sec


In [None]:
utilstmp.try_gpu

In [32]:
import mxnet as mx

In [59]:
W1.astype('int64')


[[0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]]
<NDArray 56x256 @cpu(0)>

In [7]:
tmp=0
for data, label in test_data:
    print(data, label)
    tmp=data
    break


[[   1  175    7 ...,    0    0    0]
 [  41   37    6 ...,    0    0    0]
 [  27    5    1 ...,    0    0    0]
 ..., 
 [5601 3371    4 ...,    0    0    0]
 [1194   16 1140 ...,    0    0    0]
 [2690    2 5702 ...,    0    0    0]]
<NDArray 50x56 @cpu(0)> 
[0 0 1 0 1 0 0 0 1 0 1 0 0 1 1 1 0 0 0 0 0 0 1 1 1 0 0 1 1 1 0 0 0 0 1 0 0
 0 1 0 1 0 1 1 1 0 1 1 0 1]
<NDArray 50 @cpu(0)>


In [8]:
nd.dot(tmp.astype('float32'), W1)


[[ -53.59818268  -66.17845154 -137.74136353 ...,  140.63609314
    73.84160614   53.23345184]
 [ 130.25120544 -148.00364685  210.47557068 ...,    1.83334064
    80.29856873 -223.39505005]
 [  -9.79635906   -2.76786876  -16.18431473 ...,   16.98617172   14.4361124
   -12.94740009]
 ..., 
 [ 311.0447998   -86.59976196   36.55081558 ...,  315.77960205
  -151.91706848 -549.48803711]
 [  88.37377167   97.152771    -61.81256866 ...,   -2.53061891
   130.60990906  -47.19386673]
 [ 145.34436035  397.92401123   14.76320648 ...,  156.37654114
   336.85375977 -424.31954956]]
<NDArray 50x256 @cpu(0)>

In [80]:
from mxnet import gluon

import sys
sys.path.append('.')
from mxnet import ndarray as nd
from mxnet import autograd
import utilstmp
import time

def net_structure(num_hidden, num_outputs, ctx):
    net = gluon.nn.Sequential()
    num_embed = 300 # dimensions to embed words into
    filter_size = 3
    
    with net.name_scope():
        net.add(
            nn.Conv2D(channels=20, kernel_size=5, activation='relu'),
            nn.MaxPool2D(pool_size=2, strides=2),
            nn.Conv2D(channels=50, kernel_size=3, activation='relu'),
            nn.MaxPool2D(pool_size=2, strides=2),
            nn.Flatten(),
            nn.Dense(128, activation="relu"),
            nn.Dense(10)
        )
        print(net)
        print(net.collect_params())
    
    net.initialize(ctx=ctx)
    return net

def mlp(optimizer='sgd', num_outputs=2, num_hidden=256, weight_scale=.01, learning_rate=0.0005, 
        num_epoch=50, batch_size=50):
    batch_size = batch_size
#     train_data, test_data = utils.load_data_fashion_mnist(batch_size)
    ctx = utilstmp.try_gpu()
    net = net_structure(num_hidden=num_hidden, num_outputs=num_outputs, ctx=ctx)
    
#     softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    batch_size = 256
    train_data, test_data = utilstmp.load_data_fashion_mnist(batch_size)

    # 训练
    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(),
                            'sgd', {'learning_rate': 0.5})
    utilstmp.train(train_data, test_data, net, loss,
                trainer, ctx, num_epochs=5)

In [50]:
list(dataset_train[0][0])

[1789,
 44,
 56,
 5,
 54,
 451,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]


[ 1789.    44.    56.     5.    54.   451.     0.     0.     0.     0.
     0.     0.     0.     0.     0.     0.     0.     0.     0.     0.
     0.     0.     0.     0.     0.     0.     0.     0.     0.     0.
     0.     0.     0.     0.     0.     0.     0.     0.     0.     0.
     0.     0.     0.     0.     0.     0.     0.     0.     0.     0.
     0.     0.     0.     0.     0.     0.]
<NDArray 56 @cpu(0)>