### 创建神经网络

In [5]:
from mxnet import nd
from mxnet.gluon import nn

net = nn.Sequential()

with net.name_scope():
    net.add(nn.Dense(256,activation='relu'))
    net.add(nn.Dense(10))
print(net)

Sequential(
  (0): Dense(None -> 256, Activation(relu))
  (1): Dense(None -> 10, linear)
)


### 使用 nn.Block来定义

In [5]:
class MLP(nn.Block):
    def __init__(self,**kwargs):
        super(MLP,self).__init__(**kwargs)
        with self.name_scope():
            self.dense0 = nn.Dense(256)
            self.dense1 = nn.Dense(10)
            
    def forward(self,x):
        return self.dense1(nd.relu(self.dense0(x)))

In [6]:
net2 = MLP()
print(net2)
net2.initialize()
x = nd.random_uniform(shape=(4,20))
y = net2(x)
y

MLP(
  (dense0): Dense(None -> 256, linear)
  (dense1): Dense(None -> 10, linear)
)



[[ 0.03126615  0.04562764  0.00039857 -0.08772386 -0.05355632  0.02904574
   0.08102557 -0.01433946 -0.04224151  0.06047882]
 [ 0.02871901  0.03652265  0.00630051 -0.05650971 -0.07189322  0.08615957
   0.05951559 -0.06045965 -0.0299026   0.05651001]
 [ 0.02147349  0.04818896  0.05321142 -0.12616856 -0.0685023   0.09096345
   0.04064304 -0.05064794 -0.02200242  0.04859561]
 [ 0.03780478  0.0751239   0.03290457 -0.11641113 -0.03254967  0.0586529
   0.02542157 -0.01697343 -0.00049652  0.05892839]]
<NDArray 4x10 @cpu(0)>

#### 主要用于标识属于那个，方便识别

In [7]:
print('default predix:',net2.dense0.name)

net3 = MLP(prefix='another_mlp_')
print('customized prefix:',net3.dense0.name)

default predix: mlp0_dense0
customized prefix: another_mlp_dense0


### Sequential

In [8]:
class Sequential(nn.Block):
    def __init__(self,**kwargs):
        super(Sequential,self).__init__(**kwargs)
    def add(self,block):
        self._children.append(block)
    def forward(self,x):
        for block in self._children:
            x = block(x)
        return x

In [9]:
net4 = Sequential()

with net4.name_scope():
    net4.add(nn.Dense(256,activation='relu'))
    net4.add(nn.Dense(10))
    
net4.initialize()
y = net4(x)
y


[[-0.00411107  0.00781807  0.03506001 -0.01106469  0.09599376 -0.04190594
   0.01127483 -0.01493319  0.0716491   0.00700368]
 [ 0.01214233  0.02546027  0.03533494 -0.02328116  0.10768862 -0.01672854
  -0.02653831 -0.03458688  0.0640486  -0.00030123]
 [-0.00452384  0.00228632  0.02761049 -0.05750641  0.10328892 -0.01792853
  -0.04610601 -0.04085524  0.05824737  0.00033787]
 [-0.00518477 -0.02185423  0.02528594 -0.00436605  0.05142229 -0.02703231
   0.01939205 -0.03802725  0.02832718 -0.0172073 ]]
<NDArray 4x10 @cpu(0)>

### 初始化模型参数

In [6]:
from mxnet.gluon import nn
from mxnet import nd

def get_net():
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Dense(4,activation='relu'))
        net.add(nn.Dense(2))
    return net
x = nd.random.uniform(shape=(3,5))

In [27]:
x


[[0.3742962  0.32440487 0.74878824 0.6801155  0.23780724]
 [0.79553473 0.1718531  0.5039336  0.44929165 0.2962424 ]
 [0.3044684  0.88596225 0.8391891  0.35187057 0.23774183]]
<NDArray 3x5 @cpu(0)>

In [28]:
import sys
try:
    net = get_net()
    net(x)
except RuntimeError as err:
    sys.stderr.write(str(err))

Parameter sequential3_dense0_weight has not been initialized. Note that you should initialize parameters and create Trainer with Block.collect_params() instead of Block.params because the later does not include Parameters of nested child Blocks

In [29]:
net.initialize()
net(x)


[[-0.00463657 -0.00280832]
 [-0.0049372  -0.00291731]
 [-0.00458132 -0.00321118]]
<NDArray 3x2 @cpu(0)>

### 访问模型参数

In [30]:
w = net[0].weight
b = net[0].bias
print('name:',net[0].name,'\n\nnweight:',w,'\n\nnbias:',b)

name: sequential3_dense0 

nweight: Parameter sequential3_dense0_weight (shape=(4, 5), dtype=<class 'numpy.float32'>) 

nbias: Parameter sequential3_dense0_bias (shape=(4,), dtype=<class 'numpy.float32'>)


In [31]:
print('weight:',w.data())
print('weight gradient',w.grad())
print('bias:',b.data())
print('bias gradient',b.grad())

weight: 
[[ 0.03337464  0.00033452  0.00775058  0.0619617  -0.04178752]
 [ 0.01875968  0.00679267  0.05142052  0.00274999  0.06162936]
 [-0.02117043  0.03510708 -0.06655321  0.02794051 -0.0491639 ]
 [ 0.06551518 -0.05154072  0.06921611  0.02910844 -0.00674497]]
<NDArray 4x5 @cpu(0)>
weight gradient 
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
<NDArray 4x5 @cpu(0)>
bias: 
[0. 0. 0. 0.]
<NDArray 4 @cpu(0)>
bias gradient 
[0. 0. 0. 0.]
<NDArray 4 @cpu(0)>


In [37]:
params = net.collect_params()
print(params)
print(params['sequential3_dense0_bias'].data())
print(params.get('dense0_weight').data())

sequential3_ (
  Parameter sequential3_dense0_weight (shape=(4, 5), dtype=<class 'numpy.float32'>)
  Parameter sequential3_dense0_bias (shape=(4,), dtype=<class 'numpy.float32'>)
  Parameter sequential3_dense1_weight (shape=(2, 4), dtype=<class 'numpy.float32'>)
  Parameter sequential3_dense1_bias (shape=(2,), dtype=<class 'numpy.float32'>)
)

[0. 0. 0. 0.]
<NDArray 4 @cpu(0)>

[[ 0.03337464  0.00033452  0.00775058  0.0619617  -0.04178752]
 [ 0.01875968  0.00679267  0.05142052  0.00274999  0.06162936]
 [-0.02117043  0.03510708 -0.06655321  0.02794051 -0.0491639 ]
 [ 0.06551518 -0.05154072  0.06921611  0.02910844 -0.00674497]]
<NDArray 4x5 @cpu(0)>


### 使用不同的初始化函数来初始化

In [43]:
from mxnet import init
params.initialize(init=init.Normal(sigma=0.02),force_reinit=True)
print(net[0].weight.data(),net[0].bias.data())


[[ 0.00831061 -0.01397313 -0.00663516 -0.00953266 -0.01912067]
 [ 0.03268983 -0.01800025  0.04716996  0.00495642  0.02328127]
 [ 0.01597777 -0.0028479   0.00061696 -0.01085523  0.00124892]
 [ 0.0115438  -0.01638925 -0.02512253 -0.01027825 -0.01704522]]
<NDArray 4x5 @cpu(0)> 
[0. 0. 0. 0.]
<NDArray 4 @cpu(0)>


In [60]:
params.initialize(init=init.One(),force_reinit=True)
print(net[0].weight.data(),net[0].bias.data())


[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]
<NDArray 4x5 @cpu(0)> 
[0. 0. 0. 0.]
<NDArray 4 @cpu(0)>


### 延后初始化

In [64]:
net = get_net()
print(net.collect_params())

sequential5_ (
  Parameter sequential5_dense0_weight (shape=(4, 0), dtype=<class 'numpy.float32'>)
  Parameter sequential5_dense0_bias (shape=(4,), dtype=<class 'numpy.float32'>)
  Parameter sequential5_dense1_weight (shape=(2, 0), dtype=<class 'numpy.float32'>)
  Parameter sequential5_dense1_bias (shape=(2,), dtype=<class 'numpy.float32'>)
)


In [65]:
net.initialize()

### 共享模型参数

In [66]:
net = nn.Sequential()
with net.name_scope():
    net.add(nn.Dense(4,in_units=4,activation='relu'))
    net.add(nn.Dense(4,in_units=4,activation='relu',params=net[-1].params))
    net.add(nn.Dense(2,in_units=4))

In [69]:
net.initialize()
print(net[0].weight.data())
print(net[1].weight.data())


[[-0.00617422  0.00014884  0.01992925 -0.01730552]
 [-0.03443647 -0.01891235  0.00715108 -0.03347337]
 [-0.05153355 -0.00056416 -0.04192776  0.02544359]
 [ 0.00196762 -0.03117236 -0.05635092  0.00341317]]
<NDArray 4x4 @cpu(0)>

[[-0.00617422  0.00014884  0.01992925 -0.01730552]
 [-0.03443647 -0.01891235  0.00715108 -0.03347337]
 [-0.05153355 -0.00056416 -0.04192776  0.02544359]
 [ 0.00196762 -0.03117236 -0.05635092  0.00341317]]
<NDArray 4x4 @cpu(0)>


  "Set force_reinit=True to re-initialize."%self.name)
  "Set force_reinit=True to re-initialize."%self.name)
  "Set force_reinit=True to re-initialize."%self.name)
  "Set force_reinit=True to re-initialize."%self.name)


### 序列化---读写模型

### 读写NdArrays

In [74]:
from mxnet import nd

x = nd.ones(3)
y = nd.zeros(4)
filename='./datasets/test1.params'
nd.save(filename,[x,y])

In [75]:
a,b = nd.load(filename)
print(a,b)


[1. 1. 1.]
<NDArray 3 @cpu(0)> 
[0. 0. 0. 0.]
<NDArray 4 @cpu(0)>


In [80]:
mydict={'x':x,'y':y}
filename='./datasets/test2.params'
nd.save(fileanme,mydict)

In [81]:
c = nd.load(filename)
print(c)

{'x': 
[1. 1. 1.]
<NDArray 3 @cpu(0)>, 'y': 
[0. 0. 0. 0.]
<NDArray 4 @cpu(0)>}


### 读写Gluon模型的参数

In [84]:
from mxnet.gluon import nn

def get_net():
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Dense(10,activation='relu'))
        net.add(nn.Dense(2))
    return net
net = get_net()
net.initialize()
x = nd.random_uniform(shape=(2,10))
print(net(x))


[[0.00421636 0.00520459]
 [0.00218338 0.00886396]]
<NDArray 2x2 @cpu(0)>


In [85]:
filename = './datasets/mpl.params'
net.save_params(filename)

In [86]:
import mxnet as mx
net2 = get_net()
net2.load_params(filename,mx.cpu())
print(net2(x))


[[0.00421636 0.00520459]
 [0.00218338 0.00886396]]
<NDArray 2x2 @cpu(0)>


通过 `load_params`和`save_params` 可以很方便读写模型参数

### 自定义层  

In [87]:
from mxnet import nd
from mxnet.gluon import nn

class CenteredLayer(nn.Block):
    def __init__(self,**kwargs):
        super(CenteredLayer,self).__init__(**kwargs)
        
    def forward(self,x):
        return x-x.mean()

In [88]:
layer = CenteredLayer()
layer(nd.array([1,2,3,4,5]))


[-2. -1.  0.  1.  2.]
<NDArray 5 @cpu(0)>

In [89]:
net = nn.Sequential()
with net.name_scope():
    net.add(nn.Dense(128))
    net.add(nn.Dense(10))
    net.add(CenteredLayer())

In [90]:
net.initialize()
y = net(nd.random.uniform(shape=(4,8)))
print(y)
y.mean()


[[-1.87754538e-03  5.78120686e-02  9.98522341e-03  3.06460634e-02
   1.01760589e-02  5.75880520e-02 -7.03754872e-02 -1.30356504e-02
  -5.69210481e-03  2.71022543e-02]
 [ 1.25990026e-02  4.80866991e-04 -4.84369975e-03 -1.26346555e-02
   1.97638925e-02 -2.96838349e-03 -4.01096642e-02 -2.26726755e-02
  -2.98638102e-02 -3.92932445e-04]
 [ 2.24348567e-02  1.02849845e-02 -9.28923395e-03 -2.40185149e-02
  -5.03581390e-03  2.18460895e-03 -3.43593210e-02 -1.04938429e-02
  -2.28233449e-02 -1.25840306e-05]
 [ 6.37116190e-03  4.96428497e-02  1.22206099e-03  3.11558954e-02
   1.21748634e-03  5.18759489e-02 -7.60145485e-02 -1.70021430e-02
  -1.68196894e-02  1.77923273e-02]]
<NDArray 4x10 @cpu(0)>



[4.1909515e-10]
<NDArray 1 @cpu(0)>

### 带模型参数的自定义层

In [92]:
from mxnet import gluon
my_param = gluon.Parameter('exciting_parameter_yay',shape=(3,3))

In [93]:
my_param.initialize()
(my_param.data(),my_param.grad())

(
 [[-0.00037004  0.06726799 -0.05776642]
  [-0.05649688 -0.01589736  0.05088589]
  [ 0.03212274  0.00931085  0.01094945]]
 <NDArray 3x3 @cpu(0)>, 
 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]
 <NDArray 3x3 @cpu(0)>)

In [94]:
pd = gluon.ParameterDict(prefix='block1_')
pd.get('excition_parameter_yay',shape=(3,3))
pd

block1_ (
  Parameter block1_excition_parameter_yay (shape=(3, 3), dtype=<class 'numpy.float32'>)
)

In [102]:
class MyDense(nn.Block):
    def __inti__(self,units,in_units,**kwargs):
        super(MyDense,self).__init__(**kwargs)
        with self.name_scope():
            self.weight = self.params.get('weight',shape=(in_units,units))
        self.bias = self.params.get('biaa',shape=(units))
        
    def forward(self,x):
        linear = nd.dot(x,self.weight.data())+self.bias.data()
        return nd.relu(linear)

### 前面信息量略大，没懂咳咳，都是造轮子

# Dropout

In [104]:
from mxnet import nd

In [105]:
def dropout(X,drop_probability):
    keep_probability = 1-drop_probability
    assert 0<= keep_probability<=1
    #这种情况下全部丢弃
    if keep_probability == 0:
        return X.zeros_like()
    
    #随机选择一部分该层的输出作为丢弃元素
    mask = nd.random.uniform(0,1.0,X.shape,ctx=X.context)< keep_probability
    
    #保证 E[dropout(X)] == X
    scale = 1/keep_probability
    return mask * X * scale

In [139]:
A = nd.arange(20).reshape((5,4))
dropout(A,0.9)


[[ 0.  0.  0.  0.]
 [40.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]]
<NDArray 5x4 @cpu(0)>

### 丢弃法的本质

通常dropout发生在全连接层的后面，一般前面丢弃的少，后面丢弃的多  
另外 可以认为丢弃发自带集成的属性，通过丢弃部分产生新的架构

### AlexNet

In [3]:
from mxnet.gluon import nn
from mxnet import autograd
from mxnet import gluon
from mxnet import nd
from mxnet import init

In [24]:
net = nn.Sequential()
with net.name_scope():
    #第一阶段
    net.add(nn.Conv2D(channels=96,kernel_size=11,strides=4,activation='relu'))
    net.add(nn.MaxPool2D(pool_size=3,strides=2))
    
    #第二阶段
    net.add(nn.Conv2D(channels=96,kernel_size=5,padding=2,activation='relu'))
    net.add(nn.MaxPool2D(pool_size=3,strides=2))
    
    #第三阶段
    net.add(nn.Conv2D(channels=384,kernel_size=3,padding=1,activation='relu'))
    net.add(nn.Conv2D(channels=384,kernel_size=3,padding=1,activation='relu'))
    net.add(nn.Conv2D(channels=256,kernel_size=3,padding=1,activation='relu'))
    net.add(nn.MaxPool2D(pool_size=3,strides=2))
    
    #第四阶段
    net.add(nn.Flatten())
    net.add(nn.Dense(4096,activation='relu'))
    net.add(nn.Dropout(0.5))
    
    #第五阶段
    net.add(nn.Dense(4096,activation='relu'))
    net.add(nn.Dropout(0.5))
    
    #第六阶段
    net.add(nn.Dense)
net.initialize()

TypeError: collect_params() missing 1 required positional argument: 'self'

In [15]:
from mxnet.gluon import nn

net = nn.Sequential()
with net.name_scope():
    net.add(
        # 第一阶段
        nn.Conv2D(channels=96, kernel_size=11,
                  strides=4, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        # 第二阶段
        nn.Conv2D(channels=256, kernel_size=5,
                  padding=2, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        # 第三阶段
        nn.Conv2D(channels=384, kernel_size=3,
                  padding=1, activation='relu'),
        nn.Conv2D(channels=384, kernel_size=3,
                  padding=1, activation='relu'),
        nn.Conv2D(channels=256, kernel_size=3,
                  padding=1, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        # 第四阶段
        nn.Flatten(),
        nn.Dense(4096, activation="relu"),
        nn.Dropout(.5),
        # 第五阶段
        nn.Dense(4096, activation="relu"),
        nn.Dropout(.5),
        # 第六阶段
        nn.Dense(10)
    )
net.initialize()

In [16]:
def transform(data,label):
    data = image.imresize(data,224,224)
    return data.astype('float32')/255,label.astype('float32')

In [17]:
mnist_train = gluon.data.vision.FashionMNIST(root='../Mx/datasets/fashion-mnist/',train=True,transform=transform)
mnist_test = gluon.data.vision.FashionMNIST(root='../Mx/datasets/fashion-mnist/',train=False,transform=transform)

  label = np.fromstring(fin.read(), dtype=np.uint8).astype(np.int32)
  data = np.fromstring(fin.read(), dtype=np.uint8)


### 数据读取

In [18]:
batch_size = 64
train_data = gluon.data.DataLoader(mnist_train,batch_size,shuffle=True)
test_data = gluon.data.DataLoader(mnist_test,batch_size,shuffle=False)

In [19]:
def accuracy(output,label):
    return nd.mean(output.argmax(axis=1)==label).asscalar()

In [20]:
def evaluate_accuracy(data_iterator,net):
    acc = 0
    for data,label in data_iterator:
        output = net(data)
        acc += accuracy(output,label)
    return acc/len(data_iterator)

### 训练

In [22]:
from mxnet import autograd
from mxnet import gluon
from mxnet import nd
from mxnet import init

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':0.01})

for epoch in range(10):
    train_loss = 0.
    train_acc = 0.
    for data,label in train_data:
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output,label)
        loss.backward()
        trainer.step(batch_size)
        
        train_loss += nd.mean(loss).asscalar()
        train_acc += accuracy(output,label)
    test_acc = evaluate_accuracy(test_data,net)
    print("Epoch %d. Loss: %f, Train acc %f,Test acc %f"%\
         (epoch,train_loss/len(train_data),train_acc/len(train_data),
         test_acc))  

NameError: name 'image' is not defined

### 先过，未找到原因    =  =！！

# VGG

In [None]:
from mxnet.gluon import nn

def vgg_block(num_convs,channels):
    