In [1]:
#DeepLearningCal
from mxnet import nd
from mxnet.gluon import nn

class MLP(nn.Block):
    """Multilayer perceptron: a 256-unit ReLU hidden layer followed by a 10-unit output layer."""

    def __init__(self, **kwargs):
        # Let the parent Block constructor do its setup first; any extra keyword
        # arguments given when constructing an MLP are passed straight through to it.
        super().__init__(**kwargs)
        # Layers that own model parameters are declared as attributes.
        self.hidden = nn.Dense(256, activation='relu')  # hidden layer
        self.output = nn.Dense(10)  # output layer

    def forward(self, x):
        """Forward computation: feed x through the hidden layer, then the output layer."""
        hidden_out = self.hidden(x)
        return self.output(hidden_out)
    
    

In [3]:
# Instantiate the network from the MLP class and run one forward pass.
# X is a random batch of 2 samples with 20 features each.
X  = nd.random.uniform(shape=(2,20))
net = MLP()
# Initialize the parameters with the default initializer.
net.initialize()
# Last expression of the cell: the displayed result is a 2x10 NDArray.
net(X)


[[ 0.09543004  0.04614332 -0.00286655 -0.07790346 -0.05130241  0.02942038
   0.08696645 -0.0190793  -0.04122177  0.05088576]
 [ 0.0769287   0.03099706  0.00856576 -0.044672   -0.06926838  0.09132431
   0.06786592 -0.06187843 -0.03436674  0.04234696]]
<NDArray 2x10 @cpu(0)>

In [12]:
class MySequential(nn.Block):
    """Minimal sequential container: applies the added blocks one after another."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def add(self, block):
        """Append `block` to this container.

        `block` is an instance of a Block subclass and is assumed to have a unique
        name. It is stored in the Block member `_children` (an OrderedDict), keyed
        by that name; when initialize() is called on a MySequential instance, every
        member of `_children` is initialized as well.
        """
        name = block.name
        self._children[name] = block

    def forward(self, x):
        """Pass x through every stored block, in the order the blocks were added."""
        out = x
        # OrderedDict iterates over its members in insertion order.
        for layer in self._children.values():
            out = layer(out)
        return out

In [13]:
# Rebuild the two-layer perceptron with MySequential and run it on X
# (X was created in an earlier cell).
net = MySequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize()
# Last expression of the cell: the displayed result is a 2x10 NDArray.
net(X)


[[ 0.00362229  0.00633331  0.03201145 -0.01369375  0.10336448 -0.0350802
  -0.00032165 -0.01676024  0.06978628  0.01303309]
 [ 0.03871717  0.02608212  0.03544958 -0.02521311  0.11005436 -0.01430663
  -0.03052467 -0.03852826  0.06321152  0.0038594 ]]
<NDArray 2x10 @cpu(0)>

In [14]:
class FancyMLP(nn.Block):
    """Block combining a constant parameter, a reused Dense layer and Python control flow.

    forward() returns the sum of the final activations (an NDArray with one element).
    """

    def __init__(self,**kwargs):
        super(FancyMLP,self).__init__(**kwargs)
        # A random weight created through get_constant is a constant: it is not
        # updated during training.
        self.rand_weight = self.params.get_constant(
            'rand_weight', nd.random.uniform(shape=(20, 20)))
        self.dense = nn.Dense(20, activation='relu')

    def forward(self,x):
        x = self.dense(x)
        # Use the constant parameter together with NDArray's relu and dot functions.
        x = nd.relu(nd.dot(x, self.rand_weight.data()) + 1)
        # Reuse the same Dense layer: equivalent to two dense layers sharing parameters.
        x = self.dense(x)
        # Control flow: asscalar() converts the norm to a Python scalar for comparison.
        while x.norm().asscalar() > 1:
            # Out-of-place on purpose: MXNet rejects in-place operators (/=, *=, ...)
            # inside autograd.record(), so `x /= 2` would make this block untrainable.
            x = x / 2
        if x.norm().asscalar() < 0.8:
            x = x * 10
        return x.sum()

In [16]:
# Build a FancyMLP, initialize it with the default initializer and run it on X
# (X was created in an earlier cell). The displayed result is a one-element NDArray.
net = FancyMLP()
net.initialize()
net(X)


[21.028353]
<NDArray 1 @cpu(0)>

In [18]:
class NestMLP(nn.Block):
    """Block that nests an nn.Sequential sub-network and follows it with a Dense layer."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Nested sub-network: a 64-unit and a 32-unit Dense layer, both with ReLU.
        self.net = nn.Sequential()
        self.net.add(nn.Dense(64, activation='relu'))
        self.net.add(nn.Dense(32, activation='relu'))
        # 16-unit ReLU layer applied to the sub-network's output.
        self.dense = nn.Dense(16, activation='relu')

    def forward(self, x):
        """Run x through the nested sub-network, then through the final Dense layer."""
        features = self.net(x)
        return self.dense(features)
# Chain a NestMLP, a 20-unit Dense layer and a FancyMLP in one Sequential container.
net = nn.Sequential()
net.add(NestMLP(),nn.Dense(20),FancyMLP())

# Initialize and run on X (created in an earlier cell); FancyMLP comes last,
# so the displayed result is a one-element NDArray.
net.initialize()
net(X)
    


[20.43586]
<NDArray 1 @cpu(0)>

In [1]:
# Import the init module from MXNet; it provides a variety of model-initialization methods
from mxnet import init,nd
from mxnet.gluon import nn

# Two-layer perceptron used by the following initialization cells.
net = nn.Sequential()
net.add(nn.Dense(256,activation = 'relu'))
net.add(nn.Dense(10))

net.initialize()  # default initialization

# Random batch of 2 samples with 20 features each.
X = nd.random.uniform(shape=(2,20))
Y = net(X)  # forward computation

In [2]:
# Initializing a model that has already been initialized requires force_reinit=True.
# Here: normal distribution with sigma=0.01.
net.initialize(init=init.Normal(sigma=0.01), force_reinit=True)
# Uncomment to inspect the first row of the first layer's weight:
#net[0].weight.data()[0]

# Next, initialize the weight parameters with a constant (1).
net.initialize(init=init.Constant(1), force_reinit=True)
#net[0].weight.data()[0]

# Use the Xavier random initialization method, applied only to the
# weight of the first layer (net[0].weight).
net[0].weight.initialize(init=init.Xavier(), force_reinit=True)
#net[0].weight.data()[0]


In [4]:
#自定义初始化方法
class MyInit(init.Initializer):
    """Custom initializer: uniform values in [-10, 10], with |value| < 5 set to zero.

    The resulting weights are therefore either 0 or lie in [-10, -5] or [5, 10].
    """

    def _init_weight(self,name,data):
        """Fill `data` in place; `name` is the parameter name and is only printed."""
        print('Init',name,data.shape)
        # Use the nd.random namespace, like every other sampling call in this notebook.
        data[:] = nd.random.uniform(low=-10, high=10, shape=data.shape)
        # The comparison yields a 0/1 mask; multiplying zeroes entries with |value| < 5.
        data *= data.abs() >= 5
# Re-initialize the already-initialized net with the custom initializer, then
# print the first row of the first layer's weight.
net.initialize(MyInit(),force_reinit=True)
print(net[0].weight.data()[0])

Init dense0_weight (256, 20)
Init dense1_weight (10, 256)

[-5.3659673  7.5773945  8.986376  -0.         8.827555   0.
  5.9840508 -0.         0.         0.         7.4857597 -0.
 -0.         6.8910007  6.9788704 -6.1131554  0.         5.4665203
 -9.735263   9.485172 ]
<NDArray 20 @cpu(0)>


In [5]:
# Overwrite a parameter directly with set_data: add 1 to every entry of the first
# layer's weight. NOTE: re-running this cell adds 1 again (it is not idempotent).
net[0].weight.set_data(net[0].weight.data()+1)
# Display the first row of the updated weight.
net[0].weight.data()[0]


[-4.3659673  8.5773945  9.986376   1.         9.827555   1.
  6.9840508  1.         1.         1.         8.48576    1.
  1.         7.8910007  7.9788704 -5.1131554  1.         6.4665203
 -8.735263  10.485172 ]
<NDArray 20 @cpu(0)>

In [5]:
#共享模型参数
net = nn.Sequential()
shared = nn.Dense(8,activation='relu')
#使用关键字shared进行参数共享
net.add(nn.Dense(8,activation='relu'),shared,nn.Dense(8,activation='relu',params = shared.params),nn.Dense(10))
net.initialize()
X = nd.random.uniform(shape=(2,20))
net(X)

net[1].weight.data()[0] == net[2].weight.data()[0]


[1. 1. 1. 1. 1. 1. 1. 1.]
<NDArray 8 @cpu(0)>

In [1]:
#延后初始化
from mxnet import init,nd
from mxnet.gluon import nn

class MyInit(init.Initializer):
    """Tracing initializer: only reports which parameter it was asked to initialize."""

    def _init_weight(self, name, data):
        """Print the parameter name and shape; the values in `data` are not modified."""
        shape = data.shape
        print('init', name, shape)

# Two Dense layers created without in_units, so the input sizes are not given here.
net = nn.Sequential()
net.add(nn.Dense(256,activation='relu'),nn.Dense(10))

# This cell shows no 'init ...' output: MyInit is not invoked at this point.
net.initialize(init = MyInit())

In [2]:
#在调用模型进行计算时才进行初始化
X = nd.random.uniform(shape=(2, 20))
Y = net(X)

init dense0_weight (256, 20)
init dense1_weight (10, 256)


In [4]:
#避免延后 通过对已初始化模型再次初始化时直接进行
#在创建层时指定输入个数 系统不需要额外信息推测参数形状时也直接进行初始化

# Case 1: re-initializing the model from the earlier cells (already initialized)
# runs MyInit immediately (prints dense0 / dense1).
net.initialize(init=MyInit(),force_reinit=True)

# Case 2: a new model whose layers are given in_units explicitly; initialize()
# runs MyInit right away, with no forward pass needed (prints dense2 / dense3).
net = nn.Sequential()
net.add(nn.Dense(256,in_units = 20,activation='relu'))
net.add(nn.Dense(10,in_units = 256))
net.initialize(init=MyInit())

init dense0_weight (256, 20)
init dense1_weight (10, 256)
init dense2_weight (256, 20)
init dense3_weight (10, 256)


In [1]:
#自定义层
from mxnet import gluon,nd
from mxnet.gluon import nn

class CenteredLayer(nn.Block):
    """Parameter-free custom layer that subtracts the input's mean from the input."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def forward(self, x):
        """Return x shifted by its own mean (mean taken over all elements of x)."""
        mean = x.mean()
        return x - mean

layer = CenteredLayer() # instantiate the layer and run a forward computation
# NOTE: this result is not displayed, since it is not the last expression of the cell.
layer(nd.array([1,2,3,4,5]))

# Use the custom layer inside a larger model: Dense(128) followed by CenteredLayer.
net = nn.Sequential()
net.add(nn.Dense(128),CenteredLayer())

net.initialize()
y = net(nd.random.uniform(shape=(4,8)))
# Mean of the centered output as a Python scalar; the displayed value is on the
# order of 1e-9 rather than exactly 0.
y.mean().asscalar()

-9.367795e-10

In [2]:
#自定义含模型参数的层
# Create an empty ParameterDict and request a parameter named 'param2' with
# shape (2, 3) through get().
params = gluon.ParameterDict()
params.get('param2',shape=(2,3))
# Display the dictionary contents.
params

(
  Parameter param2 (shape=(2, 3), dtype=<class 'numpy.float32'>)
)

In [6]:
class MyDense(nn.Block):
    """Custom fully connected layer with ReLU activation and explicit parameters.

    units: number of outputs of the layer.
    in_units: number of inputs of the layer.
    """

    def __init__(self, units, in_units, **kwargs):
        super().__init__(**kwargs)
        # Parameters are requested from this block's ParameterDict.
        weight_shape = (in_units, units)
        bias_shape = (units,)
        self.weight = self.params.get('weight', shape=weight_shape)
        self.bias = self.params.get('bias', shape=bias_shape)

    def forward(self, x):
        """Return relu(x . weight + bias)."""
        weight, bias = self.weight.data(), self.bias.data()
        return nd.relu(nd.dot(x, weight) + bias)

In [7]:
# Instantiate the custom layer (5 inputs, 3 outputs) and display its parameters.
dense = MyDense(units = 3,in_units=5)
dense.params

mydense1_ (
  Parameter mydense1_weight (shape=(5, 3), dtype=<class 'numpy.float32'>)
  Parameter mydense1_bias (shape=(3,), dtype=<class 'numpy.float32'>)
)

In [8]:
# Initialize the custom layer and run a forward pass on a random 2x5 batch;
# the displayed result is a 2x3 NDArray.
dense.initialize()
dense(nd.random.uniform(shape=(2,5)))


[[0.06917784 0.01627153 0.01029644]
 [0.02602214 0.04537309 0.        ]]
<NDArray 2x3 @cpu(0)>

In [9]:
# Build a model from custom layers: 64 -> 8 -> 1 units.
net = nn.Sequential()
net.add(MyDense(8,in_units = 64),MyDense(1,in_units=8))
net.initialize()
# Forward pass on a random 2x64 batch; the displayed result is a 2x1 NDArray.
net(nd.random.uniform(shape=(2,64)))


[[0.03820475]
 [0.04035058]]
<NDArray 2x1 @cpu(0)>

In [1]:
#读写和存储模型
from mxnet import nd
from mxnet.gluon import nn
# Create a length-3 NDArray of ones and save it to a file named 'x'
# (relative path, so it is written to the current working directory).
x = nd.ones(3)
nd.save('x',x)


In [2]:
# Read the file 'x' back; the displayed result is a list containing one NDArray.
x2 = nd.load('x')
x2

[
 [1. 1. 1.]
 <NDArray 3 @cpu(0)>]

In [3]:
class MLP(nn.Block):
    """Multilayer perceptron used for the parameter save/load demonstration.

    A 256-unit ReLU hidden layer followed by a 10-unit output layer.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.hidden = nn.Dense(256, activation='relu')  # hidden layer
        self.output = nn.Dense(10)  # output layer

    def forward(self, x):
        """Feed x through the hidden layer, then the output layer."""
        hidden_out = self.hidden(x)
        return self.output(hidden_out)
# Build and initialize the model, then run one forward pass on a random 2x20 batch.
# Y is kept so it can be compared with the reloaded model's output in a later cell.
net = MLP()
net.initialize()
X = nd.random.uniform(shape=(2,20))
Y = net(X)

In [4]:
# Save the model's parameters to 'mlp.params' (relative path).
filename = 'mlp.params'
net.save_parameters(filename)

In [5]:
# Create a second MLP and, instead of calling initialize(), load its parameters
# from the file written by the previous cell.
net2 = MLP()
net2.load_parameters(filename)

In [6]:
# Run the reloaded model on the same input and compare element-wise with Y;
# the displayed result is all ones, i.e. both models produce the same output.
Y2 = net2(X)
Y2 == Y


[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]
<NDArray 2x10 @cpu(0)>

In [7]:
# Using the GPU: query GPU information through the nvidia-smi command-line tool
# (IPython shell escape). The recorded output is the Windows shell reporting that
# 'nvidia-smi' is not a recognized command, i.e. it was not found on PATH.
!nvidia-smi

'nvidia-smi' 不是内部或外部命令，也不是可运行的程序
或批处理文件。


In [8]:
import mxnet as mx
from mxnet import nd
from mxnet.gluon import nn
# Build device context objects; the displayed tuple is (cpu(0), gpu(0), gpu(1)).
mx.cpu(), mx.gpu(), mx.gpu(1)

(cpu(0), gpu(0), gpu(1))

In [10]:
# Create the array on the GPU by passing ctx=mx.gpu(); the displayed array is
# reported as living on gpu(0).
# NOTE(review): presumably this fails on a machine without a usable GPU - confirm.
a = nd.array([1, 2, 3], ctx=mx.gpu())    # enable GPU acceleration
a


[1. 2. 3.]
<NDArray 3 @gpu(0)>