In [1]:
import torch

In [65]:
class ThreeLayerNeuralNetworkOnWheel(object):
    """
    Three-layer fully connected binary classifier built from torch.nn layers.

    Architecture (exactly as constructed in ``__init__``):
        input   : 2 features
        layer 1 : Linear(2, 4) -> Tanh      y1 = tanh(x @ w1 + b1)
        layer 2 : Linear(4, 4) -> Sigmoid   y2 = sigmoid(y1 @ w2 + b2)
        layer 3 : Linear(4, 1) -> Tanh      y  = tanh(y2 @ w3 + b3)

    Training is full-batch SGD with ``torch.nn.BCEWithLogitsLoss``.

    NOTE(review): BCEWithLogitsLoss applies its own sigmoid, so the final Tanh
    output is treated as a logit confined to [-1, 1]. The predicted probability
    is therefore limited to roughly [0.27, 0.73] and the per-sample loss cannot
    drop below about 0.31. Dropping the last Tanh would lift that limit; it is
    kept here so that already-saved models keep producing the same outputs.
    """
    def __init__(self,
                 lr=5e-3,
                 epoch=3000,):
        """
        Build the network, the optimizer and the loss function.

        Args:
            lr: learning rate handed to the SGD optimizer.
            epoch: number of full-batch gradient steps performed by ``train``.
        """
        self.network = torch.nn.Sequential(
            torch.nn.Linear(2,4), # using layers defined in torch.nn skips declaration of nn.Parameter objects
            torch.nn.Tanh(),
            torch.nn.Linear(4,4), # nn.Linear is the linear layer x@w+b
            torch.nn.Sigmoid(), # nn.Tanh and nn.Sigmoid are not 'functions' but 'layers'
            torch.nn.Linear(4,1),
            torch.nn.Tanh(),
        ) # a sequential object chains all the layers so that no need to write the computation of each layer

        self.lr = lr  # kept so the optimizer can be rebuilt when the network is replaced
        self.epoch = epoch

        self.optimizer = self._build_optimizer()
        self.loss_func = torch.nn.BCEWithLogitsLoss()

    def _build_optimizer(self):
        """Create an SGD optimizer bound to the parameters of the current network."""
        return torch.optim.SGD(self.network.parameters(), lr=self.lr)

    def load_params_from_file(self, path):
        """
        Load a saved state dict from ``path`` into the existing network.

        The parameters are updated through ``load_state_dict`` on the same
        network object, so the existing optimizer is left as it is.
        """
        params = torch.load(path)  # use torch.load to load params from file
        self.network.load_state_dict(params)

    def load_model_and_params_from_file(self, path):
        """
        Replace the whole network with the model object stored at ``path``.

        The optimizer is rebuilt afterwards so that it drives the parameters
        of the newly loaded network.
        """
        # SECURITY: loading a whole model unpickles arbitrary objects; only use
        # this with files from a trusted source.
        # NOTE(review): newer PyTorch releases default torch.load to
        # weights_only=True, which may reject a fully pickled module -- confirm
        # against the installed version.
        self.network = torch.load(path)  # use torch.load to load the whole model and params from file
        # The previous optimizer still references the parameters of the network
        # that was just discarded; without rebinding, train() would update nothing.
        self.optimizer = self._build_optimizer()

    def train(self, x_train, y_train):
        """
        Run ``self.epoch`` full-batch SGD steps and print the loss periodically.

        Args:
            x_train: input tensor passed straight to the network; the first
                Linear layer takes 2 input features.
            y_train: target tensor passed to BCEWithLogitsLoss together with
                the network output.
        """
        for e in range(self.epoch):
            # Clear gradients first so anything accumulated before this step
            # (e.g. by code outside train) cannot leak into the update.
            self.optimizer.zero_grad()
            y_estimate = self.network(x_train)
            loss = self.loss_func(y_estimate, y_train) # attention: do not swap the position!
            loss.backward()
            self.optimizer.step()
            if e%100==0 or e==self.epoch-1:
                print("epoch{}/{}, loss={}".format(e, self.epoch, loss.item()))

In [63]:
threeNN = ThreeLayerNeuralNetworkOnWheel()
# The parameter file is only written by the save cell at the end of the
# notebook, so it does not exist on a first run. Fall back to the random
# initialisation in that case instead of aborting the whole notebook.
try:
    threeNN.load_params_from_file("3NN_params.pth")
except FileNotFoundError:
    print("3NN_params.pth not found; starting from randomly initialised parameters")

In [None]:
# Alternative to loading only the parameters: restore the whole saved model.
# The file is produced by the save cell at the end of the notebook, so it is
# missing on a first run; keep the current network in that case.
try:
    threeNN.load_model_and_params_from_file("3NN.pth")
except FileNotFoundError:
    print("3NN.pth not found; keeping the current network")

In [49]:
def print_network_params(net):
    """
    Print every layer of ``net`` together with its weight tensor.

    Args:
        net: iterable of layers (for example a torch.nn.Sequential). A layer
            that has no ``weight`` attribute is printed with ``weights=None``.
    """
    for i, layer in enumerate(net):
        # Only a missing attribute maps to None. A bare ``except`` would also
        # hide KeyboardInterrupt and genuine errors raised while reading it.
        layer_weight = getattr(layer, "weight", None)
        print("layer{}:{},weights={}".format(i, layer, layer_weight))
# Print each layer of the current network with its weights (None for layers without a weight attribute).
print_network_params(threeNN.network)

layer0:Linear(in_features=2, out_features=4, bias=True),weights=Parameter containing:
tensor([[-0.0895,  0.5616],
        [-0.2315, -0.3945],
        [ 0.5897,  0.3278],
        [ 0.5580, -0.2806]], requires_grad=True)
layer1:Tanh(),weights=None
layer2:Linear(in_features=4, out_features=4, bias=True),weights=Parameter containing:
tensor([[ 0.3124, -0.1022,  0.4034, -0.0024],
        [-0.4612, -0.1980,  0.4264, -0.4843],
        [ 0.2461, -0.4284,  0.3987,  0.1740],
        [-0.3581,  0.1977, -0.2569,  0.2238]], requires_grad=True)
layer3:Sigmoid(),weights=None
layer4:Linear(in_features=4, out_features=1, bias=True),weights=Parameter containing:
tensor([[ 0.2463, -0.1433, -0.4782, -0.0488]], requires_grad=True)
layer5:Tanh(),weights=None


In [57]:
import numpy as np
import matplotlib.pyplot as plt
def make_dataset(sample_count, input_dim=2, label_classes=2, seed=None):
    """
    Generate a noisy 2-D dataset lying along the curve r = 4*sin(4*t).

    Class ``c`` covers the angle range t in [c*3.12, (c+1)*3.12]; Gaussian
    noise (std 0.2) is added to both the angle and the radius, and each point
    is stored as (r*sin(t), r*cos(t)).

    Args:
        sample_count: total number of samples to generate.
        input_dim: feature dimension; only 2 is supported because every point
            is an (x, y) pair.
        label_classes: number of classes. Samples are split as evenly as
            possible; when ``sample_count`` is not divisible, the first
            classes receive one extra sample each.
        seed: optional seed for a private RNG. With ``None`` (default) the
            global ``np.random`` state is used, exactly as before.

    Returns:
        (x, y): ``x`` has shape (sample_count, input_dim) and ``y`` has shape
        (sample_count, 1) holding the class index of every row.

    Raises:
        ValueError: if ``input_dim`` is not 2.
    """
    if input_dim != 2:
        # The generator emits (x, y) pairs; any other width previously failed
        # later with an opaque NumPy shape-mismatch error.
        raise ValueError("make_dataset only supports input_dim=2, got {}".format(input_dim))

    # np.random (module) and RandomState both expose randn, so the unseeded
    # path draws from the global state in the same order as the original code.
    rng = np.random if seed is None else np.random.RandomState(seed)

    x = np.zeros((sample_count, input_dim))
    y = np.zeros((sample_count, 1))
    base, extra = divmod(sample_count, label_classes)
    start = 0
    for c in range(label_classes):
        # Spread the remainder over the first classes so no row is left as an
        # all-zero point that would be silently labelled as class 0.
        n = base + (1 if c < extra else 0)
        rows = slice(start, start + n)
        t = np.linspace(c*3.12, (c+1)*3.12, n) + rng.randn(n)*0.2
        r = 4*np.sin(4*t) + rng.randn(n)*0.2
        x[rows] = np.c_[r*np.sin(t), r*np.cos(t)]
        y[rows] = c
        start += n
    return x, y
# Seed NumPy's global RNG so the synthetic training set is identical on every
# Restart & Run All; make_dataset draws its noise from np.random.
np.random.seed(0)
train_x, train_y = make_dataset(10000)

In [64]:
# Convert the NumPy arrays to tensors of torch's default floating dtype
# (what torch.Tensor(...) produces), then run the training loop.
default_dtype = torch.get_default_dtype()
x_tensor = torch.tensor(train_x, dtype=default_dtype)
y_tensor = torch.tensor(train_y, dtype=default_dtype)
threeNN.train(x_tensor, y_tensor)

epoch0/3000, loss=0.6109656095504761
epoch100/3000, loss=0.6104978919029236
epoch200/3000, loss=0.6100173592567444
epoch300/3000, loss=0.6095231771469116
epoch400/3000, loss=0.6090143322944641
epoch500/3000, loss=0.6084896326065063
epoch600/3000, loss=0.607948362827301
epoch700/3000, loss=0.607388973236084
epoch800/3000, loss=0.6068106293678284
epoch900/3000, loss=0.6062121391296387
epoch1000/3000, loss=0.6055920124053955
epoch1100/3000, loss=0.6049495339393616
epoch1200/3000, loss=0.6042832136154175
epoch1300/3000, loss=0.6035919189453125
epoch1400/3000, loss=0.6028746962547302
epoch1500/3000, loss=0.6021305322647095
epoch1600/3000, loss=0.6013582944869995
epoch1700/3000, loss=0.600557267665863
epoch1800/3000, loss=0.599726676940918
epoch1900/3000, loss=0.598865807056427
epoch2000/3000, loss=0.5979741215705872
epoch2100/3000, loss=0.5970514416694641
epoch2200/3000, loss=0.5960969924926758
epoch2300/3000, loss=0.5951108336448669
epoch2400/3000, loss=0.594092607498169
epoch2500/3000, lo

In [62]:
# Persist the trained network in the two formats read by the loader methods.
trained_network = threeNN.network
torch.save(trained_network, "3NN.pth")  # whole model object: architecture plus trained parameters
torch.save(trained_network.state_dict(), "3NN_params.pth")  # state dict only: the trained parameters
