# Test MegEngine grad


In [1]:
from megengine import Tensor

# Tensors for the toy expression y = w * x + b.
x = Tensor([3.])
w = Tensor([2.])
b = Tensor([-1.])
# Plain forward pass outside any GradManager; `y` is rebound inside the `with` block below.
y = w * x + b
from megengine.autodiff import GradManager

# Recompute y under a GradManager with x attached, then back-propagate from y.
with GradManager() as gm:
    gm.attach(x)
    y = w * x + b
    gm.backward(y)  # dy/dx = w
# Last expression of the cell, so the notebook displays it; expected to equal w (Tensor([2.])).
x.grad

Tensor([2.], device=xpux:0)

# Main Code for XOR

## Dataset definition

In [2]:
from megengine.data import DataLoader, RandomSampler
import megengine
import numpy as np
from megengine.data.dataset import Dataset


class XOR_Dataset(Dataset):
    """Synthetic XOR dataset: random pairs of bits labelled with their XOR.

    Args:
        dataset_size: number of (input, label) samples to generate.
        seed: optional seed for reproducible data. When ``None`` (the
            default) samples are drawn from NumPy's global RNG, exactly as
            before this parameter existed; otherwise a private
            ``np.random.RandomState(seed)`` is used so the global RNG state
            is left untouched.
    """

    def __init__(self, dataset_size=6000, seed=None):
        super().__init__()
        self.dataset_size = dataset_size
        # Fall back to the module-level RNG to stay backward-compatible.
        rng = np.random if seed is None else np.random.RandomState(seed)
        # Uniform samples in [0, 1) rounded to {0, 1}; shape (N, 2), unsigned ints
        # so that the bitwise XOR below is well defined.
        self.input_data = np.round(rng.rand(self.dataset_size, 2)).astype(np.uint)  # (N,2)
        # Label is the XOR of the two input bits; shape (N,).
        self.label = self.input_data[..., 0] ^ self.input_data[..., 1]

    def __len__(self):
        """Return the number of samples."""
        return self.dataset_size

    def __getitem__(self, index):
        """Return ``(input, label)`` at ``index``, both converted to float32."""
        return self.input_data[index].astype(np.float32), self.label[index].astype(np.float32)


# Training data: batches of 50 drawn through a RandomSampler.
train_dataset = XOR_Dataset()
train_dataloader = DataLoader(
    train_dataset,
    sampler=RandomSampler(dataset=train_dataset, batch_size=50),
)

# Evaluation data: a second, independently generated dataset read in order.
test_dataset = XOR_Dataset()
test_sampler = megengine.data.SequentialSampler(test_dataset, batch_size=50)
test_dataloader = DataLoader(test_dataset, sampler=test_sampler)

# Sanity check: dataset size, then the contents and container type of one batch.
print(len(train_dataloader.dataset))
x, y = next(iter(train_dataloader))
print(x, type(x), y, '-------', sep='\n')

6000
[[1. 1.]
 [0. 0.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [0. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 1.]
 [0. 1.]
 [1. 1.]
 [1. 0.]
 [1. 0.]
 [1. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [1. 0.]
 [0. 1.]
 [1. 1.]
 [1. 1.]
 [0. 1.]
 [0. 0.]
 [1. 1.]
 [1. 0.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 0.]
 [1. 1.]
 [1. 0.]
 [1. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 0.]
 [0. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 1.]
 [1. 1.]]
<class 'numpy.ndarray'>
[0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 0. 1. 0. 1. 1. 0. 1. 1. 1. 1. 1. 0. 0.
 0. 1. 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 1. 0. 1. 0. 1. 1. 1. 0. 0. 1. 1. 1.
 0. 0.]
-------


## Network definition

In [3]:
import math
import megengine.functional as F
import megengine.module as M


class XOR_Net(M.Module):
    """Small fully connected network: Linear(2, 10) -> ReLU -> Linear(10, 1) -> sigmoid."""

    def __init__(self):
        super().__init__()
        self.fc1 = M.Linear(2, 10)
        self.fc2 = M.Linear(10, 1)
        # self.init_self()  # custom initialisation is currently disabled

    def forward(self, input):
        """Apply both linear layers and return the sigmoid of the final output."""
        hidden = F.relu(self.fc1(input))
        return F.sigmoid(self.fc2(hidden))

    def init_self(self):
        """Re-initialise the parameters of every Conv2d, BatchNorm2d and Linear sub-module.

        Conv2d and Linear weights get MSRA initialisation and their biases
        (when present) a uniform draw in ``[-1/sqrt(fan_in), 1/sqrt(fan_in)]``;
        BatchNorm2d weights are set to ones and biases to zeros.
        """

        def _uniform_bias(layer):
            # Shared by the Conv2d and Linear branches; does nothing for bias-free layers.
            if layer.bias is None:
                return
            fan_in, _ = M.init.calculate_fan_in_and_fan_out(layer.weight)
            limit = 1 / math.sqrt(fan_in)
            M.init.uniform_(layer.bias, -limit, limit)

        for layer in self.modules():
            if isinstance(layer, M.Conv2d):
                M.init.msra_normal_(layer.weight, mode="fan_out", nonlinearity="relu")
                _uniform_bias(layer)
            elif isinstance(layer, M.BatchNorm2d):
                M.init.ones_(layer.weight)
                M.init.zeros_(layer.bias)
            elif isinstance(layer, M.Linear):
                M.init.msra_uniform_(layer.weight, a=math.sqrt(5))
                _uniform_bias(layer)


# Build the network; init_self() is not invoked, so the layers keep their constructor initialisation.
my_net = XOR_Net()

# Smoke test: one forward pass on the input (1, 1). The tensor is built from
# Python ints and has no batch dimension.
print(my_net(megengine.Tensor([1, 1])))

Tensor([0.665], device=xpux:0)


## Training loop

In [4]:
from megengine.autodiff import GradManager
import megengine.optimizer as optim

# Track gradients for every parameter of the network and optimise them with Adam.
gm = GradManager().attach(my_net.parameters())
optimizer = optim.Adam(my_net.parameters(), lr=0.01)  # lr may vary with different model

nums_epoch = 20
for epoch in range(nums_epoch):
    training_loss = 0
    nums_train_correct, nums_train_example = 0, 0
    nums_val_correct, nums_val_example = 0, 0

    # ---- training pass ----
    # Set training mode explicitly: the previous epoch's validation left the
    # module in eval mode.
    my_net.train()
    for data, label in train_dataloader:
        data = megengine.Tensor(data)
        label = megengine.Tensor(label)

        with gm:
            # (N, 1) sigmoid outputs flattened to (N,) to line up with the labels.
            score = my_net(data).flatten()
            # The network already applies a sigmoid, hence with_logits=False.
            loss = F.nn.binary_cross_entropy(score, label, with_logits=False)
            gm.backward(loss)
            optimizer.step().clear_grad()

        # Weight the batch loss by the batch size so the epoch value is a per-sample average.
        training_loss += loss.item() * len(data)

        # Threshold the probabilities at 0.5; `score` is already flat.
        pred = F.round(score)
        nums_train_correct += (pred == label).sum().item()
        nums_train_example += len(data)

    training_acc = nums_train_correct / nums_train_example
    training_loss /= nums_train_example

    # ---- validation pass ----
    # Switch to eval mode so any mode-dependent layers (e.g. BatchNorm, Dropout)
    # behave correctly; this should not change results for the current
    # Linear-only network.
    my_net.eval()
    for data, label in test_dataloader:
        data = megengine.Tensor(data)
        label = megengine.Tensor(label)
        pred = F.round(my_net(data)).flatten()

        nums_val_correct += (pred == label).sum().item()
        nums_val_example += len(data)

    val_acc = nums_val_correct / nums_val_example

    print(f"Epoch = {epoch}, "
          f"train_loss = {training_loss:.3f}, "
          f"train_acc = {training_acc:.3f}, "
          f"val_acc = {val_acc:.3f}")


Epoch = 0, train_loss = 0.472, train_acc = 0.894, val_acc = 1.000
Epoch = 1, train_loss = 0.135, train_acc = 1.000, val_acc = 1.000
Epoch = 2, train_loss = 0.051, train_acc = 1.000, val_acc = 1.000
Epoch = 3, train_loss = 0.027, train_acc = 1.000, val_acc = 1.000
Epoch = 4, train_loss = 0.017, train_acc = 1.000, val_acc = 1.000
Epoch = 5, train_loss = 0.012, train_acc = 1.000, val_acc = 1.000
Epoch = 6, train_loss = 0.009, train_acc = 1.000, val_acc = 1.000
Epoch = 7, train_loss = 0.007, train_acc = 1.000, val_acc = 1.000
Epoch = 8, train_loss = 0.006, train_acc = 1.000, val_acc = 1.000
Epoch = 9, train_loss = 0.005, train_acc = 1.000, val_acc = 1.000
Epoch = 10, train_loss = 0.004, train_acc = 1.000, val_acc = 1.000
Epoch = 11, train_loss = 0.003, train_acc = 1.000, val_acc = 1.000
Epoch = 12, train_loss = 0.003, train_acc = 1.000, val_acc = 1.000
Epoch = 13, train_loss = 0.002, train_acc = 1.000, val_acc = 1.000
Epoch = 14, train_loss = 0.002, train_acc = 1.000, val_acc = 1.000
Epoch

In [5]:
## Test loading a saved model

In [6]:
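# NOTE: no cell shown in this notebook writes `xornet_state_dict.pkl`. The `ValueError`
# recorded below this cell (fc1.bias should be (10,), got (2,)) indicates the file on disk
# was saved from a different architecture. Save the trained weights first, e.g.
# `megengine.save(my_net.state_dict(), 'xornet_state_dict.pkl')`, before re-enabling this cell.
# my_net.load_state_dict(megengine.load('xornet_state_dict.pkl'))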

# nums_val_correct, nums_val_example = 0, 0
# for data, label in test_dataloader:
#     data = megengine.Tensor(data)
#     label = megengine.Tensor(label)
#     pred = F.round(my_net(data)).flatten()
#
#     nums_val_correct += (pred == label).sum().item()
#     nums_val_example += len(data)
#
# val_acc = nums_val_correct / nums_val_example
#
# print(f"val_acc = {val_acc:.3f}")

ValueError: param `fc1.bias` shape mismatch, should be (10,), get (2,)