Multi Layer Perceptron

In [1]:
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [2]:
X = torch.FloatTensor([[0, 0],  [0, 1], [1, 0], [1, 1]]).to(device)
Y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)

In [3]:
w1 = torch.Tensor(2, 2).to(device)
b1 = torch.Tensor(2).to(device)
w2 = torch.Tensor(2, 1).to(device)
b2 = torch.Tensor(1).to(device)

In [4]:
def sigmoid(x):
    return 1.0/(1.0 + torch.exp(-x))

def sigmoid_prime(x):
    return sigmoid(x) * (1 - sigmoid(x))

In [5]:
learning_rate = 1

for step in range(10001):
    #forward
    l1 = X.matmul(w1) + b1
    a1 = sigmoid(l1)
    l2 = a1.matmul(w2) + b2
    Y_pred = sigmoid(l2)
    cost = -torch.mean(Y * torch.log(Y_pred) + (1 - Y) * torch.log(1 - Y_pred))
    #back prop
    d_Y_pred = (Y_pred - Y) / (Y_pred * (1.0 - Y_pred) + 1e-7)

    d_l2 = d_Y_pred * sigmoid_prime(l2)
    d_b2 = d_l2
    d_w2 = a1.transpose(0, 1).matmul(d_b2)

    d_a1 = d_b2.matmul(w2.transpose(0, 1))
    d_l1 = d_a1 * sigmoid_prime(l1)
    d_b1 = d_l1
    d_w1 =X.transpose(0, 1).matmul(d_b1)

    w1 = w1 - learning_rate * d_w1
    b1 = b1 - learning_rate * d_b1.mean(0)
    w2 = w2 - learning_rate * d_w2
    b2 = b2 - learning_rate * d_b2.mean(0)

    if step % 100 == 0:
        print("step:{:5d}, cost:{}".format(step, cost.item()))



step:    0, cost:0.6931471824645996
step:  100, cost:0.6931471824645996
step:  200, cost:0.6931471824645996
step:  300, cost:0.6931471824645996
step:  400, cost:0.6931471824645996
step:  500, cost:0.6931471824645996
step:  600, cost:0.6931471824645996
step:  700, cost:0.6931471824645996
step:  800, cost:0.6931471824645996
step:  900, cost:0.6931471824645996
step: 1000, cost:0.6931471824645996
step: 1100, cost:0.6931471824645996
step: 1200, cost:0.6931471824645996
step: 1300, cost:0.6931471824645996
step: 1400, cost:0.6931471824645996
step: 1500, cost:0.6931471824645996
step: 1600, cost:0.6931471824645996
step: 1700, cost:0.6931471824645996
step: 1800, cost:0.6931471824645996
step: 1900, cost:0.6931471824645996
step: 2000, cost:0.6931471824645996
step: 2100, cost:0.6931471824645996
step: 2200, cost:0.6931471824645996
step: 2300, cost:0.6931471824645996
step: 2400, cost:0.6931471824645996
step: 2500, cost:0.6931471824645996
step: 2600, cost:0.6931471824645996
step: 2700, cost:0.693147182

In [6]:
print(Y_pred)

tensor([[0.5000],
        [0.5000],
        [0.5000],
        [0.5000]], device='cuda:0')


xor-nn

In [7]:
X = torch.FloatTensor([[0, 0],  [0, 1], [1, 0], [1, 1]]).to(device)
Y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)

In [8]:
linear1 = torch.nn.Linear(2, 2, bias=True)
linear2 = torch.nn.Linear(2, 1, bias=True)
sigmoid = torch.nn.Sigmoid()
model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid).to(device)

criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1)

for step in range(10001):
    optimizer.zero_grad()
    hypothesis = model(X)
    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()
    if step % 100 == 0:
        print("step:{:5d}, cost:{:5f}".format(step, cost.item()))
    

step:    0, cost:0.743407
step:  100, cost:0.693165
step:  200, cost:0.693158
step:  300, cost:0.693152
step:  400, cost:0.693146
step:  500, cost:0.693141
step:  600, cost:0.693136
step:  700, cost:0.693130
step:  800, cost:0.693122
step:  900, cost:0.693113
step: 1000, cost:0.693100
step: 1100, cost:0.693082
step: 1200, cost:0.693057
step: 1300, cost:0.693019
step: 1400, cost:0.692961
step: 1500, cost:0.692866
step: 1600, cost:0.692703
step: 1700, cost:0.692396
step: 1800, cost:0.691730
step: 1900, cost:0.689965
step: 2000, cost:0.683832
step: 2100, cost:0.656167
step: 2200, cost:0.431102
step: 2300, cost:0.134893
step: 2400, cost:0.066304
step: 2500, cost:0.042168
step: 2600, cost:0.030454
step: 2700, cost:0.023666
step: 2800, cost:0.019278
step: 2900, cost:0.016224
step: 3000, cost:0.013984
step: 3100, cost:0.012274
step: 3200, cost:0.010928
step: 3300, cost:0.009842
step: 3400, cost:0.008949
step: 3500, cost:0.008201
step: 3600, cost:0.007567
step: 3700, cost:0.007022
step: 3800, 

In [9]:
print(model(X))

tensor([[0.0011],
        [0.9989],
        [0.9989],
        [0.0017]], device='cuda:0', grad_fn=<SigmoidBackward0>)


xor-nn-wide-deep

In [10]:
X = torch.FloatTensor([[0, 0],  [0, 1], [1, 0], [1, 1]]).to(device)
Y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)

In [11]:
linear1 = torch.nn.Linear(2, 10, bias=True)
linear2 = torch.nn.Linear(10, 10, bias=True)
linear3 = torch.nn.Linear(10, 10, bias=True)
linear4 = torch.nn.Linear(10, 1, bias=True)
sigmoid = torch.nn.Sigmoid()
model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid, linear3, sigmoid, linear4, sigmoid).to(device)

criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1)

for step in range(10001):
    optimizer.zero_grad()
    hypothesis = model(X)
    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()
    if step % 100 == 0:
        print("step:{:5d}, cost:{:5f}".format(step, cost.item()))
    

step:    0, cost:0.697808
step:  100, cost:0.693111
step:  200, cost:0.693102
step:  300, cost:0.693092
step:  400, cost:0.693081
step:  500, cost:0.693068
step:  600, cost:0.693052
step:  700, cost:0.693033
step:  800, cost:0.693010
step:  900, cost:0.692981
step: 1000, cost:0.692944
step: 1100, cost:0.692896
step: 1200, cost:0.692831
step: 1300, cost:0.692742
step: 1400, cost:0.692612
step: 1500, cost:0.692416
step: 1600, cost:0.692098
step: 1700, cost:0.691539
step: 1800, cost:0.690434
step: 1900, cost:0.687838
step: 2000, cost:0.679710
step: 2100, cost:0.640832
step: 2200, cost:0.548876
step: 2300, cost:0.508318
step: 2400, cost:0.487521
step: 2500, cost:0.590370
step: 2600, cost:0.020090
step: 2700, cost:0.007716
step: 2800, cost:0.004512
step: 2900, cost:0.003111
step: 3000, cost:0.002342
step: 3100, cost:0.001863
step: 3200, cost:0.001538
step: 3300, cost:0.001304
step: 3400, cost:0.001129
step: 3500, cost:0.000993
step: 3600, cost:0.000885
step: 3700, cost:0.000796
step: 3800, 

In [12]:
print(model(X))

tensor([[7.4741e-05],
        [9.9988e-01],
        [9.9990e-01],
        [8.8276e-05]], device='cuda:0', grad_fn=<SigmoidBackward0>)
