In [1]:


import torch
import matplotlib.pyplot as plt
%matplotlib qt
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d, art3d


# Training set for the NAND gate: each row of x_train is one (x1, x2) input
# pair and the same row of y_train holds the expected output for that pair.
x_train = torch.tensor(
    [
        [0.0, 1.0],
        [1.0, 0.0],
        [1.0, 1.0],
        [0.0, 0.0],
    ]
)
y_train = torch.tensor(
    [
        [1.0],
        [1.0],
        [0.0],
        [1.0],
    ]
)
# The two input columns as separate (4, 1) column vectors; slicing with a
# range keeps the second dimension instead of dropping it.
x_train1 = x_train[:, 0:1]
x_train2 = x_train[:, 1:2]

class NANDModel:
    """Logistic-regression model with two scalar inputs and one output.

    The prediction is ``sigmoid(x1 * W[0] + x2 * W[1] + b)``.  ``W`` has shape
    ``(2, 1)`` and ``b`` has shape ``(1, 1)``; both start at zero and are
    created with ``requires_grad=True`` so an optimizer can update them.
    """

    def __init__(self):
        # Model variables
        self.W = torch.tensor([[0.0], [0.0]], requires_grad=True)
        self.b = torch.tensor([[0.0]], requires_grad=True)

    def logits(self, x1, x2):
        """Return the pre-sigmoid activations as a column of shape ``(N, 1)``.

        Args:
            x1: Tensor with the first input of each of the N samples.
            x2: Tensor with the second input of each of the N samples.
        """
        # Stack the two inputs side by side into an (N, 2) feature matrix so
        # the affine map is a single matrix product with W of shape (2, 1).
        features = torch.cat([x1.reshape(-1, 1), x2.reshape(-1, 1)], dim=1)
        return features @ self.W + self.b

    # Predictor
    def f(self, x1, x2):
        """Return the predicted probabilities as a column of shape ``(N, 1)``.

        Defined on top of ``logits`` so that both methods always agree on the
        output shape (one row per sample).
        """
        return torch.sigmoid(self.logits(x1, x2))

    def loss(self, x1, x2, y):
        """Return the mean binary cross-entropy between predictions and ``y``.

        Args:
            x1: Tensor with the first input of each sample.
            x2: Tensor with the second input of each sample.
            y: Target tensor of shape ``(N, 1)``.
        """
        # The *_with_logits variant takes the raw logits and applies the
        # sigmoid internally, which PyTorch documents as more numerically
        # stable than a separate sigmoid followed by binary cross-entropy.
        return torch.nn.functional.binary_cross_entropy_with_logits(self.logits(x1, x2), y)


# Create the model whose W and b are about to be fitted.
model = NANDModel()

# Fit W and b with torch's SGD optimizer (learning rate 0.1). Every iteration
# evaluates the loss on the complete training set.
optimizer = torch.optim.SGD([model.W, model.b], lr=0.1)
for epoch in range(10000):
    # Forward pass followed by backpropagation: fills W.grad and b.grad.
    model.loss(x_train1, x_train2, y_train).backward()
    # Apply one update to W and b using the gradients just computed.
    optimizer.step()
    # Reset the gradients so the next iteration does not accumulate onto them.
    optimizer.zero_grad()

# Report the fitted parameters and the loss they reach on the training set.
# "!s" forces str() on each value, matching what "%s" formatting produces.
print(
    "W = {!s}, b = {!s}, loss = {!s}".format(
        model.W,
        model.b,
        model.loss(x_train1, x_train2, y_train),
    )
)


# Visualize the fitted model: a wireframe of f(x1, x2) over the unit square
# (with a small margin) together with the four training points.
fig = plt.figure("Logistic regression: the logical NAND operator")

plot1 = fig.add_subplot(111, projection='3d')

# 10 x 10 grid of (x1, x2) coordinates on which the model is evaluated.
x1_grid, x2_grid = np.meshgrid(np.linspace(-0.25, 1.25, 10), np.linspace(-0.25, 1.25, 10))

# Evaluate every grid point in one batched call instead of one call per point.
# no_grad() keeps the result out of the autograd graph, so it can be turned
# into a NumPy array; a tensor that requires grad cannot be converted directly.
with torch.no_grad():
    grid_x1 = torch.as_tensor(x1_grid, dtype=model.W.dtype).reshape(-1, 1)
    grid_x2 = torch.as_tensor(x2_grid, dtype=model.W.dtype).reshape(-1, 1)
    # One prediction per grid point, folded back into the grid's 2-D layout.
    y_grid = model.f(grid_x1, grid_x2).reshape(x1_grid.shape).numpy().astype(np.float64)

plot1_f = plot1.plot_wireframe(x1_grid, x2_grid, y_grid, color="green")

# Training samples drawn as blue dots at (x1, x2, y).
plot1.plot(x_train1.squeeze(), x_train2.squeeze(), y_train.squeeze(), 'o',  color="blue")

# Empty text artist in the lower-left corner of the figure.
plot1_info = fig.text(0.01, 0.02, "")

plot1.set_xlabel("$x_1$")
plot1.set_ylabel("$x_2$")
plot1.set_zlabel("$y$")
plot1.set_xticks([0, 1])
plot1.set_yticks([0, 1])
plot1.set_zticks([0, 1])
plot1.set_xlim(-0.25, 1.25)
plot1.set_ylim(-0.25, 1.25)
plot1.set_zlim(-0.25, 1.25)

# Truth table of the NAND operator, shown in the lower-right corner.
table = plt.table(cellText=[[0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 0]],
                  colWidths=[0.1] * 3,
                  colLabels=["$x_1$", "$x_2$", "$f(\\mathbf{x})$"],
                  cellLoc="center",
                  loc="lower right")

plt.show()



W = tensor([[-7.4109],
        [-7.4109]], requires_grad=True), b = tensor([[11.2890]], requires_grad=True), loss = tensor(0.0174, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
