## **Imports**

In [1]:
import torch

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

## **Data**

In [3]:
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]).to(device)
Y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)

## **Define train & accuracy computation function**

In [16]:
def train(model, optimizer, criterion):
    for step in range(10001):
        hypothesis = model(X)

        # cost/loss function
        cost = criterion(hypothesis, Y)

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        if step % 1000 == 0:
            print(f'Step : {step}, Cost : {cost.item():.6f}')

In [5]:
# Accuracy computation
# True if hypothesis>0.5 else False

def compute_accuracy(model, X, Y):
    with torch.no_grad():
        hypothesis = model(X)
        predicted = (hypothesis > 0.5).float()
        accuracy = (predicted == Y).float().mean()
        print(
            'Hypothesis:\n', hypothesis.detach().cpu().numpy(), 
            '\nPredict:\n', predicted.detach().cpu().numpy(),
            '\nGround Truth:\n', Y.cpu().numpy(),
            '\nAccuracy:\n', accuracy.item()
            )

## **Model 1 - Only One Linear Layer**

In [6]:
# nn layers
linear = torch.nn.Linear(2, 1, bias=True)
sigmoid = torch.nn.Sigmoid()

# model
model = torch.nn.Sequential(linear, sigmoid).to(device)

# define cost/loss & optimizer
criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1)

In [7]:
train(model, optimizer, criterion)

Step : 0, Cost : 0.727397
Step : 1000, Cost : 0.693147
Step : 2000, Cost : 0.693147
Step : 3000, Cost : 0.693147
Step : 4000, Cost : 0.693147
Step : 5000, Cost : 0.693147
Step : 6000, Cost : 0.693147
Step : 7000, Cost : 0.693147
Step : 8000, Cost : 0.693147
Step : 9000, Cost : 0.693147
Step : 10000, Cost : 0.693147


In [8]:
compute_accuracy(model, X, Y)

Hypothesis:
 [[0.5]
 [0.5]
 [0.5]
 [0.5]] 
Predict:
 [[0.]
 [0.]
 [0.]
 [0.]] 
Ground Truth:
 [[0.]
 [1.]
 [1.]
 [0.]] 
Accuracy:
 0.5


## **Model 2 - Two Linear Layers**

In [13]:
# nn layers
linear1 = torch.nn.Linear(2, 2, bias=True)
linear2 = torch.nn.Linear(2, 1, bias=True)
sigmoid = torch.nn.Sigmoid()

# model
model2 = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid).to(device)

# define cost/loss & optimizer
criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model2.parameters(), lr=1)  # modified learning rate from 0.1 to 1

In [14]:
train(model2, optimizer, criterion)

Step : 0, Cost : 0.861447
Step : 1000, Cost : 0.036910
Step : 2000, Cost : 0.007736
Step : 3000, Cost : 0.004260
Step : 4000, Cost : 0.002930
Step : 5000, Cost : 0.002230
Step : 6000, Cost : 0.001799
Step : 7000, Cost : 0.001506
Step : 8000, Cost : 0.001296
Step : 9000, Cost : 0.001136
Step : 10000, Cost : 0.001012


In [15]:
compute_accuracy(model2, X, Y)

Hypothesis:
 [[1.3017104e-03]
 [9.9909627e-01]
 [9.9909198e-01]
 [9.3076332e-04]] 
Predict:
 [[0.]
 [1.]
 [1.]
 [0.]] 
Ground Truth:
 [[0.]
 [1.]
 [1.]
 [0.]] 
Accuracy:
 1.0


## **Model 3 - Four Linear Layers**

In [17]:
# nn layers
linear1 = torch.nn.Linear(2, 10, bias=True)
linear2 = torch.nn.Linear(10, 10, bias=True)
linear3 = torch.nn.Linear(10, 10, bias=True)
linear4 = torch.nn.Linear(10, 1, bias=True)
sigmoid = torch.nn.Sigmoid()

# model
model3 = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid, linear3, sigmoid, linear4, sigmoid).to(device)

# define cost/loss & optimizer
criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model3.parameters(), lr=1)  # modified learning rate from 0.1 to 1

In [19]:
train(model3, optimizer, criterion)

Step : 0, Cost : 0.694419
Step : 1000, Cost : 0.693118
Step : 2000, Cost : 0.693045
Step : 3000, Cost : 0.692499
Step : 4000, Cost : 0.009837
Step : 5000, Cost : 0.000721
Step : 6000, Cost : 0.000347
Step : 7000, Cost : 0.000225
Step : 8000, Cost : 0.000165
Step : 9000, Cost : 0.000130
Step : 10000, Cost : 0.000106


In [20]:
compute_accuracy(model, X, Y)

Hypothesis:
 [[0.5]
 [0.5]
 [0.5]
 [0.5]] 
Predict:
 [[0.]
 [0.]
 [0.]
 [0.]] 
Ground Truth:
 [[0.]
 [1.]
 [1.]
 [0.]] 
Accuracy:
 0.5
