<a href="https://colab.research.google.com/github/Ed-Marcavage/PyTorch/blob/Exercises/classification_exercise_refresher.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from sklearn.datasets import make_moons

# Number of 2-D points to generate
n_samples = 1000

# Generate the two interleaving half-moons; the fixed seed makes the data reproducible
X, y = make_moons(n_samples=n_samples, random_state=42)

In [3]:
import torch

# Convert features and labels to float32 tensors.
# torch.as_tensor accepts NumPy arrays and tensors alike, so this cell can be
# re-run safely (torch.from_numpy raises a TypeError once X/y are already tensors).
X = torch.as_tensor(X, dtype=torch.float)
y = torch.as_tensor(y, dtype=torch.float)

# View the first five samples
X[:5], y[:5]

(tensor([[ 0.0087,  0.3682],
         [ 0.9214, -0.4969],
         [ 0.9402, -0.4982],
         [ 0.4659, -0.3454],
         [-0.8504,  0.5261]]),
 tensor([1., 1., 1., 1., 0.]))

In [4]:
# Split data into train and test sets
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Sizes of each split, in the order: train features, test features, train labels, test labels
len(X_train), len(X_test), len(y_train), len(y_test)

(800, 200, 800, 200)

In [5]:
# Standard PyTorch imports
import torch
from torch import nn

# Prefer a CUDA GPU when PyTorch can see one; otherwise run everything on the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cpu'

In [10]:
# model_0 = nn.Sequential(
#     nn.Linear(in_features=2, out_features=5),
#     nn.Linear(in_features=5, out_features=1)
# ).to(device)

# model_0

class warmUpModelv0(nn.Module):
  """Two stacked linear layers with no activation in between.

  With the default arguments the network maps 2 input features -> 5 hidden
  units -> 1 output value per sample. The output is a raw logit (no sigmoid is
  applied here). Because there is no non-linearity between the layers, the
  composition is itself a linear map of the input.

  Args:
    in_features: Number of features per input sample (default 2).
    hidden_units: Width of the intermediate representation (default 5).
    out_features: Number of values produced per sample (default 1).
  """

  def __init__(self, in_features=2, hidden_units=5, out_features=1):
    super().__init__()

    # Layers are created in this order so that seeded initialisation is unchanged
    self.layer_one = nn.Linear(in_features=in_features, out_features=hidden_units)
    self.layer_two = nn.Linear(in_features=hidden_units, out_features=out_features)

  def forward(self, x):
    """Return raw logits of shape (..., out_features) for x of shape (..., in_features)."""
    return self.layer_two(self.layer_one(x))

# Build the model, then move its parameters onto the selected device
model_0 = warmUpModelv0()
model_0 = model_0.to(device)
model_0

warmUpModelv0(
  (layer_one): Linear(in_features=2, out_features=5, bias=True)
  (layer_two): Linear(in_features=5, out_features=1, bias=True)
)

In [17]:
# Inspect the model's current parameters (the weight and bias tensors of each layer)
model_0.state_dict()

OrderedDict([('layer_one.weight',
              tensor([[ 0.1881,  0.5282],
                      [ 0.0734,  0.1517],
                      [-0.5631,  0.6648],
                      [-0.6878, -0.6102],
                      [-0.2676, -0.0422]])),
             ('layer_one.bias',
              tensor([ 0.3230,  0.0487, -0.5743, -0.5761,  0.1835])),
             ('layer_two.weight',
              tensor([[-0.2665,  0.0442, -0.4323, -0.3174,  0.1822]])),
             ('layer_two.bias', tensor([-0.1603]))])

In [11]:
# Compare the selected device with the device the model's first parameter lives on
device, next(model_0.parameters()).device

('cpu', device(type='cpu'))

In [13]:
# Sanity-check the untrained model with a single forward pass over the test set.
# The pass runs inside inference_mode() so no autograd graph is recorded; the
# result must not be recomputed outside the context, or it would track gradients.
with torch.inference_mode():
  untrained_preds = model_0(X_test.to(device))

# Predictions come out as [N, 1] while the labels are [N] - note the extra dimension
print(f"Length of predictions: {len(untrained_preds)}, Shape: {untrained_preds.shape}")
print(f"Length of test samples: {len(y_test)}, Shape: {y_test.shape}")
print(f"\nFirst 10 predictions:\n{untrained_preds[:10]}")
print(f"\nFirst 10 test labels:\n{y_test[:10]}")

Length of predictions: 200, Shape: torch.Size([200, 1])
Length of test samples: 200, Shape: torch.Size([200])

First 10 predictions:
tensor([[0.4887],
        [0.3013],
        [0.7776],
        [0.2387],
        [0.9028],
        [0.8924],
        [0.4998],
        [0.5626],
        [0.7535],
        [0.3254]], grad_fn=<SliceBackward0>)

First 10 test labels:
tensor([1., 0., 1., 0., 1., 1., 0., 0., 1., 0.])


In [15]:
# Calculate accuracy (a classification metric)
def accuracy_fn(y_true, y_pred):
    """Return the percentage (0-100) of predictions that equal their labels.

    Args:
        y_true: Tensor of ground-truth labels.
        y_pred: Tensor of predicted labels.

    Returns:
        Accuracy as a float percentage; 0.0 when there are no predictions.
    """
    # When both tensors hold the same number of elements, compare them as flat
    # vectors. Without this, shapes such as [N] vs [N, 1] would be broadcast by
    # torch.eq into an [N, N] grid and the "accuracy" could exceed 100%.
    if y_true.numel() == y_pred.numel():
        y_true, y_pred = y_true.reshape(-1), y_pred.reshape(-1)

    total = len(y_pred)
    if total == 0:
        # Nothing to score; avoid dividing by zero
        return 0.0

    correct = torch.eq(y_true, y_pred).sum().item() # torch.eq() calculates where two tensors are equal
    return (correct / total) * 100

### Train model
- Forward pass
- Calculate the loss
- Zero the gradients
- Perform backpropagation on the loss
- Step the optimizer (gradient descent)


### Going from raw model outputs to predicted labels (logits -> prediction probabilities -> prediction labels)


In [16]:
# Evaluation mode + inference_mode: compute raw outputs (logits) without autograd tracking
model_0.eval()
with torch.inference_mode():
  y_logits = model_0(X_test.to(device))
  # Keep only the first five rows for inspection
  y_logits = y_logits[:5]
y_logits

tensor([[0.4887],
        [0.3013],
        [0.7776],
        [0.2387],
        [0.9028]])

In [18]:
# Squash the logits into the (0, 1) range so they can be read as probabilities
y_pred_probs = y_logits.sigmoid()
y_pred_probs

tensor([[0.6198],
        [0.5748],
        [0.6852],
        [0.5594],
        [0.7115]])

In [19]:
# Find the predicted labels (round the prediction probabilities)
y_preds = torch.round(y_pred_probs)

# In full: logits -> prediction probabilities -> prediction labels.
# Run the forward pass under inference_mode() so this evaluation-only check
# does not build an autograd graph.
with torch.inference_mode():
  y_pred_labels = torch.round(torch.sigmoid(model_0(X_test.to(device))[:5]))

# Check for equality
print(torch.eq(y_preds.squeeze(), y_pred_labels.squeeze()))

# Get rid of extra dimension
y_preds.squeeze()

tensor([True, True, True, True, True])


tensor([1., 1., 1., 1., 1.])

In [14]:
# Binary cross-entropy that takes raw logits as input
loss_fn = nn.BCEWithLogitsLoss()

# Stochastic gradient descent over all of the model's parameters
optimizer = torch.optim.SGD(model_0.parameters(), lr=0.1)

In [21]:
torch.manual_seed(42)

# How many full passes to make over the training data
epochs = 100

# The model and every tensor it sees must live on the same device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

# Train and evaluate once per epoch
for epoch in range(epochs):

    # ---- Training ----
    model_0.train()

    # Forward pass: raw logits, squeezed from [N, 1] to [N] to match the labels
    y_logits = model_0(X_train).squeeze()
    # logits -> prediction probabilities -> hard 0/1 labels (used for accuracy only)
    y_pred = torch.sigmoid(y_logits).round()

    # nn.BCEWithLogitsLoss takes the raw logits directly
    # (with nn.BCELoss you would pass torch.sigmoid(y_logits) instead)
    loss = loss_fn(y_logits, y_train)
    acc = accuracy_fn(y_true=y_train, y_pred=y_pred)

    # Clear old gradients, backpropagate the loss, then update the parameters
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # ---- Testing ----
    model_0.eval()
    with torch.inference_mode():
        test_logits = model_0(X_test).squeeze()
        test_pred = torch.sigmoid(test_logits).round()
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_true=y_test, y_pred=test_pred)

    # Report progress only on every 10th epoch
    if epoch % 10 != 0:
        continue
    print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%")

Epoch: 0 | Loss: 0.58596, Accuracy: 76.62% | Test loss: 0.59313, Test acc: 73.50%
Epoch: 10 | Loss: 0.51987, Accuracy: 81.75% | Test loss: 0.53038, Test acc: 80.50%
Epoch: 20 | Loss: 0.45970, Accuracy: 82.12% | Test loss: 0.47383, Test acc: 83.00%
Epoch: 30 | Loss: 0.40944, Accuracy: 82.75% | Test loss: 0.42667, Test acc: 84.00%
Epoch: 40 | Loss: 0.37123, Accuracy: 83.38% | Test loss: 0.39039, Test acc: 84.50%
Epoch: 50 | Loss: 0.34365, Accuracy: 83.62% | Test loss: 0.36356, Test acc: 85.00%
Epoch: 60 | Loss: 0.32392, Accuracy: 84.25% | Test loss: 0.34372, Test acc: 84.50%
Epoch: 70 | Loss: 0.30952, Accuracy: 84.50% | Test loss: 0.32868, Test acc: 85.00%
Epoch: 80 | Loss: 0.29866, Accuracy: 84.88% | Test loss: 0.31689, Test acc: 85.50%
Epoch: 90 | Loss: 0.29014, Accuracy: 85.00% | Test loss: 0.30733, Test acc: 87.00%
