In [28]:
import torch
import torch.nn  as  nn  
import torch.optim as  optim
from   torchvision import datasets,  transforms
import seaborn as  sns   
import matplotlib.pyplot as  plt   # Download the MNIST dataset

transform = transforms.ToTensor() 
train_dataset = datasets.MNIST(root   ='./data',  train=True   ,  download=True   , transform=transform) 
test_dataset = datasets.MNIST(root   ='./data',  train=False,  download=True   , transform=transform) 
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=64 ,  shuffle=True   ) 
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

1. Create a neural network: 
    a. Initialize 3 layers
    b. Define the forward function:
        i.  Reshape the data to a fully connected layer. Hint: Use .view() or .flatten().
        ii. Let the input pass through the different layers.
        iii.Consider what activation function you want to use in between the layers, and for the final layer.
    c. Loss function and optimizer:
        i.Consider what loss function and optimizer you want to use.
    d. Create the training loop: e.Create the evaluation loop:
    f. Save the model

2. Report your accuracy, is this satisfactory? Why / why not?
3. Plot the loss curve.

In [29]:
for X, y in test_loader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


In [30]:
# Get cpu, gpu or mps device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using mps device


In [21]:
# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(784, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        x = self.flatten(x)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.softmax(self.fc3(x))
        return x
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits"""

model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [38]:
class MyNetwork(nn.Module):
    def __init__(self):
        super(MyNetwork, self).__init__()
        self.flatten = nn.Flatten();
        self.fc1 = nn.Linear(in_features=28*28, out_features=20)
        self.fc2 = nn.Linear(in_features=20, out_features=20)
        self.fc3 = nn.Linear(in_features=20, out_features=10)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)
    def forward(self, x):
        x = self.flatten(x)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.softmax(self.fc3(x))
        return x
model = MyNetwork().to(device)
print(model)

MyNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=784, out_features=20, bias=True)
  (fc2): Linear(in_features=20, out_features=20, bias=True)
  (fc3): Linear(in_features=20, out_features=10, bias=True)
  (relu): ReLU()
  (softmax): Softmax(dim=1)
)


In [39]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [40]:
def train(dataloader, model, criterion, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = criterion(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [41]:
def test(dataloader, model, criterion):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += criterion(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [42]:
epochs = 4
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_loader, model, criterion, optimizer)
    test(test_loader, model, criterion)
print("Done!")

Epoch 1
-------------------------------
loss: 2.302135  [   64/60000]
loss: 2.303326  [ 6464/60000]
loss: 2.301812  [12864/60000]
loss: 2.300762  [19264/60000]
loss: 2.299221  [25664/60000]
loss: 2.302434  [32064/60000]
loss: 2.301721  [38464/60000]
loss: 2.300074  [44864/60000]
loss: 2.300256  [51264/60000]
loss: 2.295749  [57664/60000]
Test Error: 
 Accuracy: 10.3%, Avg loss: 2.299705 

Epoch 2
-------------------------------
loss: 2.300054  [   64/60000]
loss: 2.299288  [ 6464/60000]
loss: 2.298852  [12864/60000]
loss: 2.300163  [19264/60000]
loss: 2.298885  [25664/60000]
loss: 2.296407  [32064/60000]
loss: 2.298699  [38464/60000]
loss: 2.299099  [44864/60000]
loss: 2.296345  [51264/60000]
loss: 2.293783  [57664/60000]
Test Error: 
 Accuracy: 15.2%, Avg loss: 2.295936 

Epoch 3
-------------------------------
loss: 2.296736  [   64/60000]
loss: 2.294695  [ 6464/60000]
loss: 2.296567  [12864/60000]
loss: 2.293051  [19264/60000]
loss: 2.291348  [25664/60000]
loss: 2.295338  [32064/600

In [43]:
PATH = 'model.pth'
torch.save(model.state_dict(), PATH)


In [None]:
MyModel = MyNetwork()
MyModel.load_state_dict(torch.load(PATH))
MyModel.eval()
