In [None]:
# Imports
import torch
import torchvision
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
import torch.nn.functional as F  # All functions that don't have any parameters
from torch.utils.data import (
    DataLoader,
)  # Gives easier dataset management and creates mini-batches
import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
import torchvision.transforms as transforms  # Transformations we can perform on our dataset

# Set device: use CUDA when it is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# Hyperparameters -- model architecture
input_size = 28  # features per time step fed to the GRU
sequence_length = 28  # time steps per sample; also sizes the final linear layer
hidden_size = 256
num_layers = 2
num_classes = 10

# Hyperparameters -- optimisation
learning_rate = 0.001
batch_size = 64
num_epochs = 10

# When True, a previously saved checkpoint is restored before training starts
load_model = True
# Recurrent neural network (many-to-one)
class RNN(nn.Module):
    """GRU classifier that feeds the hidden states of all time steps to one linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the hidden state of each GRU layer.
        num_layers: Number of stacked GRU layers.
        num_classes: Number of scores produced per sample.
        sequence_length: Number of time steps in every input sequence. The
            linear layer has ``hidden_size * sequence_length`` inputs, so
            every batch must contain exactly this many steps. Defaults to 28,
            the value of the script's ``sequence_length`` hyperparameter.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, sequence_length=28):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.sequence_length = sequence_length
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        # The classifier consumes the concatenated hidden states of all steps
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)

    def forward(self, x):
        """Compute class scores for a batch of sequences.

        Args:
            x: Tensor of shape ``(batch, sequence_length, input_size)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        # Initial hidden state (a GRU has no cell state). It is created on the
        # input's device and dtype so the module works wherever it was moved,
        # instead of depending on a module-level `device` variable.
        h0 = torch.zeros(
            self.num_layers,
            x.size(0),
            self.hidden_size,
            device=x.device,
            dtype=x.dtype,
        )
        # forward propagate
        out, _ = self.gru(x, h0)
        # Flatten the per-step hidden states into one vector per sample
        out = out.reshape(out.shape[0], -1)
        # decode the hidden states into class scores
        return self.fc(out)

def save_checkpoint(state, filename="mycheckpoint.pth.tar"):
    """Announce the save on stdout, then serialize ``state`` to ``filename``.

    Args:
        state: Object to store; the training loop passes a dict holding the
            model and optimizer state dicts.
        filename: Destination path handed to ``torch.save``.
    """
    print("Saving Checkpoint")
    torch.save(obj=state, f=filename)

def load_checkpoint(checkpoint):
    """Restore the module-level ``model`` and ``optimizer`` from ``checkpoint``.

    Args:
        checkpoint: Mapping with a ``"state_dict"`` entry for the model and an
            ``"optimizer"`` entry for the optimizer.
    """
    print("Load Checkpoint")

    # Model weights first, then the optimizer state
    model_state = checkpoint["state_dict"]
    model.load_state_dict(model_state)

    optimizer_state = checkpoint["optimizer"]
    optimizer.load_state_dict(optimizer_state)

# Load Data: MNIST train and test splits, each image converted to a tensor
train_dataset = datasets.MNIST(
    root="dataset/",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
test_dataset = datasets.MNIST(
    root="dataset/",
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Both loaders serve shuffled mini-batches of `batch_size` samples
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# Initialize network on the selected device. The training loop moves every
# batch to `device`, so the parameters have to live there as well; without
# this the script fails with a device-mismatch error on CUDA machines.
model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)

# Loss and optimizer (the optimizer is built after the model has been moved)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
if load_model:
    # map_location remaps the stored tensors onto the current device, so a
    # checkpoint written on a GPU machine can be restored on a CPU-only one.
    # NOTE(review): this absolute path only matches save_checkpoint's default
    # relative filename when the working directory is /content -- confirm.
    load_checkpoint(torch.load("/content/mycheckpoint.pth.tar", map_location=device))

# Train Network
for epoch in range(num_epochs):
    # Snapshot model and optimizer state at the start of every third epoch
    # (epochs 0, 3, 6, ...), i.e. before that epoch's updates are applied.
    if epoch % 3 == 0:
        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        save_checkpoint(checkpoint)

    losses = []
    for batch_idx, (data, targets) in enumerate(train_loader):
        # squeeze(1) removes the size-1 axis at position 1 (presumably the
        # image channel axis) so each sample is a 2-D sequence for the GRU.
        data = data.squeeze(1).to(device)
        targets = targets.to(device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        # backward: clear stale gradients, backpropagate, update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        losses.append(loss.item())

    # Average of the per-batch losses collected during this epoch
    mean_loss = sum(losses) / len(losses)
    print(f"Loss at epoch {epoch} was {mean_loss : .5f}")

# check accuracy
# def check_accuracy(loader, model):
#     if loader.dataset.train:
#         print("Checking accuracy on train")
#     else:
#         print("checking accuracy on test")
        
#     num_correct = 0
#     num_samples = 0
#     model.eval()
#     with torch.no_grad():
#         for x, y in loader:
#             x = x.to(device=device).squeeze(1)
#             y = y.to(device=device)
#             scores = model(x)
#             _, predictions = scores.max(1)
#             num_correct += (predictions == y).sum()
#             num_samples += predictions.size(0)
#         print(
#             f"Got {num_correct} / {num_samples} with \
#               accuracy {float(num_correct)/float(num_samples)*100:.2f}"
#         )
#     model.train()

# check_accuracy(train_loader, model)
# check_accuracy(test_loader, model)

cpu
Load Checkpoint
Saving Checkpoint
Loss at epoch 0 was  0.19813
