In [28]:
import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

# **RNN for image classification**

In [29]:
# Training-time augmentation: pad by 4 pixels and take a random 28x28 crop,
# apply a random rotation drawn with degrees=30, then convert to a tensor.
augmentation = transforms.Compose([
    transforms.RandomCrop(size=28, padding=4),
    transforms.RandomRotation(degrees=30),
    transforms.ToTensor(),
])

# MNIST training split (downloaded into the current directory if missing),
# with the augmentation pipeline applied to every sample.
train_set = torchvision.datasets.MNIST(root='.', train=True, download=True, transform=augmentation)

# MNIST test split: no augmentation, only the tensor conversion.
test_set = torchvision.datasets.MNIST(root='.', train=False, download=True, transform=transforms.ToTensor())

In [5]:
# Shapes of the raw (untransformed) image tensors of both splits, one per line.
print(train_set.data.shape, test_set.data.shape, sep="\n")

torch.Size([60000, 28, 28])
torch.Size([10000, 28, 28])


In [6]:
batch_size = 128

# Training batches are reshuffled every epoch; test batches keep dataset order.
train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)

In [7]:
# Pull a single batch from the training loader and show the input shape
# together with the label tensor.
x, y = next(iter(train_loader))
print(x.shape, y)

tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]],


        [[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]],


        [[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]],


        ...,


        [[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0.

In [8]:
# Use the first CUDA device when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device('cpu')
print(device)

cpu


In [38]:
class RNN(nn.Module):
    """Stacked-LSTM sequence classifier.

    The input is a batch of sequences of feature vectors. A multi-layer LSTM
    consumes each sequence and the LSTM output at the last time step is
    projected by a linear layer to ``output_size`` values.

    Args:
        input_size: Number of features per time step (stored as ``self.D``).
        hidden_size: Size of the LSTM hidden state (stored as ``self.M``).
        n_rnnlayer: Number of stacked LSTM layers (stored as ``self.L``).
        output_size: Number of output units (stored as ``self.K``).
    """

    def __init__(self, input_size, hidden_size, n_rnnlayer, output_size, ) -> None:
        super().__init__()
        self.D = input_size
        self.M = hidden_size
        self.K = output_size
        self.L = n_rnnlayer

        self.rnn = nn.LSTM(
            input_size=self.D,
            hidden_size=self.M,
            num_layers=self.L,
            batch_first=True,  # inputs/outputs are laid out as (N, T, features)
        )

        self.fc = nn.Linear(self.M, self.K)

    def forward(self, X):
        """Run the LSTM over ``X`` and classify from the last time step.

        Args:
            X: Tensor of shape ``(N, T, D)`` (batch first).

        Returns:
            Tensor of shape ``(N, K)`` produced by the final linear layer.
        """
        n_samples = X.size(0)

        # Zero initial hidden/cell states, one per layer and sample. They are
        # created on the input's own device and dtype so the module does not
        # depend on a notebook-level ``device`` variable and keeps working
        # wherever the caller has placed the model and its inputs.
        h0 = torch.zeros(self.L, n_samples, self.M, device=X.device, dtype=X.dtype)
        c0 = torch.zeros(self.L, n_samples, self.M, device=X.device, dtype=X.dtype)

        # The final (h_n, c_n) states are not needed, only the per-step outputs.
        out, _ = self.rnn(X, (h0, c0))

        # out is (N, T, M); keep only the last time step -> (N, M) -> (N, K).
        out = self.fc(out[:, -1, :])

        return out

In [39]:
# Build the classifier (28 features per step, 128 hidden units, 2 LSTM layers,
# 10 outputs), move it to the selected device and display its structure.
model = RNN(
    input_size=28,
    hidden_size=128,
    n_rnnlayer=2,
    output_size=10,
).to(device)
model

RNN(
  (rnn): LSTM(28, 128, num_layers=2, batch_first=True)
  (fc): Linear(in_features=128, out_features=10, bias=True)
)

In [40]:
# Adam over all model parameters with its default hyperparameters, and a
# cross-entropy loss on the model outputs.
optimizer = torch.optim.Adam(params=model.parameters())
criterion = nn.CrossEntropyLoss()

In [43]:
def batch_gd(model, criterion, optimizer, train_loader, test_loader, epochs=10):
    """Train ``model`` with mini-batch gradient descent, evaluating every epoch.

    For each epoch the model is optimised over every batch of ``train_loader``
    and then evaluated (without gradient tracking) over every batch of
    ``test_loader``. Each input batch is reshaped to ``(-1, 28, 28)`` before it
    is passed to the model. A progress line is printed per epoch.

    Args:
        model: ``torch.nn.Module`` to train; batches are moved to the device
            of its parameters (CPU if it has none).
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar
            loss tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
        train_loader: Iterable yielding ``(inputs, targets)`` training batches.
        test_loader: Iterable yielding ``(inputs, targets)`` evaluation batches.
        epochs: Number of passes over ``train_loader``.

    Returns:
        Tuple ``(train_losses, test_losses)`` of NumPy arrays of length
        ``epochs`` holding the mean per-batch loss of each epoch. An entry is
        ``nan`` when the corresponding loader yielded no batches.
    """
    train_losses, test_losses = np.zeros(epochs), np.zeros(epochs)

    # Run on whatever device the model's weights live on instead of relying on
    # a module-level ``device`` variable.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    for it in range(epochs):
        # ---- training pass ----
        model.train()
        batch_losses = []
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(run_device), targets.to(run_device)
            inputs = inputs.view(-1, 28, 28)

            optimizer.zero_grad()

            # forward pass
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # backward pass and parameter update
            loss.backward()
            optimizer.step()
            batch_losses.append(loss.item())
        train_loss = np.mean(batch_losses) if batch_losses else float("nan")

        # ---- evaluation pass on the held-out loader ----
        model.eval()
        batch_losses = []
        with torch.no_grad():  # no graph needed when only measuring the loss
            for inputs, targets in test_loader:
                inputs, targets = inputs.to(run_device), targets.to(run_device)
                # Same reshape as in training so the model sees the same layout.
                inputs = inputs.view(-1, 28, 28)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                batch_losses.append(loss.item())
        test_loss = np.mean(batch_losses) if batch_losses else float("nan")

        train_losses[it], test_losses[it] = train_loss, test_loss

        print(f'Epoch: {it+1}, train loss: {train_loss:.4f}, test loss: {test_loss:.4f}')

    return train_losses, test_losses

In [1]:
# Train with the default number of epochs; the returned loss arrays are the
# cell's displayed value.
batch_gd(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    test_loader=test_loader,
)

NameError: name 'batch_gd' is not defined