In [1]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, models, transforms
import torch.nn as nn
import torch.optim as optim

In [20]:
class AutoEncoder(nn.Module):
    """Fully connected autoencoder made of four linear layers.

    The encoder maps ``input_shape`` features to a ``hidden_size`` code through
    two linear layers; the decoder mirrors it back to ``input_shape`` features.
    A ReLU follows every linear layer, including the final one, so the
    reconstruction is always non-negative.

    Keyword Args:
        hidden_size: Width of both hidden layers and of the code.
        input_shape: Number of features in each input (and output) vector.

    Raises:
        KeyError: If ``hidden_size`` or ``input_shape`` is not supplied.
    """

    def __init__(self, **kwargs):
        super().__init__()
        self.hidden_size = kwargs["hidden_size"]
        self.input_shape = kwargs["input_shape"]
        n_features, n_hidden = self.input_shape, self.hidden_size

        # Encoder: input features -> hidden -> code.
        self.encoder_hidden_layer = nn.Linear(n_features, n_hidden)
        self.encoder_output_layer = nn.Linear(n_hidden, n_hidden)
        # Decoder: code -> hidden -> reconstructed features.
        self.decoder_hidden_layer = nn.Linear(n_hidden, n_hidden)
        self.decoder_output_layer = nn.Linear(n_hidden, n_features)

    def forward(self, x):
        """Encode ``x`` and decode it again.

        Args:
            x: Tensor whose last dimension has ``input_shape`` features.

        Returns:
            The reconstruction, with the same trailing dimension as ``x``.
        """
        pipeline = (
            self.encoder_hidden_layer,
            self.encoder_output_layer,
            self.decoder_hidden_layer,
            self.decoder_output_layer,
        )
        signal = x
        # Each stage is a linear map followed by ReLU.
        for layer in pipeline:
            signal = torch.relu(layer(signal))
        return signal

In [21]:
# Tunable hyperparameters for the notebook.
batch_size = 128
hidden_size = 128
input_shape = 784  # features per flattened sample (MNIST images are 28x28 pixels)
lr = 1e-3

# Seed PyTorch's global RNG so that weight initialisation and shuffle order
# are repeatable when the notebook is run top to bottom on a fresh kernel.
seed = 42
torch.manual_seed(seed)

# Page-locked host memory only helps host-to-GPU copies; enabling it without
# CUDA just triggers a warning, so gate it on GPU availability.
pin_memory = torch.cuda.is_available()

# ToTensor turns each PIL image into a float tensor scaled to [0, 1].
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

# MNIST training and test splits; downloaded to the root directory on first use.
train = torchvision.datasets.MNIST(
    root="~/torch_datasets", train=True, transform=transform, download=True
)

test = torchvision.datasets.MNIST(
    root="~/torch_datasets", train=False, transform=transform, download=True
)

# Shuffle only the training data; keep the test order fixed.
train_loader = torch.utils.data.DataLoader(
    train, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=pin_memory
)

test_loader = torch.utils.data.DataLoader(
    test, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=pin_memory
)

In [22]:
# Pick the compute device: CUDA when a GPU is visible, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the autoencoder, then move its parameters onto the chosen device.
model = AutoEncoder(hidden_size=hidden_size, input_shape=input_shape)
model = model.to(device)

# Adam updates every model parameter with the configured learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=lr)

# Reconstruction quality is scored with mean-squared error.
criterion = nn.MSELoss()

In [13]:
# Train the autoencoder with Adam and report the mean reconstruction loss
# per epoch. Only training happens here; the test split is not evaluated.

epochs = 1
for epoch in range(epochs):
    # Make sure the model is in training mode for this epoch.
    model.train()

    # Sum of per-batch mean losses, averaged over batches after the epoch.
    running_loss = 0.0
    for batch_features, _ in train_loader:
        # Labels are ignored: the input is also the reconstruction target.
        # Flatten each sample to the feature width the model was built with
        # (input_shape) instead of repeating the literal, then move the batch
        # to the active device.
        batch_features = batch_features.view(-1, input_shape).to(device)

        # PyTorch accumulates gradients across backward passes, so clear them
        # before every step.
        optimizer.zero_grad()

        # compute reconstructions
        outputs = model(batch_features)

        # compute training reconstruction loss against the inputs themselves
        train_loss = criterion(outputs, batch_features)

        # backpropagate to populate parameter gradients
        train_loss.backward()

        # perform parameter update based on current gradients
        optimizer.step()

        # add the mini-batch training loss to the epoch total
        running_loss += train_loss.item()

    # Mean of the per-batch losses for this epoch.
    loss = running_loss / len(train_loader)

    # display the epoch training loss
    print("epoch : {}/{}, loss = {:.6f}".format(epoch + 1, epochs, loss))

epoch : 1/1, loss = 0.019391
