In [11]:
import importlib
import numpy as np
import torch.utils.data
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook as tqdm
import torch.nn as nn
from torch.autograd import Variable
import torch
from torchvision import datasets, transforms


In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [13]:
def show_images(images):
    images = torchvision.utils.make_grid(images)
    show_image(images)


def show_image(img):
    plt.imshow(img.permute(1, 2, 0), cmap="gray")
    plt.show()

In [14]:
class Flatten(nn.Module):
    def forward(self, input):
        return input.view(input.size(0), -1)

class UnFlatten(nn.Module):
    def forward(self, input, size=1024):
        return input.view(input.size(0), 1024, 1, 1)

class ConvVAE(nn.Module):
    def __init__(
        self,
        image_channels=3,
        input_dim=image_size,
        h_dim=hidden_size,
        latent_size=latent_size,
    ):
        super(ConvVAE, self).__init__()

        self.encoder = nn.Sequential(
            nn.Conv2d(image_channels, 32, 4, 2),
            nn.LeakyReLU(0.2),
            nn.Conv2d(32, 64, 4, 2),
            nn.LeakyReLU(0.2),
            nn.Conv2d(64, 128, 4, 2),
            nn.LeakyReLU(0.2),
            nn.Conv2d(128, 256, 4, 2),
            nn.LeakyReLU(0.2),
            Flatten(),
        )
        self.encoder_mean = nn.Linear(h_dim, latent_size)
        self.encoder_logvar = nn.Linear(h_dim, latent_size)
        self.fc = nn.Linear(latent_size, h_dim)
        self.decoder = nn.Sequential(
            UnFlatten(),
            nn.ConvTranspose2d(h_dim, 128, 5, 2),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, 5, 2),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 32, 6, 2),
            nn.ReLU(),
            nn.ConvTranspose2d(32, image_channels, 6, 2),
            nn.Sigmoid(),
        )

    def sample(self, log_var, mean):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return eps.mul(std).add_(mean)

    def forward(self, x):
        x = self.encoder(x)
        log_var = self.encoder_logvar(x)
        mean = self.encoder_mean(x)
        z = self.sample(log_var, mean)
        x = self.fc(z)
        x = self.decoder(x)

        return x, mean, log_var


In [15]:
batch_size = 32  # number of inputs in each batch
epochs = 10  # times to run the model on complete data
lr = 1e-3  # learning rate
train_loss = []

image_size = 32  # dimension of the image
hidden_size = 1024  # hidden dimension
latent_size = 32  # latent vector dimension
train_data = datasets.SVHN(
    ".",
    split="train",
    download=True,
    transform=transforms.Compose([transforms.Resize(64), transforms.ToTensor()]),
)
test_data = datasets.SVHN(
    ".",
    split="test",
    download=True,
    transform=transforms.Compose([transforms.Resize(64), transforms.ToTensor()]),
)


trainloader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=8)
testloader = DataLoader(test_data, batch_size=batch_size, shuffle=True, num_workers=8)

vae = ConvVE().to(device)
optimizer = optim.Adam(vae.parameters(), lr=lr)
vae.train()
for epoch in range(epochs):
    for i, (images, _) in enumerate(trainloader):
        images = images.to(device)
        optimizer.zero_grad()
        reconstructed_image, mean, log_var = vae(images)
        CE = F.binary_cross_entropy(reconstructed_image, images, reduction="sum")
        KLD = -0.5 * torch.sum(1 + log_var - mean.pow(2) - log_var.exp())
        loss = CE + KLD
        loss.backward()
        train_loss.append(loss.item())
        optimizer.step()
        if i % 100 == 0:
            print("Loss:")
            print(loss.item() / len(images))

Using downloaded and verified file: ./train_32x32.mat
Using downloaded and verified file: ./test_32x32.mat


NameError: name 'DataLoader' is not defined