In [5]:
import torch.nn as nn
import torch
import torch.functional as F

class Encoder1(nn.Module):
    def __init__(self, z_dim, hidden_dim):
        super().__init__()
        self.fc1 = nn.Linear(1000, hidden_dim)
        self.fc21 = nn.Linear(hidden_dim, z_dim)
        self.fc22 = nn.Linear(hidden_dim, z_dim)
        self.softplus = nn.Softplus()

    def forward(self, x):
        hidden = self.softplus(self.fc1(x))
        z_loc = self.fc21(hidden)
        z_scale = torch.exp(self.fc22(hidden))
        return z_loc, z_scale

class Decoder1(nn.Module):
    def __init__(self, z_dim, hidden_dim):
        super().__init__()
        self.fc1 = nn.Linear(z_dim, hidden_dim)
        self.fc21 = nn.Linear(hidden_dim, 1000)
        self.softplus = nn.Softplus()
        self.sigmoid = nn.Sigmoid()

    def forward(self, z):
        hidden = self.softplus(self.fc1(z))
        loc_img = self.sigmoid(self.fc21(hidden))
        return loc_img


In [6]:
class Encoder2(nn.Module):
    def __init__(self, z_dim, hidden_dim):
        super().__init__()
        self.fc1 = nn.Linear(z_dim, hidden_dim)
        self.fc21 = nn.Linear(hidden_dim, z_dim)
        self.fc22 = nn.Linear(hidden_dim, z_dim)
        self.softplus = nn.Softplus()

    def forward(self, z):
        hidden = self.softplus(self.fc1(z))
        z_loc = self.fc21(hidden)
        z_scale = torch.exp(self.fc22(hidden))
        return z_loc, z_scale

class Decoder2(nn.Module):
    def __init__(self, z_dim, hidden_dim):
        super().__init__()
        self.fc1 = nn.Linear(z_dim, hidden_dim)
        self.fc21 = nn.Linear(hidden_dim, 1000)
        self.softplus = nn.Softplus()
        self.sigmoid = nn.Sigmoid()

    def forward(self, z):
        hidden = self.softplus(self.fc1(z))
        loc_img = self.sigmoid(self.fc21(hidden))
        return loc_img


In [10]:
class VAE(nn.Module):
    def __init__(self, z_dim, hidden_dim):
        super().__init__()
        self.encoder1 = Encoder1(z_dim, hidden_dim)
        self.decoder1 = Decoder1(z_dim, hidden_dim)
        self.encoder2 = Encoder2(z_dim, hidden_dim)
        self.decoder2 = Decoder2(z_dim, hidden_dim)

    def reparameterize(self, z_loc, z_scale):
        epsilon = torch.randn_like(z_loc)
        z = z_loc + epsilon * z_scale
        return z

    def forward(self, x):
        z_loc1, z_scale1 = self.encoder1(x)
        z1 = self.reparameterize(z_loc1, z_scale1)
        x_recon1 = self.decoder1(z1)

        z_loc2, z_scale2 = self.encoder2(x_recon1)
        z2 = self.reparameterize(z_loc2, z_scale2)
        x_recon2 = self.decoder2(z2)

        return x_recon2, z_loc1, z_scale1, z_loc2, z_scale2

    def compute_loss(self, x, x_recon2, z_loc1, z_scale1, z_loc2, z_scale2):
        # Reconstruction loss
        recon_loss = F.binary_cross_entropy(x_recon2, x.view(1, 1000), reduction='sum')

        # KL divergence loss
        kl_loss1 = -0.5 * torch.sum(1 + torch.log(z_scale1.pow(2)) - z_loc1.pow(2) - z_scale1.pow(2))
        kl_loss2 = -0.5 * torch.sum(1 + torch.log(z_scale2.pow(2)) - z_loc2.pow(2) - z_scale2.pow(2))

        # Total loss
        loss = recon_loss + kl_loss1 + kl_loss2

        return loss


In [12]:
# 定义超参数
z_dim = 20
hidden_dim = 256
# 随机生成数据
data = torch.randn(1000, 1)
data = data.reshape((1, 1000))


# 实例化VAE模型
vae = VAE(z_dim, hidden_dim)

# 定义优化器
optimizer = torch.optim.Adam(vae.parameters(), lr=0.001)

num_epochs = 2000
# 训练循环
for epoch in range(num_epochs):
    optimizer.zero_grad()
    x_recon2, z_loc1, z_scale1, z_loc2, z_scale2 = vae.forward(data)
    loss = vae.compute_loss(data, x_recon2, z_loc1, z_scale1, z_loc2, z_scale2)
    loss.backward()
    optimizer.step()


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x1000 and 20x256)

In [7]:
import torch
from torch import nn
import torch.nn.functional as F
import torchvision
from torch.utils.data import DataLoader
import utils
from utils import make_dir


class CVAE(nn.Module):
    """Implementation of CVAE(Conditional Variational Auto-Encoder)"""
    def __init__(self, feature_size, class_size, latent_size):
        super(CVAE, self).__init__()

        self.fc1 = nn.Linear(feature_size + class_size, 200)
        self.fc2_mu = nn.Linear(200, latent_size)
        self.fc2_log_std = nn.Linear(200, latent_size)
        self.fc3 = nn.Linear(latent_size + class_size, 200)
        self.fc4 = nn.Linear(200, feature_size)

    def encode(self, x, y):
        h1 = F.relu(self.fc1(torch.cat([x, y], dim=1)))  # concat features and labels
        mu = self.fc2_mu(h1)
        log_std = self.fc2_log_std(h1)
        return mu, log_std

    def decode(self, z, y):
        h3 = F.relu(self.fc3(torch.cat([z, y], dim=1)))  # concat latents and labels
        recon = torch.sigmoid(self.fc4(h3))  # use sigmoid because the input image's pixel is between 0-1
        return recon

    def reparametrize(self, mu, log_std):
        std = torch.exp(log_std)
        eps = torch.randn_like(std)  # simple from standard normal distribution
        z = mu + eps * std
        return z

    def forward(self, x, y):
        mu, log_std = self.encode(x, y)
        z = self.reparametrize(mu, log_std)
        recon = self.decode(z, y)
        return recon, mu, log_std

    def loss_function(self, recon, x, mu, log_std) -> torch.Tensor:
        recon_loss = F.mse_loss(recon, x, reduction="sum")  # use "mean" may have a bad effect on gradients
        kl_loss = -0.5 * (1 + 2*log_std - mu.pow(2) - torch.exp(2*log_std))
        kl_loss = torch.sum(kl_loss)
        loss = recon_loss + kl_loss
        return loss


if __name__ == '__main__':
    epochs = 100
    batch_size = 100

    recon = None
    img = None

    utils.make_dir("./00_img/cvae")
    utils.make_dir("./02_model_weights/cvae")

    train_data = torchvision.datasets.MNIST(
        root='./mnist',
        train=True,
        transform=torchvision.transforms.ToTensor(),
        download=True
    )

    data_loader = DataLoader(train_data, batch_size=100, shuffle=True)

    cvae = CVAE(feature_size=784, class_size=10, latent_size=10)

    optimizer = torch.optim.Adam(cvae.parameters(), lr=1e-3)

    for epoch in range(100):
        train_loss = 0
        i = 0
        for batch_id, data in enumerate(data_loader):
            img, label = data
            inputs = img.reshape(img.shape[0], -1)
            y = utils.to_one_hot(label.reshape(-1, 1), num_class=10)
            recon, mu, log_std = cvae(inputs, y)
            loss = cvae.loss_function(recon, inputs, mu, log_std)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            i += 1

            if batch_id % 100 == 0:
                print("Epoch[{}/{}], Batch[{}/{}], batch_loss:{:.6f}".format(
                    epoch+1, epochs, batch_id+1, len(data_loader), loss.item()))

        print("======>epoch:{},\t epoch_average_batch_loss:{:.6f}============".format(epoch+1, train_loss/i), "\n")

        # save imgs
        if epoch % 10 == 0:
            imgs = utils.to_img(recon.detach())
            path = "./00_img/cvae/epoch{}.png".format(epoch+1)

ImportError: cannot import name 'make_dir' from 'utils' (d:\04_python_project\04_python_machine_learning\00_pytorch_project\06_Project_VI\03_experiment_demo\00_apple_dema\utils\__init__.py)