<a href="https://colab.research.google.com/github/JieRou-1007/FYP/blob/master/vae_save%20and%20load%20model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!git clone https://github.com/pytorch/examples

fatal: destination path 'examples' already exists and is not an empty directory.


In [None]:
 # Mount Google Drive at /content/gdrive. The google.colab module is only
 # importable inside a Colab runtime, so this cell is Colab-specific.
 from google.colab import drive
 drive.mount('/content/gdrive') 

Mounted at /content/gdrive


In [None]:
pip install -r /content/examples/vae/requirements.txt



In [None]:
# check if CUDA is available
# torch is imported here because this cell runs before the notebook's main
# import cell; without it a fresh kernel raises NameError.
import torch

use_cuda = torch.cuda.is_available()

In [None]:
def save_ckp(state, is_best, checkpoint_path, best_model_path):
    """Write a training checkpoint and optionally mirror it as the best model.

    Args:
        state: object handed to ``torch.save`` (in this notebook a dict with
            the next epoch, the minimum validation loss and the model and
            optimizer state dicts).
        is_best: when true, the freshly written checkpoint file is also
            copied to ``best_model_path``.
        checkpoint_path: file the checkpoint is written to.
        best_model_path: file that receives the copy when ``is_best`` is true.
    """
    import os  # local import: the notebook's import cell does not provide os

    def _ensure_parent_dir(path):
        # Neither torch.save nor shutil.copyfile creates missing directories.
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    _ensure_parent_dir(checkpoint_path)
    torch.save(state, checkpoint_path)

    if is_best:
        # The best model is a plain file copy of the checkpoint just written.
        _ensure_parent_dir(best_model_path)
        shutil.copyfile(checkpoint_path, best_model_path)

In [None]:
def load_ckp(checkpoint_fpath, model, optimizer):
    """Restore model and optimizer state from a checkpoint file.

    Args:
        checkpoint_fpath: path of the checkpoint file to read. It must hold a
            dict with the keys ``'state_dict'``, ``'optimizer'``, ``'epoch'``
            and ``'valid_loss_min'``.
        model: module that receives ``checkpoint['state_dict']``.
        optimizer: optimizer that receives ``checkpoint['optimizer']``.

    Returns:
        Tuple ``(model, optimizer, epoch, valid_loss_min)`` where the last two
        are the values stored under the checkpoint's ``'epoch'`` and
        ``'valid_loss_min'`` keys.
    """
    # Deserialize onto the CPU so that a checkpoint written on a GPU runtime
    # can still be opened on a CPU-only one; the load_state_dict calls below
    # then populate the already-constructed model and optimizer.
    checkpoint = torch.load(checkpoint_fpath, map_location="cpu")
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    return model, optimizer, checkpoint['epoch'], checkpoint['valid_loss_min']

In [None]:
from __future__ import print_function
import argparse
import torch
import torch.utils.data
from torch import nn, optim
from torch.nn import functional as F
from torchvision import datasets, transforms
from torchvision.utils import save_image
import numpy as np
import shutil


# ---- Run configuration ------------------------------------------------------
# The parser is a leftover from the command-line script this notebook was
# adapted from; it is never asked to parse anything, and the settings are plain
# module-level variables instead.
parser = argparse.ArgumentParser(description='VAE MNIST Example')
batch_size = 128     # samples per mini-batch for both loaders
n_epochs = 2         # last epoch number for a fresh training run
seed = 1             # seed for torch's random number generator
log_interval = 10    # print a training log line every this many batches
# parser.add_argument('--batch-size', type=int, default=128, metavar='N',
#                     help='input batch size for training (default: 128)')
# parser.add_argument('--epochs', type=int, default=20, metavar='N',
#                     help='number of epochs to train (default: 10)')
# parser.add_argument('--no-cuda', action='store_true', default=False,
#                     help='disables CUDA training')
# parser.add_argument('--seed', type=int, default=1, metavar='S',
#                     help='random seed (default: 1)')
# parser.add_argument('--log-interval', type=int, default=10, metavar='N',
#                     help='how many batches to wait before logging training status')
# args = parser.parse_args()
# args.cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(seed)

# Use the GPU when one is visible and fall back to the CPU otherwise, so the
# notebook does not crash on a runtime without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pinned host memory is only requested when batches will be copied to a GPU.
kwargs = {'num_workers': 1, 'pin_memory': device.type == 'cuda'}

# CIFAR-10 train/test loaders; images are converted to tensors by ToTensor().
# Only the training split requests a download; both splits share '../data'.
loaders = {
    'train': torch.utils.data.DataLoader(
        datasets.CIFAR10('../data', train=True, download=True,
                         transform=transforms.ToTensor()),
        batch_size, shuffle=True, **kwargs),

    'test': torch.utils.data.DataLoader(
        datasets.CIFAR10('../data', train=False, transform=transforms.ToTensor()),
        batch_size, shuffle=True, **kwargs),
}

class VAE(nn.Module):
    """Fully connected variational auto-encoder over flattened inputs.

    The encoder maps a flattened input to the mean and log-variance of a
    diagonal Gaussian; the decoder maps a latent vector back to values in
    (0, 1) through a sigmoid.

    Args:
        input_dim: number of elements per flattened sample
            (default ``3*32*32``).
        hidden_dim: width of the single hidden layer on each side
            (default 400).
        latent_dim: size of the latent vector (default 20).
    """

    def __init__(self, input_dim=3*32*32, hidden_dim=400, latent_dim=20):
        super(VAE, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.latent_dim = latent_dim

        # Layer names and creation order are unchanged, so state-dict keys
        # (fc1, fc21, fc22, fc3, fc4) stay the same.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc21 = nn.Linear(hidden_dim, latent_dim)   # mean head
        self.fc22 = nn.Linear(hidden_dim, latent_dim)   # log-variance head
        self.fc3 = nn.Linear(latent_dim, hidden_dim)
        self.fc4 = nn.Linear(hidden_dim, input_dim)

    def encode(self, x):
        """Return ``(mu, logvar)`` for a batch of flattened inputs."""
        h1 = F.relu(self.fc1(x))
        return self.fc21(h1), self.fc22(h1)

    def reparameterize(self, mu, logvar):
        """Draw ``mu + eps * std`` with ``eps`` sampled from a standard normal."""
        std = torch.exp(0.5*logvar)
        eps = torch.randn_like(std)
        return mu + eps*std

    def decode(self, z):
        """Map latent vectors to flattened reconstructions in (0, 1)."""
        h3 = F.relu(self.fc3(z))
        return torch.sigmoid(self.fc4(h3))

    def forward(self, x):
        """Return ``(reconstruction, mu, logvar)`` for a batch ``x``.

        ``x`` is flattened to ``(batch, input_dim)``; reshape (rather than
        view) also accepts non-contiguous inputs.
        """
        mu, logvar = self.encode(x.reshape(x.size(0), self.input_dim))
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar


# Build the network, place it on the selected device, and attach an Adam
# optimizer (learning rate 1e-3) to its parameters.
model = VAE()
model = model.to(device)
optimizer = optim.Adam(params=model.parameters(), lr=0.001)


# Reconstruction + KL divergence losses summed over all elements and batch
def loss_function(recon_x, x, mu, logvar):
    """Return the VAE loss: summed binary cross-entropy plus KL divergence.

    Args:
        recon_x: reconstruction of shape ``(batch, features)`` with values in
            [0, 1].
        x: target batch; every sample is flattened, so any per-sample shape
            with ``features`` elements is accepted.
        mu: latent means.
        logvar: latent log-variances.

    Returns:
        Scalar tensor ``BCE + KLD``, summed (not averaged) over the batch.
    """
    # Flatten each sample instead of assuming 3*32*32 elements.
    target = x.reshape(x.size(0), -1)
    BCE = F.binary_cross_entropy(recon_x, target, reduction='sum')

    # see Appendix B from VAE paper:
    # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
    # https://arxiv.org/abs/1312.6114
    # KLD = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

    return BCE + KLD


def train(start_epochs, n_epochs, test_loss_min_input, loaders, model, optimizer, criterion, use_cuda, checkpoint_path, best_model_path):
    """Train and evaluate ``model``, writing a checkpoint after every epoch.

    Runs epochs ``start_epochs`` .. ``n_epochs`` (inclusive). Each epoch does
    one pass over ``loaders['train']`` and one over ``loaders['test']``, saves
    a reconstruction image and a sample image, and writes a checkpoint.

    Args:
        start_epochs: first epoch number to run.
        n_epochs: last epoch number to run (inclusive).
        test_loss_min_input: lowest summed test loss seen so far; an epoch
            whose summed test loss is <= this value is saved as the best model.
        loaders: dict with ``'train'`` and ``'test'`` data loaders that yield
            ``(images, labels)`` batches; labels are ignored.
        model: network whose forward pass returns ``(recon, mu, logvar)`` and
            which provides ``decode`` and an ``fc21`` layer, as the VAE class
            in this notebook does.
        optimizer: optimizer updating ``model``'s parameters.
        criterion: callable ``(recon, data, mu, logvar) -> scalar loss`` summed
            over the batch.
        use_cuda: kept for backward compatibility. Batches are moved to the
            device the model's parameters are on, so this flag no longer
            affects placement.
        checkpoint_path: template for the per-epoch checkpoint file; the epoch
            number is inserted before the extension
            (``'./cifar_net.pth'`` -> ``'./cifar_net1.pth'``).
        best_model_path: file that receives a copy of the checkpoint whenever
            the test loss does not exceed the running minimum.

    Returns:
        The trained ``model``.

    Notes:
        Reads the module-level ``log_interval`` and calls the notebook's
        ``save_ckp`` and torchvision's ``save_image``.
    """
    import os  # local import: os is not part of the notebook's import cell

    train_loader = loaders['train']
    test_loader = loaders['test']

    # Feed batches to the device the model is on, so data and parameters
    # cannot end up on different devices.
    device = next(model.parameters()).device

    results_dir = '/content/examples/vae/results'
    os.makedirs(results_dir, exist_ok=True)

    # Make sure the checkpoint directories exist before the first save.
    for path in (checkpoint_path, best_model_path):
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)
    ckp_root, ckp_ext = os.path.splitext(checkpoint_path)

    test_loss_min = test_loss_min_input

    for epoch in range(start_epochs, n_epochs + 1):
        # ---- training pass ----
        model.train()
        train_loss = 0
        for batch_idx, (data, _) in enumerate(train_loader):
            data = data.to(device)
            optimizer.zero_grad()
            recon_batch, mu, logvar = model(data)
            loss = criterion(recon_batch, data, mu, logvar)
            loss.backward()
            train_loss += loss.item()
            optimizer.step()
            if batch_idx % log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader),
                    loss.item() / len(data)))

        print('====> Epoch: {} Average loss: {:.4f}'.format(
            epoch, train_loss / len(train_loader.dataset)))

        # ---- evaluation pass ----
        model.eval()
        test_loss = 0
        with torch.no_grad():
            for i, (data, _) in enumerate(test_loader):
                data = data.to(device)
                recon_batch, mu, logvar = model(data)
                test_loss += criterion(recon_batch, data, mu, logvar).item()
                if i == 0:
                    # Save up to 8 inputs of the first batch above their
                    # reconstructions; view_as follows the real batch shape.
                    n = min(data.size(0), 8)
                    comparison = torch.cat([data[:n],
                                            recon_batch.view_as(data)[:n]])
                    save_image(comparison.cpu(),
                               os.path.join(results_dir, 'reconstruction_' + str(epoch) + '.png'),
                               nrow=n)

        # ---- checkpointing ----
        # The comparison uses the summed test loss, as before.
        is_best = test_loss <= test_loss_min
        if is_best:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(test_loss_min,test_loss))
            test_loss_min = test_loss

        checkpoint = {
            'epoch': epoch + 1,                 # epoch to resume from
            'valid_loss_min': test_loss_min,    # running minimum, not the last loss
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        # One write per epoch; save_ckp copies it to best_model_path if is_best.
        save_ckp(checkpoint, is_best,
                 '{}{}{}'.format(ckp_root, epoch, ckp_ext), best_model_path)

        print('====> Test set loss: {:.4f}'.format(test_loss / len(test_loader.dataset)))

        # ---- decode random latent vectors into a grid of samples ----
        with torch.no_grad():
            sample = torch.randn(64, model.fc21.out_features).to(device)
            sample = model.decode(sample).cpu()
            save_image(sample.view(64, 3, 32, 32),
                       os.path.join(results_dir, 'sample_' + str(epoch) + '.png'))

    return model
   
# PATH = './cifar_net.pth'
# torch.save(model.state_dict(), PATH)  

Files already downloaded and verified


In [None]:
# Fresh run starting at epoch 1 with no previous best loss. np.inf is used
# because the np.Inf alias was removed in NumPy 2.0.
trained_model = train(1, n_epochs, np.inf, loaders, model, optimizer, loss_function, use_cuda, './cifar_net.pth', './best.pth')



KeyboardInterrupt: ignored

In [None]:
# load model
# Re-create the network on the training device before building the optimizer,
# so the model is on the same device as the batches that train() feeds it.
model = VAE().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = loss_function
# define checkpoint saved path
ckp_path = "/content/cifar_net2.pth"

In [None]:
# load checkpoint
# load_ckp fills model and optimizer from the file and also returns the stored
# 'epoch' (the epoch to resume from) and 'valid_loss_min' values.
model, optimizer, start_epoch, valid_loss_min = load_ckp(ckp_path, model, optimizer)

In [None]:
# Show what was restored from the checkpoint; the loss is printed both raw and
# rounded to six decimals.
print(f"model =  {model}")
print(f"optimizer =  {optimizer}")
print(f"start_epoch =  {start_epoch}")
print(f"valid_loss_min =  {valid_loss_min}")
print(f"valid_loss_min = {valid_loss_min:.6f}")

model =  VAE(
  (fc1): Linear(in_features=3072, out_features=400, bias=True)
  (fc21): Linear(in_features=400, out_features=20, bias=True)
  (fc22): Linear(in_features=400, out_features=20, bias=True)
  (fc3): Linear(in_features=20, out_features=400, bias=True)
  (fc4): Linear(in_features=400, out_features=3072, bias=True)
)
optimizer =  Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.001
    weight_decay: 0
)
start_epoch =  3
valid_loss_min =  18634078.7734375
valid_loss_min = 18634078.773438


In [None]:
# Resume from the restored checkpoint and continue through epoch 6.
trained_model = train(
    start_epochs=start_epoch,
    n_epochs=6,
    test_loss_min_input=valid_loss_min,
    loaders=loaders,
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    use_cuda=use_cuda,
    checkpoint_path="/content/cifar_net2.pth",
    best_model_path="/content/best.pth",
)

====> Epoch: 3 Average loss: 1856.9177
Validation loss decreased (18634078.773438 --> 18575209.476562).  Saving model ...
====> Test set loss: 1857.5209
====> Epoch: 4 Average loss: 1853.0514
Validation loss decreased (18575209.476562 --> 18572386.859375).  Saving model ...
====> Test set loss: 1857.2387
====> Epoch: 5 Average loss: 1850.1726
Validation loss decreased (18572386.859375 --> 18542222.025391).  Saving model ...
====> Test set loss: 1854.2222
====> Epoch: 6 Average loss: 1848.2565
Validation loss decreased (18542222.025391 --> 18537562.636719).  Saving model ...
====> Test set loss: 1853.7563


In [None]:
# Run the VAE script from the cloned examples repository in a separate Python
# process; it does not share variables with this notebook's kernel.
!python /content/examples/vae/main.py

In [None]:
from __future__ import print_function
import argparse
import torch
import torch.utils.data
from torch import nn, optim
from torch.nn import functional as F
from torchvision import datasets, transforms
from torchvision.utils import save_image

class VAE(nn.Module):
    """Fully connected variational auto-encoder over flattened inputs.

    The encoder maps a flattened input to the mean and log-variance of a
    diagonal Gaussian; the decoder maps a latent vector back to values in
    (0, 1) through a sigmoid.

    Args:
        input_dim: number of elements per flattened sample
            (default ``3*32*32``).
        hidden_dim: width of the single hidden layer on each side
            (default 400).
        latent_dim: size of the latent vector (default 20).
    """

    def __init__(self, input_dim=3*32*32, hidden_dim=400, latent_dim=20):
        super(VAE, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.latent_dim = latent_dim

        # Layer names and creation order are unchanged, so state-dict keys
        # (fc1, fc21, fc22, fc3, fc4) stay the same.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc21 = nn.Linear(hidden_dim, latent_dim)   # mean head
        self.fc22 = nn.Linear(hidden_dim, latent_dim)   # log-variance head
        self.fc3 = nn.Linear(latent_dim, hidden_dim)
        self.fc4 = nn.Linear(hidden_dim, input_dim)

    def encode(self, x):
        """Return ``(mu, logvar)`` for a batch of flattened inputs."""
        h1 = F.relu(self.fc1(x))
        return self.fc21(h1), self.fc22(h1)

    def reparameterize(self, mu, logvar):
        """Draw ``mu + eps * std`` with ``eps`` sampled from a standard normal."""
        std = torch.exp(0.5*logvar)
        eps = torch.randn_like(std)
        return mu + eps*std

    def decode(self, z):
        """Map latent vectors to flattened reconstructions in (0, 1)."""
        h3 = F.relu(self.fc3(z))
        return torch.sigmoid(self.fc4(h3))

    def forward(self, x):
        """Return ``(reconstruction, mu, logvar)`` for a batch ``x``.

        ``x`` is flattened to ``(batch, input_dim)``; reshape (rather than
        view) also accepts non-contiguous inputs.
        """
        mu, logvar = self.encode(x.reshape(x.size(0), self.input_dim))
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar

# Use the GPU when one is visible and fall back to the CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VAE().to(device)
# model.load_state_dict(torch.load('/content/cifar_net.pth'))

In [None]:
# Checkpoint file to resume from (absolute path on the Colab runtime).
ckp_path = "/content/cifar_net2.pth"

In [None]:
# Fresh Adam optimizer (learning rate 1e-3) over the model's parameters.
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [None]:
# load_ckp fills model and optimizer from ckp_path and also returns the stored
# 'epoch' and 'valid_loss_min' values.
model, optimizer, start_epoch, valid_loss_min = load_ckp(ckp_path, model, optimizer)

In [None]:
# Show what was restored from the checkpoint; the loss is printed both raw and
# rounded to six decimals.
print(f"model =  {model}")
print(f"optimizer =  {optimizer}")
print(f"start_epoch =  {start_epoch}")
print(f"valid_loss_min =  {valid_loss_min}")
print(f"valid_loss_min = {valid_loss_min:.6f}")

model =  VAE(
  (fc1): Linear(in_features=3072, out_features=400, bias=True)
  (fc21): Linear(in_features=400, out_features=20, bias=True)
  (fc22): Linear(in_features=400, out_features=20, bias=True)
  (fc3): Linear(in_features=20, out_features=400, bias=True)
  (fc4): Linear(in_features=400, out_features=3072, bias=True)
)
optimizer =  Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.001
    weight_decay: 0
)
start_epoch =  2
valid_loss_min =  1863.40787734375
valid_loss_min = 1863.407877


In [None]:
# Resume from the restored checkpoint and continue through epoch 6.
trained_model = train(
    start_epochs=start_epoch,
    n_epochs=6,
    test_loss_min_input=valid_loss_min,
    loaders=loaders,
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    use_cuda=use_cuda,
    checkpoint_path="./checkpoint/current_checkpoint.pt",
    best_model_path="./best_model/best_model.pt",
)

NameError: ignored

In [None]:
# Stand-alone loader over the CIFAR-10 test split: batches of 128 tensors,
# reshuffled on every pass.
test_loader = torch.utils.data.DataLoader(
    dataset=datasets.CIFAR10(root='../data', train=False,
                             transform=transforms.ToTensor()),
    batch_size=128,
    shuffle=True,
)

In [None]:
# A DataLoader yields (images, labels) batches and is not itself a tensor, so
# the network is applied to one batch drawn from it. `output` is the tuple
# (reconstruction, mu, logvar) returned by the model's forward pass.
images, _labels = next(iter(test_loader))
with torch.no_grad():
    output = model(images.to(next(model.parameters()).device))

AttributeError: ignored

In [None]:
# Decode 64 random latent vectors and save them as an image grid.
# Use the device the model is on, rather than assuming CUDA.
device = next(model.parameters()).device

with torch.no_grad():
    sample = torch.randn(64, 20).to(device)
    sample = model.decode(sample).cpu()
    # Each decoded sample has 3*32*32 values, so it is viewed as a 3-channel
    # 32x32 image. The file is tagged with start_epoch because no global
    # `epoch` exists outside train().
    save_image(sample.view(64, 3, 32, 32),
               '/content/sample_' + str(start_epoch) + '.png')


RuntimeError: ignored

In [None]:
# Run the restored model over the test set and save the first reconstruction
# of the first batch as an image.
model.eval()
test_loss = 0  # stays 0 while the accumulation below is commented out
with torch.no_grad():
    for i, (data, _) in enumerate(test_loader):
        data = data.to(device)
        recon_batch, mu, logvar = model(data)
      #  test_loss += loss_function(recon_batch, data, mu, logvar).item()
        if i == 0:
            n = min(data.size(0), 1)
            # view_as follows the actual batch shape instead of assuming a
            # batch of exactly 128 images.
            comparison = recon_batch.view_as(data)[:n]
            save_image(comparison.cpu(),
                      '/content/try' + str(2) + '.png', nrow=n)

In [None]:
pip install ipdb

Collecting ipdb
  Downloading https://files.pythonhosted.org/packages/44/8c/76b33b115f4f2c090e2809a0247fe777eb3832f9d606479bf0139b29ca2c/ipdb-0.13.4.tar.gz
Building wheels for collected packages: ipdb
  Building wheel for ipdb (setup.py) ... [?25l[?25hdone
  Created wheel for ipdb: filename=ipdb-0.13.4-cp37-none-any.whl size=10973 sha256=2676474af015f11d7748824b421f5ba55a32797bf36b4a7b387ce0f285bbd692
  Stored in directory: /root/.cache/pip/wheels/56/51/e4/c91c61e3481a1a967beb18c4ea7a2b138a63cce94170b2e206
Successfully built ipdb
Installing collected packages: ipdb
Successfully installed ipdb-0.13.4
