In [None]:
# Mount Google Drive at /content/gdrive (requires the google.colab package).
# Later cells copy the dataset from this mount and write checkpoints/results to it.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
import numpy as np
import pandas as pd
import torch
import sys
import torch.optim
import torch.nn as nn
import torch.nn.init as init
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.nn.functional as F
from PIL import Image
import time
import csv
import os
from os import path

# Data-loading settings shared by the training and validation loaders.
batch_size, n_workers = 64, 2

# Probe for CUDA once and derive the matching torch.device from the result.
cuda = torch.cuda.is_available()
device = torch.device("cuda") if cuda else torch.device("cpu")
print(cuda, sys.version)

True 3.7.10 (default, Feb 20 2021, 21:17:23) 
[GCC 7.5.0]


In [None]:
# List the devices TensorFlow can see, as a hardware sanity check.
# NOTE(review): this is the only TensorFlow use in the visible cells; initialising
# TensorFlow's GPU runtime may reserve GPU memory that the PyTorch training below
# needs -- TODO confirm, and consider torch.cuda.get_device_name(0) instead.
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 8651875132216150705, name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 15505193728
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 8219768358649371244
 physical_device_desc: "device: 0, name: Tesla V100-SXM2-16GB, pci bus id: 0000:00:04.0, compute capability: 7.0"]

In [None]:
# Copy the image archive and the scene-change CSV from the Drive mount into /content.
!cp /content/gdrive/MyDrive/dl/project/unique-142p.zip /content
!cp /content/gdrive/MyDrive/dl/project/scene-change.csv /content

In [None]:
# Extract the image archive into the current working directory.
!unzip /content/unique-142p.zip

In [None]:
# Frames numbered above this value form the training split; all others are validation.
SPLIT_FRAME_INDEX = 25947


def _frame_number(filename):
    """Return the integer frame number embedded in a frame image filename.

    The number is read from ``filename[6:-4]``: a 6-character prefix, the digits,
    then a 4-character extension (presumably names like ``frame_00042.jpg`` --
    TODO confirm against the dataset).

    Raises:
        ValueError: if that slice is not an integer literal.
    """
    return int(filename[6:-4])


class _FrameDataset(Dataset):
    """Unlabelled image dataset over the frame files of one directory.

    Subclasses choose which frames belong to them by overriding ``_in_split``.

    Args:
        main_dir: directory that contains the frame images.
        transform: callable applied to each loaded RGB ``PIL.Image``.

    Attributes:
        total_imgs: selected filenames, in sorted order.
    """

    def __init__(self, main_dir, transform):
        self.main_dir = main_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> file mapping identical across runs and machines.
        self.total_imgs = [
            name
            for name in sorted(os.listdir(main_dir))
            # The 'frame' check runs first so unrelated files are never parsed.
            if 'frame' in name and self._in_split(_frame_number(name))
        ]

    @staticmethod
    def _in_split(frame_number):
        """Return True when the given frame number belongs to this split."""
        raise NotImplementedError

    def __len__(self):
        return len(self.total_imgs)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return ``self.transform(image)``."""
        img_loc = os.path.join(self.main_dir, self.total_imgs[idx])
        # Close the file handle promptly; convert() yields an independent image.
        with Image.open(img_loc) as handle:
            image = handle.convert("RGB")
        return self.transform(image)


class TrainDataset(_FrameDataset):
    """Training split: frames whose number is greater than ``SPLIT_FRAME_INDEX``."""

    @staticmethod
    def _in_split(frame_number):
        return frame_number > SPLIT_FRAME_INDEX


class ValDataset(_FrameDataset):
    """Validation split: frames whose number is at most ``SPLIT_FRAME_INDEX``."""

    @staticmethod
    def _in_split(frame_number):
        return frame_number <= SPLIT_FRAME_INDEX

# Preprocessing applied to every frame: resize to 64x64, convert to a tensor,
# then standardise each channel with the mean/std triples below.
# NOTE(review): after this normalisation pixel values can fall outside [0, 1],
# while reconstruction_loss(..., 'gaussian') compares them against a sigmoid
# output -- confirm this target/output range mismatch is intended.
transform = transforms.Compose(
    [
        transforms.Resize(size=(64, 64)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ]
)


In [None]:
# Both splits read the same extracted folder; each Dataset class selects its
# own frames by frame number.
train_dataset = TrainDataset("unique-142p", transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=n_workers, pin_memory=False, drop_last=True)

# Validation uses a fixed order and keeps the final partial batch, so every
# epoch is scored on exactly the same images (shuffling combined with
# drop_last discarded a different subset each epoch).
val_dataset = ValDataset("unique-142p", transform)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=n_workers, pin_memory=False, drop_last=False)

In [None]:
def reconstruction_loss(x, x_recon, distribution):
    """Return the reconstruction term, summed over elements and divided by batch size.

    Args:
        x: target batch; ``x.size(0)`` is taken as the batch size.
        x_recon: raw decoder output (logits) with the same shape as ``x``.
        distribution: ``'bernoulli'`` for binary cross-entropy on the logits,
            ``'gaussian'`` for squared error on ``sigmoid(x_recon)``.

    Returns:
        A scalar tensor, or ``None`` when ``distribution`` is neither of the
        two supported names.
    """
    batch_size = x.size(0)
    assert batch_size != 0

    if distribution == 'bernoulli':
        # reduction='sum' is the supported spelling of the deprecated size_average=False.
        recon_loss = F.binary_cross_entropy_with_logits(x_recon, x, reduction='sum').div(batch_size)
    elif distribution == 'gaussian':
        # torch.sigmoid replaces the deprecated F.sigmoid alias.
        x_recon = torch.sigmoid(x_recon)
        recon_loss = F.mse_loss(x_recon, x, reduction='sum').div(batch_size)
    else:
        recon_loss = None

    return recon_loss


def kl_divergence(mu, logvar):
    """Compute ``-0.5 * (1 + logvar - mu^2 - exp(logvar))`` and three reductions of it.

    Inputs with four dimensions are first viewed as ``(size(0), size(1))``.

    Returns:
        ``(total_kld, dimension_wise_kld, mean_kld)``:
        the per-sample sum over dim 1 averaged over dim 0 (shape ``(1,)``),
        the average over dim 0 for each column, and the per-sample mean over
        dim 1 averaged over dim 0 (shape ``(1,)``).
    """
    n_samples = mu.size(0)
    assert n_samples != 0

    # Collapse 4-D inputs to two dimensions before the element-wise formula.
    if mu.dim() == 4:
        mu = mu.view(*mu.shape[:2])
    if logvar.dim() == 4:
        logvar = logvar.view(*logvar.shape[:2])

    per_element = -0.5 * (1 + logvar - mu.pow(2) - logvar.exp())

    summed_over_latent = per_element.sum(dim=1)
    averaged_over_latent = per_element.mean(dim=1)
    return (
        summed_over_latent.mean(dim=0, keepdim=True),
        per_element.mean(dim=0),
        averaged_over_latent.mean(dim=0, keepdim=True),
    )

In [None]:
def reparametrize(mu, logvar):
    """Sample ``mu + std * eps`` with ``std = exp(logvar / 2)`` and ``eps ~ N(0, 1)``.

    The noise is drawn independently of the autograd graph, so gradients flow
    to ``mu`` and ``logvar`` only.

    Args:
        mu: tensor of means.
        logvar: tensor of log-variances, broadcastable with ``mu``.

    Returns:
        A tensor of samples.
    """
    std = logvar.div(2).exp()
    # randn_like matches std's shape, dtype and device; it replaces the
    # deprecated Variable(std.data.new(...).normal_()) construction.
    eps = torch.randn_like(std)
    return mu + std*eps


class View(nn.Module):
    """Module form of ``Tensor.view`` so a reshape can be placed inside ``nn.Sequential``."""

    def __init__(self, size):
        super().__init__()
        # Target shape, passed unchanged to Tensor.view on every forward call.
        self.size = size

    def forward(self, tensor):
        reshaped = tensor.view(self.size)
        return reshaped

In [None]:
class BetaVAE_H(nn.Module):
    """Model proposed in original beta-VAE paper(Higgins et al, ICLR, 2017).

    The encoder maps an image batch to ``2 * z_dim`` values per sample: the
    first ``z_dim`` are used as the latent mean, the remainder as the
    log-variance. The decoder maps a sampled latent back to an ``nc``-channel
    image. The shape comments next to the layers hold for 64x64 inputs.
    """

    def __init__(self, z_dim=10, nc=3):
        super().__init__()
        self.z_dim = z_dim
        self.nc = nc

        self.encoder = nn.Sequential(
            nn.Conv2d(nc, 32, kernel_size=4, stride=2, padding=1),    # B,  32, 32, 32
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=4, stride=2, padding=1),    # B,  32, 16, 16
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=1),    # B,  64,  8,  8
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=4, stride=2, padding=1),    # B,  64,  4,  4
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 256, kernel_size=4, stride=1),              # B, 256,  1,  1
            nn.ReLU(inplace=True),
            View((-1, 256*1*1)),                                      # B, 256
            nn.Linear(256, z_dim*2),                                  # B, z_dim*2
        )

        self.decoder = nn.Sequential(
            nn.Linear(z_dim, 256),                                            # B, 256
            View((-1, 256, 1, 1)),                                            # B, 256,  1,  1
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(256, 64, kernel_size=4),                       # B,  64,  4,  4
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1),   # B,  64,  8,  8
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1),   # B,  32, 16, 16
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(32, 32, kernel_size=4, stride=2, padding=1),   # B,  32, 32, 32
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(32, nc, kernel_size=4, stride=2, padding=1),   # B, nc, 64, 64
        )

        self.weight_init()

    def weight_init(self):
        """Run ``kaiming_init`` over the layers of the encoder and decoder."""
        # Module.apply visits the encoder layers, then the decoder layers, in
        # definition order; the container modules it also visits are ignored
        # by kaiming_init.
        self.apply(kaiming_init)

    def forward(self, x, train=True):
        """Encode ``x``, sample a latent, and decode it.

        ``train`` only selects what is returned; it does not switch the
        module between training and evaluation mode.

        Returns:
            ``(x_recon, mu, logvar)`` when ``train`` is true, otherwise
            ``(x_recon, z)``.
        """
        stats = self.encoder(x)
        mu, logvar = stats[:, :self.z_dim], stats[:, self.z_dim:]
        z = reparametrize(mu, logvar)
        x_recon = self.decoder(z)
        return (x_recon, mu, logvar) if train else (x_recon, z)

In [None]:
def kaiming_init(m):
    """Initialise one module in place; modules of other types are left untouched.

    ``nn.Linear`` / ``nn.Conv2d`` weights are drawn with ``kaiming_normal_``;
    ``nn.BatchNorm1d`` / ``nn.BatchNorm2d`` weights are set to 1. In both cases
    an existing bias is zeroed.

    NOTE(review): ``nn.ConvTranspose2d`` is not matched by these checks, so
    transposed convolutions keep PyTorch's default initialisation -- confirm
    that this is intended.
    """
    is_linear_or_conv = isinstance(m, (nn.Linear, nn.Conv2d))
    is_batchnorm = isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d))
    if not (is_linear_or_conv or is_batchnorm):
        return

    if is_linear_or_conv:
        init.kaiming_normal_(m.weight)
    else:
        m.weight.data.fill_(1)

    # Bias handling is identical for both supported families.
    if m.bias is not None:
        m.bias.data.zero_()


def normal_init(m, mean, std):
    """Initialise one module in place; modules of other types are left untouched.

    ``nn.Linear`` / ``nn.Conv2d`` weights are drawn from ``N(mean, std)``;
    ``nn.BatchNorm1d`` / ``nn.BatchNorm2d`` weights are set to 1. In both cases
    an existing bias is zeroed.

    Args:
        m: the module to initialise.
        mean: mean of the normal distribution used for the weights.
        std: standard deviation of that distribution.
    """
    if isinstance(m, (nn.Linear, nn.Conv2d)):
        m.weight.data.normal_(mean, std)
        # Test the parameter itself: a layer built with bias=False has
        # m.bias set to None, and reading .data from it would raise.
        if m.bias is not None:
            m.bias.data.zero_()
    elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)):
        m.weight.data.fill_(1)
        if m.bias is not None:
            m.bias.data.zero_()

In [None]:
# Build the VAE with its default arguments (z_dim=10, nc=3) and print its layers.
model = BetaVAE_H()
print(model)

BetaVAE_H(
  (encoder): Sequential(
    (0): Conv2d(3, 32, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(32, 32, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (5): ReLU(inplace=True)
    (6): Conv2d(64, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(64, 256, kernel_size=(4, 4), stride=(1, 1))
    (9): ReLU(inplace=True)
    (10): View()
    (11): Linear(in_features=256, out_features=20, bias=True)
  )
  (decoder): Sequential(
    (0): Linear(in_features=10, out_features=256, bias=True)
    (1): View()
    (2): ReLU(inplace=True)
    (3): ConvTranspose2d(256, 64, kernel_size=(4, 4), stride=(1, 1))
    (4): ReLU(inplace=True)
    (5): ConvTranspose2d(64, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): ConvTranspose2d(64, 32, kernel_siz

In [None]:
def train(model, optimiser, dataloader, beta):
    """Run one optimisation pass over ``dataloader`` with the beta-VAE objective.

    For each batch the loss ``recon_loss + beta * total_kld`` is computed
    (reconstruction term in 'gaussian' mode) and one optimiser step is taken.

    Args:
        model: module whose call returns ``(x_recon, mu, logvar)``.
        optimiser: optimiser that updates ``model``'s parameters.
        dataloader: iterable of image batches that also supports ``len()``.
        beta: weight applied to the total KL term.

    Returns:
        ``(rl, bl, kl, trl, tbl, tkl)``: the reconstruction, beta-VAE and KL
        losses averaged over ``len(dataloader)``, followed by the lists of the
        same three losses sampled every 20th batch.

    Raises:
        ZeroDivisionError: if ``len(dataloader)`` is 0.
    """
    model.train()
    # Send batches to wherever the model lives rather than assuming CUDA.
    device = next(model.parameters()).device
    rl = 0
    bl = 0
    kl = 0
    trl = []
    tbl = []
    tkl = []
    for i, x in enumerate(dataloader):
        x = x.to(device)
        x_recon, mu, logvar = model(x)
        recon_loss = reconstruction_loss(x, x_recon, 'gaussian')
        total_kld, dim_wise_kld, mean_kld = kl_divergence(mu, logvar)

        beta_vae_loss = recon_loss + beta*total_kld

        optimiser.zero_grad()
        beta_vae_loss.backward()
        optimiser.step()

        rl+=recon_loss.item()
        kl+=total_kld.item()
        bl+=beta_vae_loss.item()

        # Log and sample the per-batch losses every 20th batch.
        if i % 20 == 0:
            print('[{}] recon_loss:{:.3f} total_kld:{:.3f} mean_kld:{:.3f} beta_vae_loss:{:.3f}'.format(
                i, recon_loss.item(), total_kld.item(), mean_kld.item(), beta_vae_loss.item()))

            trl.append(recon_loss.item())
            tkl.append(total_kld.item())
            tbl.append(beta_vae_loss.item())

    return rl/len(dataloader), bl/len(dataloader), kl/len(dataloader), trl, tbl, tkl

In [None]:
def val(model, optimiser, dataloader, beta):
    """Evaluate the beta-VAE objective over ``dataloader`` without updating the model.

    Args:
        model: module whose call returns ``(x_recon, mu, logvar)``.
        optimiser: unused; kept so the signature mirrors ``train``.
        dataloader: iterable of image batches that also supports ``len()``.
        beta: weight applied to the total KL term.

    Returns:
        ``(rl, bl, kl, trl, tbl, tkl)``: the reconstruction, beta-VAE and KL
        losses averaged over ``len(dataloader)``, followed by the lists of the
        same three losses sampled every 20th batch.

    Raises:
        ZeroDivisionError: if ``len(dataloader)`` is 0.
    """
    model.eval()
    # Send batches to wherever the model lives rather than assuming CUDA.
    device = next(model.parameters()).device
    rl = 0
    bl = 0
    kl = 0
    trl = []
    tbl = []
    tkl = []
    # No gradients are needed for evaluation; skipping the autograd graph
    # saves memory and time.
    with torch.no_grad():
        for i, x in enumerate(dataloader):
            x = x.to(device)
            x_recon, mu, logvar = model(x)
            recon_loss = reconstruction_loss(x, x_recon, 'gaussian')
            total_kld, dim_wise_kld, mean_kld = kl_divergence(mu, logvar)

            beta_vae_loss = recon_loss + beta*total_kld

            rl+=recon_loss.item()
            kl+=total_kld.item()
            bl+=beta_vae_loss.item()

            # Log and sample the per-batch losses every 20th batch.
            if i % 20 == 0:
                print('[{}] recon_loss:{:.3f} total_kld:{:.3f} mean_kld:{:.3f} beta_vae_loss:{:.3f}'.format(
                    i, recon_loss.item(), total_kld.item(), mean_kld.item(), beta_vae_loss.item()))

                trl.append(recon_loss.item())
                tkl.append(total_kld.item())
                tbl.append(beta_vae_loss.item())

    return rl/len(dataloader), bl/len(dataloader), kl/len(dataloader), trl, tbl, tkl

In [None]:
# Move the model to the device chosen in the configuration cell (GPU when
# available) instead of requiring CUDA unconditionally.
model.to(device)

optimiser = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)
# NOTE(review): the training loop only reads the scheduler's learning rate; its
# step() call there is commented out, so the rate stays constant -- confirm intended.
scheduler = torch.optim.lr_scheduler.StepLR(optimiser, step_size=100, gamma=0.5)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimiser, mode="min", factor=0.3, patience=1, verbose=True)
epochs = 100
# KL weights swept by the training loop, one full run of `epochs` per value.
betas = [0.25, 0.5, 1, 2, 4]


# Loss histories: upper-case T* lists hold the every-20th-batch samples, the
# others hold per-epoch averages. The training loop resets them for each beta.
TRL, TBL, TKL, RL, BL, KL = [], [], [], [], [], []
val_TRL, val_TBL, val_TKL, val_RL, val_BL, val_KL = [], [], [], [], [], []

In [None]:
# Work from /content so the relative 'gdrive/...' checkpoint path and the results
# CSV written by the training cell resolve under it.
%cd /content/

/content


In [None]:
# Empty per-epoch loss log; the training loop adds one row per (beta, epoch).
loss_df = pd.DataFrame(
    data=[],
    columns=[
        'Epoch', 'Beta',
        'Train Rec_Loss', 'Train KL_Loss', 'Train Beta_Loss',
        'Val Rec_Loss', 'Val KL_Loss', 'Val Beta_Loss',
    ],
)
loss_df

Unnamed: 0,Epoch,Beta,Train Rec_Loss,Train KL_Loss,Train Beta_Loss,Val Rec_Loss,Val KL_Loss,Val Beta_Loss


In [None]:
# Column order of the per-epoch loss log.
loss_columns = ['Epoch', 'Beta', 'Train Rec_Loss', 'Train KL_Loss', 'Train Beta_Loss','Val Rec_Loss', 'Val KL_Loss', 'Val Beta_Loss']

# Sweep over the KL weights: `epochs` epochs per beta, one checkpoint per epoch.
# NOTE(review): the same `model` and `optimiser` objects are reused for every
# beta, so each sweep step continues from the previous beta's weights rather
# than from a fresh model -- confirm this is intended.
for beta in betas:
    model_no = beta
    # Histories are reset so each checkpoint only carries the current beta's curves.
    TRL, TBL, TKL, RL, BL, KL = [], [], [], [], [], []
    val_TRL, val_TBL, val_TKL, val_RL, val_BL, val_KL = [], [], [], [], [], []

    for i in range(epochs):
        print(scheduler.get_last_lr())

        print("Epoch", i)
        start = time.time()
        rl, bl, kl, trl, tbl, tkl = train(model, optimiser, train_loader, beta)
        val_rl, val_bl, val_kl, val_trl, val_tbl, val_tkl = val(model, optimiser, val_loader, beta)
        print("RL:", rl, "BL:", bl, "KL:", kl)
        print("val_RL:", val_rl, "val_BL:", val_bl, "val_KL:", val_kl)
        # The reported time covers the training pass and the validation pass.
        print("Train time:", time.time()-start)

        TRL += trl
        TBL += tbl
        TKL += tkl
        RL.append(rl)
        BL.append(bl)
        KL.append(kl)

        val_TRL += val_trl
        val_TBL += val_tbl
        val_TKL += val_tkl
        val_RL.append(val_rl)
        val_BL.append(val_bl)
        val_KL.append(val_kl)

        # scheduler.step()
        # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
        # The log is updated every epoch so an interrupted run keeps its rows.
        epoch_row = pd.DataFrame([[i, beta, rl, kl, bl, val_rl, val_kl, val_bl]], columns=loss_columns)
        if loss_df.empty:
            loss_df = epoch_row
        else:
            loss_df = pd.concat([loss_df, epoch_row], ignore_index=True)

        # The checkpoint path is relative to the current working directory.
        torch.save({
                    'epoch': i,
                    'model_state_dict': model.state_dict(),
                    'optimiser_state_dict': optimiser.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                    'trl': TRL,
                    'tbl': TBL,
                    'tkl': TKL,
                    'rl': RL,
                    'bl': BL,
                    'kl': KL,
                    'val_trl': val_TRL,
                    'val_tbl': val_TBL,
                    'val_tkl': val_TKL,
                    'val_rl': val_RL,
                    'val_bl': val_BL,
                    'val_kl': val_KL,
                    }, 'gdrive/MyDrive/dl/project/new_model/new_model_' + str(model_no) + '_' + str(i))

# Runs once after the whole sweep: write the loss log to CSV (index omitted)
# and copy the file into the Drive folder that also holds the checkpoints.
loss_df.to_csv('results_05_4_.csv', index=False)
!cp results_05_4_.csv /content/gdrive/MyDrive/dl/project/new_model/