In [1]:
import torch
import torch.nn as nn
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import os
from torch import optim
import time
import torch.nn.functional as  F


from Image_Sampler import Sampler



# Model name and path of the checkpoint file that is loaded further below.
MODEL_NAME, PATH = "clearML", "model.pt"
# Dataset locations, currently disabled:
# IMG_TRAIN = "/disk/vanishing_data/is789/anomaly_samples/train_set/"
# IMG_TEST = "/disk/vanishing_data/is789/anomaly_samples/40test/"

# Central hyper-parameter table read by the model and the loss cell.
parameters = dict(
    epoch=16000,
    batch_size=10,
    imgSize=512,
    zDim=128,
    learning_rate=1e-5,
    # One output-channel count per convolution stage (seven stages).
    # Previously tried alternative: [64, 120, 240, 480, 800]
    layers=[64, 128, 256, 256, 512, 512, 940],
    reduce_threshold=[0.6, 0.8],
)


In [2]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)


cuda


In [3]:

class VAE(nn.Module):
    """Convolutional encoder/decoder built from seven mirrored stages.

    The encoder is seven ``Conv2d`` + ``BatchNorm2d`` stages; the decoder is
    seven ``ConvTranspose2d`` stages that undo them in reverse order, with a
    final sigmoid.  The fully-connected bottleneck and the reparameterisation
    step that the class name suggests are not part of this implementation:
    ``forward`` is a plain encode -> decode pass.

    Sub-module attribute names (``encConv1`` .. ``encConv7``, ``encBn1`` ..
    ``encBn7``, ``decConv1`` .. ``decConv7``, ``decBn2`` .. ``decBn7``) and
    their registration order are part of the checkpoint format and must not
    change.
    """

    # Encoder-side tables, indexed by encoder stage (0 = first convolution).
    _KERNEL = (3, 3, 3, 3, 3, 3, 3)
    _STRIDE = (1, 2, 1, 2, 2, 2, 2)
    _ENC_PADDING = (1, 0, 1, 0, 0, 0, 0)
    # Decoder-side tables, indexed by decoder stage (0 = first transposed
    # convolution).  Kernel and stride of decoder stage d are taken from the
    # mirrored encoder stage (last - d).
    _DEC_PADDING = (0, 0, 0, 0, 1, 0, 1)
    _DEC_OUTPUT_PADDING = (0, 0, 0, 0, 0, 1, 0)

    def __init__(self, imgChannels=3, imgSize=None, zDim=None, layers=None):
        """Create all layers and print the resulting tensor sizes.

        Args:
            imgChannels: number of channels of the input/output image.
            imgSize: side length of the (square) input image.  ``None`` reads
                ``parameters["imgSize"]`` at construction time.
            zDim: accepted for backward compatibility only; it is not used
                because the fully-connected bottleneck is disabled.
            layers: seven output-channel counts, one per encoder stage.
                ``None`` reads ``parameters["layers"]`` at construction time.

        Raises:
            ValueError: if ``layers`` does not contain exactly seven entries.
        """
        super(VAE, self).__init__()

        # Resolve config at call time so that re-running only the config cell
        # is enough to change the model that gets built.
        if imgSize is None:
            imgSize = parameters["imgSize"]
        if layers is None:
            layers = parameters["layers"]
        layers = list(layers)

        kernel = self._KERNEL
        stride = self._STRIDE
        nStages = len(kernel)
        if len(layers) != nStages:
            raise ValueError(
                f"VAE needs exactly {nStages} layer widths, got {len(layers)}"
            )

        # Encoder: encConv{i} followed by encBn{i}, i = 1..7.
        inChannels = imgChannels
        for i in range(nStages):
            self.add_module(
                f"encConv{i + 1}",
                nn.Conv2d(in_channels=inChannels, out_channels=layers[i],
                          kernel_size=kernel[i], stride=stride[i],
                          padding=self._ENC_PADDING[i]),
            )
            self.add_module(f"encBn{i + 1}", nn.BatchNorm2d(layers[i]))
            inChannels = layers[i]

        encoderDims = self.calcEncoderDims(nStages, imgSize, kernel, self._ENC_PADDING, stride)

        # Decoder: decConv{d+1} mirrors encoder stage (last - d).  Batch-norm
        # numbering starts at 2 (decBn2 follows decConv1) and the last
        # transposed convolution has no batch-norm.
        for d in range(nStages):
            src = nStages - 1 - d
            outChannels = layers[src - 1] if src > 0 else imgChannels
            self.add_module(
                f"decConv{d + 1}",
                nn.ConvTranspose2d(in_channels=layers[src], out_channels=outChannels,
                                   kernel_size=kernel[src], stride=stride[src],
                                   padding=self._DEC_PADDING[d],
                                   output_padding=self._DEC_OUTPUT_PADDING[d]),
            )
            if d < nStages - 1:
                self.add_module(f"decBn{d + 2}", nn.BatchNorm2d(outChannels))

        # Not read by the current convolution-only path; kept so existing
        # code that inspects the attribute keeps working.
        self.final_encoder_dim = None

        decoderDims = self.calcDecoderDims(nStages, encoderDims[-1], kernel,
                                           self._DEC_PADDING, self._DEC_OUTPUT_PADDING, stride)
        self.printModel(layers, encoderDims, decoderDims, imgSize, imgChannels)

    def calcEncoderDims(self, layer_size, imageSize, kernel, in_padding, stride):
        """Return the spatial size after each of ``layer_size`` convolutions.

        Uses floor((n + 2*padding - (kernel - 1) - 1) / stride) + 1 per stage.
        The input size itself is not included in the returned list.
        """
        dims = []
        current = imageSize
        for i in range(layer_size):
            current = (current + 2 * in_padding[i] - (kernel[i] - 1) - 1) // stride[i] + 1
            dims.append(current)
        return dims

    def calcDecoderDims(self, layer_size, imageSize, kernel, in_trans_padding, out_padding, stride, d=1):
        """Return the spatial sizes along the transposed-convolution chain.

        ``kernel`` and ``stride`` are indexed in encoder order and read in
        reverse; ``in_trans_padding`` and ``out_padding`` are indexed in
        decoder order.  ``d`` is the dilation factor.  The returned list
        starts with ``imageSize`` and therefore has ``layer_size + 1`` entries.
        """
        dims = [imageSize]
        for i in range(layer_size):
            mirrored = layer_size - 1 - i
            dims.append(
                (dims[-1] - 1) * stride[mirrored]
                - 2 * in_trans_padding[i]
                + d * (kernel[mirrored] - 1)
                + out_padding[i]
                + 1
            )
        return dims

    def printModel(self, layers, encDims, decDims, imageSize, imgChannels):
        """Print the size of the tensor after every encoder and decoder stage."""
        print("=============")
        print("Image Flow:")
        print("Encoder:")
        print(f"{imageSize}x{imageSize}x{imgChannels} (Input Image)")
        for size, channels in zip(encDims, layers):
            print(f"{size}x{size}x{channels}")

        print("Decoder:")
        # decDims has one more entry than layers; zip stops before the last
        # one, which is printed separately as the output image.
        for size, channels in zip(decDims, reversed(layers)):
            print(f"{size}x{size}x{channels}")
        print(f"{decDims[-1]}x{decDims[-1]}x{imgChannels} (Output Image)")
        print("=============")

    def encoder(self, x):
        """Apply conv -> leaky ReLU -> batch-norm for each of the seven stages."""
        for i in range(1, len(self._KERNEL) + 1):
            x = F.leaky_relu(getattr(self, f"encConv{i}")(x))
            x = getattr(self, f"encBn{i}")(x)
        return x

    def decoder(self, x):
        """Apply the transposed-convolution chain and squash with a sigmoid.

        Stages 1..6 are transposed conv -> leaky ReLU -> batch-norm; the last
        stage is a transposed conv followed by a sigmoid.
        """
        nStages = len(self._KERNEL)
        for i in range(1, nStages):
            x = F.leaky_relu(getattr(self, f"decConv{i}")(x))
            x = getattr(self, f"decBn{i + 1}")(x)
        return torch.sigmoid(getattr(self, f"decConv{nStages}")(x))

    def forward(self, x):
        """Encode ``x`` and decode the result back to image space."""
        z = self.encoder(x)
        out = self.decoder(z)
        return out

In [4]:
# Build the network and place it on the selected device before restoring weights.
model = VAE()
model.to(device)
# map_location lets a checkpoint that was saved on a GPU be restored on a
# CPU-only machine as well.
# NOTE(review): torch.load unpickles the file - only load checkpoints from a
# trusted source.
model.load_state_dict(torch.load(PATH, map_location=device))
# This notebook only runs inference: switch BatchNorm to its stored running
# statistics so outputs do not depend on the batch and the loaded buffers are
# not modified by forward passes.
model.eval();

Image Flow:
Encoder:
512x512x3 (Input Image)
512x512x64
255x255x128
255x255x256
127x127x256
63x63x512
31x31x512
15x15x940
Decoder:
15x15x940
31x31x512
63x63x512
127x127x256
255x255x256
255x255x128
512x512x64
512x512x3 (Output Image)


KeyboardInterrupt: 

In [None]:

def loss_fn(x, recon_x):
    """Summed squared reconstruction error between ``x`` and ``recon_x``.

    Both tensors are flattened before comparison, so the result does not
    depend on the configured image size or on memory layout; they only need
    to hold the same number of elements.  No KL-divergence term is added.

    Args:
        x: original input tensor.
        recon_x: reconstruction with the same number of elements as ``x``.

    Returns:
        A pair ``(loss, loss)`` holding the same scalar tensor twice; the
        two-value shape is kept for callers that unpack two losses.
    """
    # reshape (unlike view) also accepts non-contiguous tensors.
    Recon_loss = F.mse_loss(recon_x.reshape(-1), x.reshape(-1), reduction="sum")
    return Recon_loss, Recon_loss


In [None]:
# The CARLA simulator has to be started before this cell is run.
sampler = Sampler(
    s_width=512,
    s_height=512,
    cam_height=4,
    cam_zoom=50,
    cam_rotation=-18,
)
video = sampler.create_model_video(model, device)

In [None]:
# img = sampler.sample()
# 

In [None]:
# import cv2
# cv2.imwrite("test.png", img)

# plt.imshow(cv2.imread("test.png"))