Copyright 2020 Google LLC.

Licensed under the Apache License, Version 2.0 (the \"License\")

In [2]:
#@title License
# Licensed under the Apache License, Version 2.0 (the \"License\");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an \"AS IS\" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Load the Impressionist Datasets

## First, download it from Kaggle and upload it to Google drive. Then, load it using the cells below.

In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


# Single image to MPI example Colab

This Colab is part of code for the paper ___Single-view view synthesis with multiplane images___, and may be found at <br>https://github.com/google-research/google-research/tree/master/single_view_mpi.

The project site is at https://single-view-mpi.github.io/.

Choose __Run all__ from the Runtime menu to:
* set up the network and load our trained model,
* apply it to an RGB input to generate a 32-layer MPI,
* show individual MPI layers and synthesized disparity,
* render novel views from different camera positions,
* visualize the resulting MPI in an HTML-based viewer.


## Download library code, model weights, and example image.

In [4]:
!echo Fetching code from github...
!apt install subversion
!svn export --force https://github.com/google-research/google-research/trunk/single_view_mpi

!echo
!echo Fetching trained model weights...
!rm single_view_mpi_full_keras.tar.gz
!rm -rf single_view_mpi_full
!wget https://storage.googleapis.com/stereo-magnification-public-files/models/single_view_mpi_full_keras.tar.gz
!tar -xzvf single_view_mpi_full_keras.tar.gz

!echo
!echo Fetching example image...
!rm -f input.png
!wget https://single-view-mpi.github.io/mpi/7/input.png


Fetching code from github...
Reading package lists... Done
Building dependency tree       
Reading state information... Done
subversion is already the newest version (1.9.7-4ubuntu1).
0 upgraded, 0 newly installed, 0 to remove and 39 not upgraded.
A    single_view_mpi
A    single_view_mpi/README.md
A    single_view_mpi/example.ipynb
A    single_view_mpi/libs
A    single_view_mpi/libs/geometry.py
A    single_view_mpi/libs/geometry_test.py
A    single_view_mpi/libs/mpi.py
A    single_view_mpi/libs/mpi_test.py
A    single_view_mpi/libs/nets.py
A    single_view_mpi/libs/utils.py
A    single_view_mpi/libs/utils_test.py
A    single_view_mpi/requirements.txt
A    single_view_mpi/run.sh
Exported revision 9726.

Fetching trained model weights...
--2022-03-19 01:45:01--  https://storage.googleapis.com/stereo-magnification-public-files/models/single_view_mpi_full_keras.tar.gz
Resolving storage.googleapis.com (storage.googleapis.com)... 64.233.189.128, 108.177.125.128, 142.250.157.128, ...
Connect

## Set up the model


In [5]:
pip install -r single_view_mpi/requirements.txt



In [6]:
import tensorflow as tf

from single_view_mpi.libs import mpi
from single_view_mpi.libs import nets

input = tf.keras.Input(shape=(None, None, 3))
output = nets.mpi_from_image(input)

model = tf.keras.Model(inputs=input, outputs=output)
print('Model created.')
# Our full model, trained on RealEstate10K.
model.load_weights('single_view_mpi_full_keras/single_view_mpi_keras_weights')
print('Weights loaded.')


Model created.
Weights loaded.


# Load the samples of data we want to analyze

In [7]:
import os
FILE_PATH = "gdrive/MyDrive/cs 231a/realestate10k/"
os.chdir(FILE_PATH)

In [8]:
paintings = os.listdir()
paintings[0]

'40173467.jpg'

In [9]:
len(paintings)

295

In [10]:
import random
sample_sz = 100
random_sample = random.sample(paintings, sample_sz)
random_sample

['51151100.jpg',
 '40073367.jpg',
 '55121000.jpg',
 '39205833.jpg',
 '57624000.jpg',
 '50950900.jpg',
 '40140100.jpg',
 '59960000.jpg',
 '55155000.jpg',
 '50717333.jpg',
 '59259000.jpg',
 '55889000.jpg',
 '58158000.jpg',
 '50683967.jpg',
 '56356000.jpg',
 '38905533.jpg',
 '56223000.jpg',
 '41107733.jpg',
 '55722000.jpg',
 '41007633.jpg',
 '50417033.jpg',
 '50550500.jpg',
 '55422000.jpg',
 '39239200.jpg',
 '54854000.jpg',
 '54221000.jpg',
 '58592000.jpg',
 '57724000.jpg',
 '59660000.jpg',
 '39739700.jpg',
 '58292000.jpg',
 '39172467.jpg',
 '50250200.jpg',
 '59126000.jpg',
 '56657000.jpg',
 '58725000.jpg',
 '57691000.jpg',
 '57557000.jpg',
 '55222000.jpg',
 '56590000.jpg',
 '58025000.jpg',
 '39973267.jpg',
 '57824000.jpg',
 '57924000.jpg',
 '59159000.jpg',
 '38872167.jpg',
 '40573867.jpg',
 '41474767.jpg',
 '38972267.jpg',
 '40006633.jpg',
 '39773067.jpg',
 '39072367.jpg',
 '56390000.jpg',
 '58859000.jpg',
 '59192000.jpg',
 '54421000.jpg',
 '39706333.jpg',
 '56423000.jpg',
 '51251200.jpg

# Calculate PSNR and SSIM

Sourced from: https://cvnote.ddlee.cc/2019/09/12/psnr-ssim-python

In [11]:
import math
import numpy as np

def calculate_psnr(img1, img2):
    # img1 and img2 have range [0, 255]
    # img1 = img1.astype(np.float64)
    # img2 = img2.astype(np.float64)
    mse = np.mean((img1 - img2)**2)
    if mse == 0:
        return float('inf')
    return 20 * math.log10(255.0 / math.sqrt(mse))

import math
import numpy as np
import cv2

def ssim(img1, img2):
    C1 = (0.01 * 255)**2
    C2 = (0.03 * 255)**2

    # img1 = img1.astype(np.float64)
    # img2 = img2.astype(np.float64)
    kernel = cv2.getGaussianKernel(11, 1.5)
    window = np.outer(kernel, kernel.transpose())

    mu1 = cv2.filter2D(img1, -1, window)[5:-5, 5:-5]  # valid
    mu2 = cv2.filter2D(img2, -1, window)[5:-5, 5:-5]
    mu1_sq = mu1**2
    mu2_sq = mu2**2
    mu1_mu2 = mu1 * mu2
    sigma1_sq = cv2.filter2D(img1**2, -1, window)[5:-5, 5:-5] - mu1_sq
    sigma2_sq = cv2.filter2D(img2**2, -1, window)[5:-5, 5:-5] - mu2_sq
    sigma12 = cv2.filter2D(img1 * img2, -1, window)[5:-5, 5:-5] - mu1_mu2

    ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) *
                                                            (sigma1_sq + sigma2_sq + C2))
    return ssim_map.mean()


def calculate_ssim(img1, img2):
    '''calculate SSIM
    the same outputs as MATLAB's
    img1, img2: [0, 255]
    '''
    if not img1.shape == img2.shape:
        raise ValueError('Input images must have the same dimensions.')
    if img1.ndim == 2:
        return ssim(img1, img2)
    elif img1.ndim == 3:
        if img1.shape[2] == 3:
            ssims = []
            for i in range(3):
                ssims.append(ssim(img1, img2))
            return np.array(ssims).mean()
        elif img1.shape[2] == 1:
            return ssim(np.squeeze(img1), np.squeeze(img2))
    else:
        raise ValueError('Wrong input image dimensions.')

In [21]:
inputfile = random_sample[0]
input_rgb = tf.image.decode_image(tf.io.read_file(inputfile), dtype=tf.float32)

# Generate MPI
# image: (1, 1024, 1024, 3)
print(input_rgb.shape)
N, H, W, C = input_rgb[tf.newaxis].shape
# enforce square representation
arr = np.zeros((1, 1024 - H, W, C))
representation = np.append(input_rgb[tf.newaxis], arr, axis=1)
print(representation.shape)
representation = representation[0:1, 0:1024, 0: 1024, 0:3]
print(representation.shape)
print(input_rgb[tf.newaxis].shape)
layers = model(representation)[0]
print(layers[0].shape)

(720, 1280, 3)
(1, 1024, 1280, 3)
(1, 1024, 1024, 3)
(1, 720, 1280, 3)
(1024, 1024, 4)


In [32]:
# calculate SSIM and PSNR for samples
num_layers = 32
# ssims = np.zeros((sample_sz, num_layers))
# psnrs = np.zeros((sample_sz, num_layers))
ssims = np.zeros(sample_sz)
psnrs = np.zeros(sample_sz)
for i in range(sample_sz):
  if (i % 10 == 0):
    print(i)
  # Input image
  inputfile = random_sample[i]
  input_rgb = tf.image.decode_image(tf.io.read_file(inputfile), dtype=tf.float32)

  # Generate MPI
  # print(input_rgb.shape)
  N, H, W, C = input_rgb[tf.newaxis].shape
  # enforce square representation for 1024 x 1024 images
  arr = np.zeros((1, 1024 - H, W, C))
  representation = np.append(input_rgb[tf.newaxis], arr, axis=1)
  representation = representation[0:1, 0:1024, 0: 1024, 0:3]
  layers = model(representation)[0]

  # Note: this only selects RGB layers and not output
 
  # for j in range(len(layers)):
    # ssims[i][j] = calculate_ssim(input_rgb.numpy(), layers[j][:, :, 0:3].numpy())
    # psnrs[i][j] = calculate_psnr(input_rgb, layers[j][:, :, 0:3])
    # calculate for the first layer
  ssims[i] = calculate_ssim(representation.reshape((1024, 1024, 3)), layers[0][:, :, 0:3].numpy())
  psnrs[i] = calculate_psnr(representation.reshape((1024, 1024, 3)), layers[0][:, :, 0:3])

print("mean ssim:")
print(ssims.mean())
print("mean psnr:")
print(psnrs.mean())

0
10
20
30
40
50
60
70
80
90
mean ssim:
0.999903337703274
mean psnr:
77.83639952305774
