<a href="https://colab.research.google.com/github/Lindronics/honours_project/blob/master/notebooks/autoencoder/RGB_FIR_Autoencoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
!pip install tensorflow-gpu==2.1
!pip install grpcio==1.24.3

In [0]:
import tensorflow as tf
from tensorflow import keras as K
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import json
import cv2
from sklearn.model_selection import train_test_split
from datetime import datetime
import os

In [0]:
def hflip(img):
    """Return a horizontally mirrored copy of ``img`` with a trailing axis added.

    The image is flipped with ``cv2.flip`` using flip code 1 (left-right
    mirror) and the flipped result is given a new last axis of length 1.

    NOTE(review): the callers in this notebook pass (H, W, 1) arrays; this
    relies on cv2.flip handing back a 2-D array for single-channel input so
    that the returned shape matches the input shape -- confirm.
    """
    mirrored = cv2.flip(img, 1)
    return mirrored[..., None]

def crop(img, max_zoom=0.5):
    """Randomly crop ``img`` and scale the crop back up to the original size.

    A zoom factor is drawn uniformly from ``[0, max_zoom)``; the crop keeps
    ``1 - zoom`` of each of the first two axes and is placed at a random
    offset inside the image. The crop is resized back to the original
    (rows, cols) size, divided by 255 and returned with a trailing axis of
    length 1.

    Args:
        img: Image array; only its first two axes are cropped.
        max_zoom: Upper bound of the random zoom factor (fraction of each
            axis that may be cut away).

    Returns:
        The resized crop divided by 255, with a new last axis appended.

    NOTE(review): the result is divided by 255, so the input is expected to
    be in the 0-255 range. The (commented-out) calls in ``load_data`` pass
    images that were already divided by 255 -- confirm the intended range
    before enabling them.
    """
    shape = img.shape
    rows, cols = shape[0], shape[1]

    zoom = np.random.uniform(0, max_zoom)
    # Keep at least one pixel per axis so cv2.resize never receives an empty crop.
    crop_rows = max(1, int((1 - zoom) * rows))
    crop_cols = max(1, int((1 - zoom) * cols))

    # np.random.randint excludes its upper bound. The "+ 1" lets the crop
    # touch the bottom/right edge and avoids a ValueError (low >= high) when
    # the crop is as large as the image itself.
    top = np.random.randint(0, rows - crop_rows + 1)
    left = np.random.randint(0, cols - crop_cols + 1)

    img = img[top:top + crop_rows, left:left + crop_cols]
    # cv2.resize takes the target size as (width, height), i.e. (cols, rows).
    img = cv2.resize(img, shape[:2][::-1]) / 255
    return img[..., None]

In [0]:
# Mount Google Drive inside the Colab VM; the dataset copy step reads from it.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [0]:
!rm -rf person
!rm -rf person2
!cp -r drive/My\ Drive/Datasets/person .
!cp -r drive/My\ Drive/Datasets/person_2 .

In [0]:
# 2x3 affine matrix applied to the RGB frames via cv2.warpAffine in load_data.
# NOTE(review): presumably a calibration that registers the RGB view onto the
# FIR view -- confirm where these coefficients come from.
_AFFINE_ROWS = (
    (1.202290, -0.026808, -50.528589),
    (0.017762, 1.203090, -73.950204),
)
trans = np.array(_AFFINE_ROWS)

# Shape fed to the network, laid out as (rows, cols, channels): load_data
# resizes images to width=input_shape[1], height=input_shape[0].
input_shape = (320, 240, 1)

def _read_image(image_path):
    """Read an image with OpenCV, raising instead of returning ``None`` on failure."""
    image = cv2.imread(image_path)
    if image is None:
        raise OSError("Could not read image (missing or unreadable): " + image_path)
    return image


def load_data(path):
    """Load paired RGB / FIR images from a dataset directory.

    Reads ``<path>/metadata.json`` and, for every entry of its ``"labels"``
    mapping whose label is not 0, loads ``<path>/rgb/rgb_<fname>`` and
    ``<path>/fir/fir_<fname>``. Both images are divided by 255, averaged over
    their colour channels and resized to the network input size; the RGB
    image is additionally resized to 640x480 and warped with ``trans`` before
    the final resize. Every image is directly followed in the output by its
    horizontally flipped copy, so samples come in consecutive
    (original, flipped) pairs.

    Args:
        path: Dataset directory containing ``metadata.json`` plus the
            ``rgb/`` and ``fir/`` image folders.

    Returns:
        Tuple ``(rgb, fir)`` of index-aligned NumPy arrays built from the
        per-sample ``(input_shape[0], input_shape[1], 1)`` images.

    Raises:
        OSError: If an image listed in the metadata cannot be read.
    """
    with open(os.path.join(path, "metadata.json"), "r") as f:
        labels = json.load(f)["labels"]

    # cv2.resize expects the target size as (width, height).
    target_size = (input_shape[1], input_shape[0])

    rgb_data = []
    fir_data = []

    for fname, label in labels.items():
        # Entries labelled 0 are not used.
        if label == 0:
            continue

        # Read both files before touching the output lists, so a missing file
        # can never leave the RGB and FIR lists misaligned.
        rgb = _read_image(os.path.join(path, "rgb/rgb_" + fname)) / 255
        fir = _read_image(os.path.join(path, "fir/fir_" + fname)) / 255

        # RGB: grey-scale -> 640x480 -> affine warp -> network input size.
        rgb = np.mean(rgb, axis=-1)
        rgb = cv2.resize(rgb, (640, 480))
        rgb = cv2.warpAffine(rgb, trans, (rgb.shape[1], rgb.shape[0]))
        rgb = cv2.resize(rgb, target_size)[..., None]
        rgb_data.append(rgb)
        rgb_data.append(hflip(rgb))

        # FIR: network input size -> grey-scale.
        fir = cv2.resize(fir, target_size)
        fir = np.mean(fir, axis=-1)[..., None]
        fir_data.append(fir)
        fir_data.append(hflip(fir))

    return np.array(rgb_data), np.array(fir_data)

# Load the RGB (X*) and FIR (y*) arrays of both dataset directories.
(X, y), (X2, y2) = [load_data(dataset_dir) for dataset_dir in ("person", "person_2")]

In [0]:
# Merge both datasets under new names, so re-running this cell does not keep
# appending X2 / y2 onto an already merged X / y.
X_all = np.concatenate([X, X2])
y_all = np.concatenate([y, y2])

# load_data stores every image directly followed by its flipped copy. Split
# on those (original, flipped) pairs rather than on single samples, so a
# validation image never has its mirrored twin in the training set.
assert len(X_all) % 2 == 0, "expected consecutive (original, flipped) pairs"
pair_ids = np.arange(len(X_all) // 2)
# Fixed random_state keeps the split (and the sample previewed below) reproducible.
train_pairs, test_pairs = train_test_split(
    pair_ids, shuffle=True, train_size=0.8, random_state=0
)
train_idx = np.stack([2 * train_pairs, 2 * train_pairs + 1], axis=1).ravel()
test_idx = np.stack([2 * test_pairs, 2 * test_pairs + 1], axis=1).ravel()

X_train, X_test = X_all[train_idx], X_all[test_idx]
y_train, y_test = y_all[train_idx], y_all[test_idx]
print(X_train.shape)

# Preview one training pair; clamp the index in case the training set is small.
i = min(118, len(X_train) - 1)
fig, (ax1, ax2) = plt.subplots(1, 2)
ax1.imshow(X_train[i, ..., 0], cmap="gray")
ax1.set_title("Visible light")
ax2.imshow(y_train[i, ..., 0], cmap="gray")
ax2.set_title("FIR")
plt.show()

In [0]:
# Convolutional encoder-decoder that maps a single-channel input image to a
# single-channel output image of the same spatial size.
model = K.Sequential()
model.add(K.layers.Input(shape=input_shape))

# Encoder: every stage is Conv2D -> LeakyReLU -> 2x2 max-pool -> Dropout(0.4).
# The four poolings shrink each spatial dimension by a factor of 16.
for filters, kernel in [(8, 5), (16, 7), (32, 7), (64, 7)]:
    model.add(K.layers.Conv2D(filters, kernel_size=(kernel, kernel), strides=(1, 1), padding="same"))
    model.add(K.layers.LeakyReLU())
    model.add(K.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding="valid"))
    model.add(K.layers.Dropout(0.4))

# Decoder: four stride-2 transposed convolutions undo the 16x down-sampling;
# the last one reduces the output to a single channel.
for filters, kernel in [(32, 7), (16, 7), (8, 7), (1, 5)]:
    model.add(K.layers.Conv2DTranspose(filters, kernel_size=(kernel, kernel), strides=(2, 2), padding="same"))
    model.add(K.layers.LeakyReLU())

# This is a pixel-wise regression, so classification accuracy carries no
# information; track mean absolute error next to the MSE loss instead.
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mean_absolute_error'])
model.summary()

In [0]:
# from tensorflow.python.client import device_lib
# print(device_lib.list_local_devices())

In [0]:
%%time
# !rm -rf logs
log_dir="logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=True, write_images=True)

model.fit(X_train, y_train, batch_size=4, epochs=300, callbacks=[tensorboard_callback], verbose=1, validation_data=(X_test, y_test))

In [0]:
# %reload_ext tensorboard
# %tensorboard --logdir logs/fit

In [0]:
# Pick a random held-out sample. np.random.randint excludes its upper bound,
# so pass the sample count itself: subtracting 1 would never select the last
# sample and would raise when the test set holds exactly one image.
i = np.random.randint(0, X_test.shape[0])
print(i)

# Swap in X_train / y_train here to inspect a training sample instead.
test_image = X_test[i]
ref_image = y_test[i]

# model.predict expects a leading batch axis.
predicted_image = model.predict(test_image[None, ...])

# Input, prediction and ground truth side by side.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 10))
ax1.imshow(test_image[..., 0], cmap="gray")
ax1.set_title("Visible light")
ax2.imshow(predicted_image[0, ..., 0], cmap="gray")
ax2.set_title("Predicted FIR")
ax3.imshow(ref_image[..., 0], cmap="gray")
ax3.set_title("Actual FIR")
pass

In [0]:
# Compare input, prediction and ground truth for one fixed held-out sample.
# (Use X_train / y_train below to inspect a training sample instead.)
i = 35
print(i)

test_image = X_test[i]
ref_image = y_test[i]

# Add the batch axis the model expects before predicting.
predicted_image = model.predict(test_image[None, ...])

panels = (
    ("Visible light", test_image[..., 0]),
    ("Predicted FIR", predicted_image[0, ..., 0]),
    ("Actual FIR", ref_image[..., 0]),
)
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 10))
for ax, (title, image) in zip((ax1, ax2, ax3), panels):
    ax.imshow(image, cmap="gray")
    ax.set_title(title)