# Experimentation with UMAP
A technique for dimensionality reduction.  
Here we represent each sample as a dot on a 2D plane and look for patterns.

In [None]:
import junodch_utils_read_img as utils

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tqdm.keras import TqdmCallback

import rasterio
import keras
from keras import layers

import umap
from umap.parametric_umap import ParametricUMAP

# Data preparation
### Fetch data from file

In [None]:
# Rasters for the study area all live in one folder; build each path from it.
folderName = "img/Sokoto/"
pathSatellite, pathNight, pathValidation = (
  f"{folderName}{rasterName}"
  for rasterName in ("Sentinel-2.tif", "Night_VIIRS.tif", "Population_GHSL.tif")
)

# Area of interest, derived from the satellite image by the project helper.
aoi = utils.getImgBorder(pathSatellite)

# Tile coordinates and their night-light values, restricted to the area of interest.
# NOTE(review): exact return types come from junodch_utils_read_img — both are
# used below as array-likes indexed on their first axis.
dataCoords, dataRadiance = utils.getTilesCoordsPerimeter(pathNight, area=aoi)
print('Tiles:', len(dataCoords))

In [None]:
# Split the tiles by night-time radiance: "light" tiles have a strictly
# positive value, "dark" tiles are exactly zero.
lightMask = (dataRadiance > 0)
lightCoords = dataCoords[lightMask]
darkMask = (dataRadiance == 0)

# Draw (without replacement) as many dark tiles as there are light tiles so the
# two groups are balanced. The draw size is capped at the number of dark tiles
# available: asking np.random.choice for more than the population with
# replace=False raises ValueError.
# NOTE: the draw uses NumPy's global RNG and is not seeded here, so the selected
# dark tiles change from run to run.
idxDarkData = np.random.choice(
  np.flatnonzero(darkMask),
  min(len(lightCoords), int(np.count_nonzero(darkMask))),
  replace=False,
)
darkCoords = dataCoords[idxDarkData]

# Light tiles first, then the sampled dark tiles; `validation` below follows
# the same order.
trainCoords = np.concatenate((lightCoords, darkCoords))

# Read the image patch for every selected coordinate from the satellite raster.
# The second return value of the helper is not needed here.
# NOTE(review): res=64 is presumably the tile side in pixels — confirm in utils.
with rasterio.open(pathSatellite) as f:
  trainData, _ = utils.coordsToImgsFormated(f, trainCoords, res=64)
print(trainData.shape)

print('Light Tile:',len(lightCoords))
print('dark Tile:',len(darkCoords))
print('Total train',trainData.shape)

train = trainData
# Radiance of each training tile, in the same order as trainCoords; used as the
# colour value when plotting embeddings.
validation = np.concatenate((dataRadiance[lightMask], dataRadiance[idxDarkData]))

In [None]:
# Flatten every sample to one feature vector: the first axis (samples) is kept,
# all remaining axes are merged into a single one.
print(train.shape)
trainDataFormated = train.reshape((len(train), -1))
print('shape', trainDataFormated.shape)

In [None]:
def displayResultUMAP(embedding):
  """Scatter-plot a 2-D embedding, coloured by the module-level `validation` values.

  embedding: array indexable as embedding[:, 0] and embedding[:, 1]; column 0
    is drawn on the x axis and column 1 on the y axis.

  The point colours come from the global `validation`, so it must hold one
  value per row of `embedding`. Nothing is returned; the figure is left as the
  current matplotlib figure.
  """
  xs = embedding[:, 0]
  ys = embedding[:, 1]

  _, axes = plt.subplots(figsize=(5, 4))
  points = axes.scatter(
      xs,
      ys,
      c=validation,
      cmap='rainbow',
      s=5,
      alpha=0.3,
      rasterized=True,
  )
  # Same scale on both axes so distances in the embedding are not distorted.
  axes.axis('equal')
  plt.colorbar(points, ax=axes)

# UMAP
First experiment with standard (non-parametric) UMAP.

In [None]:
# Fit a standard UMAP on the flattened tiles and plot the resulting embedding.
# random_state is fixed to 50 so the UMAP fit itself is seeded.
embedding = umap.UMAP(random_state=50).fit_transform(trainDataFormated)
displayResultUMAP(embedding)

In [None]:
# Re-draw the current `embedding` without recomputing it.
displayResultUMAP(embedding)

Attempt with ParametricUMAP.

In [None]:
# Shape of a single sample: everything after the first (sample) axis of `train`.
input_shape = train.shape[1:]
print(input_shape)

# Encoder: three stride-2 convolutions with 12 filters each, then a flatten and
# a 2-unit dense layer that produces the 2-D embedding.
encoder = keras.Sequential([
  layers.Conv2D(filters=12, kernel_size=(3,3), strides=2, padding='same', activation='relu', input_shape=input_shape),
  layers.Conv2D(filters=12, kernel_size=(3,3), strides=2, padding='same', activation='relu'),
  layers.Conv2D(filters=12, kernel_size=(3,3), strides=2, padding='same', activation='relu'),

  layers.Flatten(),
  layers.Dense(units=2, name='encoder'),
])

# Decoder: dense layer from the 2-D embedding to an 8x8x12 volume, followed by
# three stride-2 transposed convolutions; the last one has 3 filters and a
# sigmoid activation.
decoder = keras.Sequential([
  layers.Dense(units=np.prod((8,8,12)), activation='relu', input_shape=(2,)),
  layers.Reshape(target_shape=(8,8,12)),
  layers.Conv2DTranspose(filters=12, kernel_size=(3,3), strides=2, padding='same', activation='relu'),
  layers.Conv2DTranspose(filters=12, kernel_size=(3,3), strides=2, padding='same', activation='relu'),
  layers.Conv2DTranspose(filters=3, kernel_size=(3,3), strides=2, padding='same', activation='sigmoid'),
])

# Reconstruction loss (L2); also used later to score reconstructions.
lossFunction = keras.losses.MeanSquaredError()

# NOTE(review): parametric_embedding=False is combined with a user-supplied
# encoder. Check the ParametricUMAP documentation for whether the supplied
# encoder is trained in that mode — a later cell calls encoder.predict() directly.
# n_training_epochs is not passed, so the library default applies (an override
# of 1 was tried here and disabled).
embedder = ParametricUMAP(
  dims=input_shape,
  encoder=encoder,
  decoder=decoder,
  parametric_embedding=False,
  parametric_reconstruction=True,
  parametric_reconstruction_loss_fcn=lossFunction,
  autoencoder_loss=True,
  loss_report_frequency=20,
  # Keras' own progress output is silenced; a tqdm progress bar is used instead.
  keras_fit_kwargs={
    "callbacks": [TqdmCallback(verbose=1)],
    "verbose": 0,
  },
  verbose=False,
)

# The embedder is fitted on the flattened samples; `dims` carries the image shape.
embedding = embedder.fit_transform(trainDataFormated)

# Training-loss curve recorded by the embedder.
fig, ax = plt.subplots()
ax.plot(embedder._history['loss'])
ax.set_ylabel('Cross Entropy')
ax.set_xlabel('Epoch')

In [None]:
# Plot the current `embedding` (the ParametricUMAP result when cells are run in order).
displayResultUMAP(embedding)

In [None]:
# Embed the image tiles by calling the Keras `encoder` directly, then plot the result.
# NOTE(review): the embedder above is created with parametric_embedding=False;
# confirm that `encoder` is actually trained in that mode — otherwise this plots
# the output of an untrained network.
embedding = encoder.predict(train)
displayResultUMAP(embedding)

In [None]:
def displayAutoencoderUmapResults(autoencoder, dataInput, precision=0, isEmbedded=True):
  """Show a batch of images and, in a second figure, their reconstructions.

  autoencoder: object exposing transform() and inverse_transform(); each image
    is reconstructed as inverse_transform(transform(...)).
  dataInput: batch of images indexed on the first axis; every dataInput[i]
    must be displayable by plt.imshow.
  precision: number of decimals kept for the score shown above each
    reconstruction.
  isEmbedded: when True, the images are passed to transform() unchanged and the
    outputs are used as returned. When False, every image is flattened to a
    vector before transform() and every output is reshaped back to the shape
    of an input image.

  The score is computed with the module-level `lossFunction` between the input
  image and its reconstruction. Returns None.
  """
  MAX_ON_ROW = 20
  total = dataInput.shape[0]
  # Ceiling division: a completely filled last row must not add an empty extra
  # row. At least one row is kept so the figure height is never zero.
  nRow = max(1, -(-total // MAX_ON_ROW))
  nCol = min(total, MAX_ON_ROW)
  # Shape of one image, taken from the input instead of being hard-coded.
  imgShape = tuple(dataInput.shape[1:])

  # Display original
  plt.figure(figsize=(30, nRow*2))
  for i in range(total):
    ax = plt.subplot(nRow, nCol, 1+i)
    plt.imshow(dataInput[i])
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
  print("Original data:",dataInput.shape)
  plt.show()

  # Display reconstruction
  if isEmbedded:
    modelInput = dataInput
  else:
    # One flat feature vector per image, built in a single reshape.
    modelInput = np.asarray(dataInput).reshape(total, -1)
  decoded_imgs = autoencoder.inverse_transform(autoencoder.transform(modelInput))

  plt.figure(figsize=(30, nRow*2))
  print("Output data:",decoded_imgs.shape)
  for i in range(decoded_imgs.shape[0]):
    ax = plt.subplot(nRow, nCol, 1+i)
    if isEmbedded:
      decoded_img = decoded_imgs[i]
    else:
      decoded_img = np.reshape(decoded_imgs[i], imgShape)
    plt.imshow(decoded_img)
    # Per-image reconstruction error, shown as the subplot title.
    score = lossFunction(dataInput[i], decoded_img)
    plt.title(np.round(score,precision))
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
  plt.show()

In [None]:
# Show every 100th training tile next to its reconstruction by the fitted embedder.
# isEmbedded=False makes the helper flatten each tile before calling transform().
displayAutoencoderUmapResults(embedder, trainData[::100], precision=5, isEmbedded=False)