In [0]:
import pandas as pd
import os 
import cv2
from PIL import Image
import numpy as np
import glob
from collections import defaultdict
import shutil
import skimage

# Download Data


In [2]:
# Create the download directory (relative to the current working directory).
!mkdir data_path
# Absolute path used by the later cells. NOTE(review): this only matches the
# directory created above if the notebook runs from /content (e.g. Colab) — confirm.
data_path = '/content/data_path'
# Fetch the dataset archive into data_path; --no-check-certificate disables
# TLS certificate verification for this download.
!wget 'https://kelvins.esa.int/media/competitions/proba-v-super-resolution/probav_data.zip' --no-check-certificate -P data_path

--2020-01-03 13:32:32--  https://kelvins.esa.int/media/competitions/proba-v-super-resolution/probav_data.zip
Resolving kelvins.esa.int (kelvins.esa.int)... 195.169.140.114
Connecting to kelvins.esa.int (kelvins.esa.int)|195.169.140.114|:443... connected.
  Unable to locally verify the issuer's authority.
HTTP request sent, awaiting response... 200 OK
Length: 708416603 (676M) [application/zip]
Saving to: ‘data_path/probav_data.zip’


2020-01-03 13:32:50 (37.7 MB/s) - ‘data_path/probav_data.zip’ saved [708416603/708416603]



In [3]:
# Extract the downloaded archive into the data_path directory.
!unzip '/content/data_path/probav_data.zip' -d data_path

Archive:  /content/data_path/probav_data.zip
   creating: data_path/test/
   creating: data_path/test/NIR/
   creating: data_path/test/NIR/imgset1306/
  inflating: data_path/test/NIR/imgset1306/LR000.png  
  inflating: data_path/test/NIR/imgset1306/LR001.png  
  inflating: data_path/test/NIR/imgset1306/LR002.png  
  inflating: data_path/test/NIR/imgset1306/LR003.png  
  inflating: data_path/test/NIR/imgset1306/LR004.png  
  inflating: data_path/test/NIR/imgset1306/LR005.png  
  inflating: data_path/test/NIR/imgset1306/LR006.png  
  inflating: data_path/test/NIR/imgset1306/LR007.png  
  inflating: data_path/test/NIR/imgset1306/LR008.png  
  inflating: data_path/test/NIR/imgset1306/LR009.png  
  inflating: data_path/test/NIR/imgset1306/LR010.png  
  inflating: data_path/test/NIR/imgset1306/LR011.png  
  inflating: data_path/test/NIR/imgset1306/LR012.png  
  inflating: data_path/test/NIR/imgset1306/LR013.png  
  inflating: data_path/test/NIR/imgset1306/LR014.png  
  inflating: data_path/t

# Pre-processing the NIR & RED channels

In [0]:
# Root directory of the extracted dataset; preprocessing() looks up the
# <mode>/<band> scene folders underneath it.
base_dir ='/content/data_path'

In [0]:
# Load images as np.array uint16
def load_image( infilename ) :
    """Read an image file and return its pixels as a uint16 numpy array.

    Parameters
    ----------
    infilename : str or path-like
        Path of the image file to read.

    Returns
    -------
    numpy.ndarray
        Pixel data converted to dtype uint16.
    """
    # The context manager guarantees the underlying file handle is released
    # even if decoding fails part-way through.
    with Image.open( infilename ) as img:
        img.load()
        data = np.asarray( img, dtype="uint16" )
    return data

In [0]:
# Normalization
def normalize(image):
    """Min-max rescale `image` so its values span [0, 1].

    Parameters
    ----------
    image : numpy.ndarray
        Array to rescale (must be non-empty).

    Returns
    -------
    numpy.ndarray
        Floating-point array of the same shape. A constant image (max == min)
        is mapped to all zeros instead of producing NaN from a 0/0 division.
    """
    lowest = image.min()
    value_range = image.max() - lowest
    # Dividing by 1 for a flat image keeps the result dtype identical to the
    # regular path while avoiding the 0/0 -> NaN case.
    return (image - lowest) / (value_range if value_range != 0 else 1)

In [0]:
# Load the HR image (uint16), shift it left by two bits so the 14-bit values fill the 16-bit range, then convert to float64
def load_and_normalize_hr(scene_path, normalize=False):
    """Load a scene's HR.png as a 3-channel float64 image.

    Parameters
    ----------
    scene_path : str
        Directory that contains the file ``HR.png``.
    normalize : bool, optional
        When True, min-max rescale the result to [0, 1] (a constant image
        becomes all zeros). Defaults to False.

    Returns
    -------
    numpy.ndarray
        Array of shape (height, width, 3) with dtype float64.
    """
    hr = skimage.io.imread(os.path.join(scene_path, 'HR.png'))
    # Shift left by two bits before the float conversion.
    hr = skimage.img_as_float64(hr << 2)
    # `alpha=None` was the old default, so omitting it changes nothing there;
    # NOTE(review): newer scikit-image releases are understood to have dropped
    # the keyword — confirm against the installed version.
    hr = skimage.color.gray2rgb(hr)
    # No-op for a 2-D source image; fails loudly if the input was not grayscale.
    hr = hr.reshape(hr.shape[0],hr.shape[1],3)
    if not normalize:
        return hr
    # The boolean parameter `normalize` shadows the module-level normalize()
    # function inside this scope, so calling it here would raise
    # "TypeError: 'bool' object is not callable". Rescale inline instead.
    lowest = hr.min()
    value_range = hr.max() - lowest
    return (hr - lowest) / (value_range if value_range != 0 else 1.0)

In [0]:
def _remove_saturated_frames(scene_path, threshold=60000):
  """Delete the LR frames of one scene that contain a pixel above `threshold`, together with their QM masks."""
  masks = sorted(glob.glob(scene_path+'/QM*'))
  images = sorted(glob.glob(scene_path+'/LR*'))
  saturated = [idx for idx, image in enumerate(images)
               if (load_image(image) > threshold).any()]
  for idx in saturated:
    # LR and QM files are paired by their position in the sorted listings.
    os.remove(masks[idx])
    os.remove(images[idx])


def _split_into_patches(array, splits):
  """Return the splits*splits tiles of `array` as a flat list in row-major order."""
  return [tile
          for row_band in np.vsplit(array, splits)
          for tile in np.hsplit(row_band, splits)]


def _fuse_best_patches(frames, masks, splits=8):
  """Assemble one image by taking, for each tile position, the tile of the frame whose mask tile has the largest sum."""
  frame_tiles = [_split_into_patches(frame, splits) for frame in frames]
  # mask_scores[f][p] = sum of mask f over tile position p.
  mask_scores = np.array([[np.sum(tile) for tile in _split_into_patches(mask, splits)]
                          for mask in masks])
  # argmax returns the first frame on ties, matching a stable descending sort.
  best_frame = np.argmax(mask_scores, axis=0)
  chosen = [frame_tiles[frame_idx][pos] for pos, frame_idx in enumerate(best_frame)]
  rows = [np.hstack(chosen[start:start+splits])
          for start in range(0, splits*splits, splits)]
  return np.vstack(rows)


def preprocessing(band,mode):
  """Fuse the LR frames of every scene of `band`/`mode` and save the results as .npy files.

  For each ``imgset*`` folder under ``base_dir/<mode>/<band>``:

  1. LR frames containing a pixel above 60000 are DELETED FROM DISK together
     with their QM masks (destructive side effect).
  2. The remaining LR frames and QM masks are split into an 8x8 grid of tiles;
     for every tile position the frame whose mask tile has the largest sum is
     selected, and the selected tiles are stitched back into a single image.
  3. The stitched image is shifted left by two bits, converted to float64 and
     replicated to three channels.

  The stacked results are written to ``base_dir/<band>_LR_<mode>.npy``. When
  `mode` is ``'train'`` the HR images are loaded with load_and_normalize_hr()
  and written to ``base_dir/<band>_HR_<mode>.npy``.

  Parameters
  ----------
  band : str
      Sub-folder name of the spectral band, e.g. ``'NIR'`` or ``'RED'``.
  mode : str
      Dataset split folder, e.g. ``'train'`` or ``'test'``.

  Raises
  ------
  ValueError
      If a scene has no LR/QM pair left after the saturated frames are removed.
  """
  scenes_dir = os.path.join(base_dir, '{}/{}'.format(mode,band))
  scene_paths = sorted(glob.glob(scenes_dir+'/imgset*'))

  imgset_selected = []
  for scene_path in scene_paths:
    _remove_saturated_frames(scene_path)

    image_files = sorted(glob.glob(scene_path+'/LR*.png'))
    mask_files = sorted(glob.glob(scene_path+'/QM*.png'))
    pairs = list(zip(image_files, mask_files))
    if not pairs:
      raise ValueError('No LR/QM image pairs available in {}'.format(scene_path))

    # Load EVERY frame/mask pair of the scene so the best-tile selection can
    # actually choose between frames (previously only the last pair was kept).
    frames = [skimage.io.imread(image_file) for image_file, _ in pairs]
    masks = [skimage.io.imread(mask_file) for _, mask_file in pairs]

    constructed_img = _fuse_best_patches(frames, masks)
    constructed_img = skimage.img_as_float64(constructed_img<<2)
    # `alpha=None` was the old default of gray2rgb, so it is simply omitted.
    constructed_img = skimage.color.gray2rgb(constructed_img)
    # No-op for 2-D source frames; fails loudly if the input was not grayscale.
    constructed_img = constructed_img.reshape(constructed_img.shape[0],constructed_img.shape[1],3)
    imgset_selected.append(constructed_img)

  imgset_arr = np.array(imgset_selected)
  np.save(os.path.join(base_dir, '{}_LR_{}.npy'.format(band,mode)), imgset_arr)

  # HR targets are only loaded for the training split.
  if mode == 'train':
    hrs_arr = np.array([load_and_normalize_hr(scene_path) for scene_path in scene_paths])
    # Save the HR stack itself (the LR array was written here by mistake before).
    np.save(os.path.join(base_dir, '{}_HR_{}.npy'.format(band,mode)), hrs_arr)

In [0]:
def combine_bands(resolution,mode):
  """Concatenate the NIR and RED arrays of one resolution/split into a single file.

  Reads ``NIR_<resolution>_<mode>.npy`` and ``RED_<resolution>_<mode>.npy``
  from ``data_path``, stacks them along the first axis (NIR first) and writes
  the result to ``<resolution>_<mode>.npy`` in the same directory.

  Parameters
  ----------
  resolution : str
      Resolution tag used in the file names, e.g. ``'LR'`` or ``'HR'``.
  mode : str
      Dataset split used in the file names, e.g. ``'train'`` or ``'test'``.
  """
  # Input and output both go through data_path, so the function no longer
  # mixes a hard-coded absolute path with the configurable variable.
  nir = np.load(os.path.join(data_path, 'NIR_{}_{}.npy'.format(resolution,mode)))
  red = np.load(os.path.join(data_path, 'RED_{}_{}.npy'.format(resolution,mode)))
  nired = np.vstack([nir,red])
  np.save(os.path.join(data_path, '{}_{}.npy'.format(resolution,mode)), nired)

# Preprocessing on Train set

In [0]:
# Fuse the NIR training scenes and save the NIR_LR_train / NIR_HR_train arrays.
# Note: preprocessing() deletes LR frames with pixels above 60000 (and their QM masks) from disk.
preprocessing('NIR','train')

In [0]:
# Same as the NIR cell, for the RED training scenes (also deletes saturated LR frames from disk).
preprocessing('RED','train')

In [0]:
# Stack NIR_LR_train.npy and RED_LR_train.npy (NIR first) into LR_train.npy.
combine_bands('LR','train')

In [0]:
# Stack NIR_HR_train.npy and RED_HR_train.npy (NIR first) into HR_train.npy.
combine_bands('HR','train')

# Preprocessing on Test set

In [0]:
# Fuse the NIR test scenes; only the LR array is saved because HR images are loaded for mode == 'train' only.
# Note: preprocessing() deletes LR frames with pixels above 60000 (and their QM masks) from disk.
preprocessing('NIR','test')

In [0]:
# Same as the NIR cell, for the RED test scenes (also deletes saturated LR frames from disk).
preprocessing('RED','test')

In [0]:
# Stack NIR_LR_test.npy and RED_LR_test.npy (NIR first) into LR_test.npy.
combine_bands('LR','test')