In [1]:
from __future__ import print_function
%matplotlib inline
import random
import os
import numpy as np
import cv2
import torch
from skimage import io
from tqdm import tqdm
import matplotlib.pyplot as plt
import sewar.full_ref as fr

In [None]:
def rename_save(filename, input_dir, out_dir, scale=10000.):
    """
    Convert one image (e.g. a Landsat/Sentinel .tif stored as uint16 in
    [0, scale]) to float32 in [0, 1] and save it under the same file name.

    filename: name of the image file inside input_dir; reused unchanged as
        the name of the output file.
    input_dir: directory containing the source image (a trailing path
        separator is optional).
    out_dir: directory the converted image is written to. It is created,
        together with any missing parent directories, if it does not exist.
    scale: largest valid raw value. Larger values are clamped to it and the
        whole image is divided by it.
    """
    img = io.imread(os.path.join(input_dir, filename))

    # NOTE(review): negative raw values are replaced by 1 (i.e. 1 / scale
    # after normalisation), not by 0. Kept exactly as before -- confirm that
    # this is intended. For unsigned inputs this line is a no-op.
    img[img < 0] = 1
    img[img > scale] = scale
    img = img.astype(np.float32) / scale

    # os.makedirs (rather than os.mkdir) so that nested targets such as
    # "<root>/train/HR" work even when "<root>/train" does not exist yet.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    io.imsave(os.path.join(out_dir, filename), img)

In [None]:
def split_data_percent(percent, input_dir, test_num=0, seed=100):
    """
    Split the entries of input_dir into train, validation and test lists.

    The directory listing is sorted, then shuffled with a fixed seed, so the
    same directory content always yields the same split. The first test_num
    shuffled names form the test set; the remaining names are divided between
    train and validation according to percent.

    percent: fraction (0..1) of the non-test data used for training. The
        fraction used for validation is 1 - percent.
    input_dir: directory whose entries (as returned by os.listdir) are split.
    test_num: number of images in test.
    seed: seed for the shuffle (default 100, the previously hard-coded value).

    Returns a dict with the keys "train_filenames", "val_filenames" and
    "test_filenames", each mapping to a list of names.

    Raises ValueError if percent is outside [0, 1] or if test_num is negative
    or larger than the number of entries in input_dir.

    Note: this reseeds Python's global `random` generator as a side effect.
    """
    if not 0 <= percent <= 1:
        raise ValueError(
            "percent must be within [0, 1], got {}".format(percent))

    filenames = os.listdir(input_dir)
    total = len(filenames)
    if not 0 <= test_num <= total:
        raise ValueError(
            "test_num must be within [0, {}], got {}".format(total, test_num))

    # Sort first: os.listdir order is arbitrary, and the shuffle is only
    # reproducible if it starts from a well-defined order.
    filenames.sort()
    random.seed(seed)
    random.shuffle(filenames)

    # Number of training images among the non-test remainder (truncated).
    split = int(percent * (total - test_num))
    train_end = test_num + split

    return {"train_filenames": filenames[test_num:train_end],
            "val_filenames": filenames[train_end:],
            "test_filenames": filenames[:test_num]}

In [None]:
def split_data(val_num, test_num, input_dir, seed=10):
    """
    Split the entries of input_dir into train, validation and test lists.

    The directory listing is sorted, then shuffled with a fixed seed, so the
    same directory content always yields the same split. The last test_num
    shuffled names form the test set, the val_num names before them form the
    validation set, and everything else is used for training.

    val_num: number of images in val.
    test_num: number of images in test.
    input_dir: directory whose entries (as returned by os.listdir) are split.
    seed: seed for the shuffle (default 10, the previously hard-coded value).

    Returns a dict with the keys "train_filenames", "val_filenames" and
    "test_filenames", each mapping to a list of names.

    Raises ValueError if val_num or test_num is negative, or if together they
    exceed the number of entries in input_dir.

    Note: this reseeds Python's global `random` generator as a side effect.
    """
    filenames = os.listdir(input_dir)
    total = len(filenames)

    # Without this check a negative split index makes the slices below
    # overlap, silently putting the same files into train and test.
    if val_num < 0 or test_num < 0:
        raise ValueError(
            "val_num and test_num must be non-negative, got {} and {}".format(
                val_num, test_num))
    if val_num + test_num > total:
        raise ValueError(
            "val_num + test_num ({}) exceeds the number of files ({})".format(
                val_num + test_num, total))

    # Sort first: os.listdir order is arbitrary, and the shuffle is only
    # reproducible if it starts from a well-defined order.
    filenames.sort()
    random.seed(seed)
    random.shuffle(filenames)

    split = total - val_num - test_num
    val_end = split + val_num

    return {"train_filenames": filenames[:split],
            "val_filenames": filenames[split:val_end],
            "test_filenames": filenames[val_end:]}

In [None]:
### Convert the Landsat/Sentinel dataset from uint16 [0-10000] to float32 [0-1]
### and split it into train/val/test.
# NOTE(review): absolute, machine-specific paths -- adjust before running elsewhere.
data_dir_HR = "/home/Jing/SR/data/Training-1223/HR/"
data_dir_x3 = "/home/Jing/SR/data/Training-1223/LR-x3/"

output_dir = "/home/Jing/SR/data/Sentinel_Landsat/"

# Hold out 37 files for test; 90% of the rest go to train, the remainder to val.
Sentinel = split_data_percent(0.9, data_dir_HR, 37)

for dirc in ['train', 'val', 'test']:
    out_dir_HR = output_dir + "{}/HR".format(dirc)
    out_dir_x3 = output_dir + "{}/x3".format(dirc)

    # Create "<output_dir>/<split>/{HR,x3}" up front, including the
    # intermediate "<split>" level, so saving never hits a missing parent.
    for out_dir in (out_dir_HR, out_dir_x3):
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

    for filename in tqdm(Sentinel["{}_filenames".format(dirc)]):
        rename_save(filename, data_dir_HR, out_dir_HR, 10000.)
        # The low-resolution counterpart is named "LRx3_" + everything after
        # the first underscore of the HR name. maxsplit=1 keeps the rest of
        # the name (and its extension) intact if it contains more underscores.
        name = filename.split("_", 1)[1]
        x3_name = "LRx3_" + name
        rename_save(x3_name, data_dir_x3, out_dir_x3, 10000.)