# Requirements

In [1]:
import os
import sys
import numpy as np
import pandas as pd

import imageio
from skimage import transform
from skimage import img_as_float

import torch
from torch.utils import data

#from data_utils import create_or_load_statistics, create_distrib, normalize_images, data_augmentation, compute_image_mean


# utils

In [1]:
def normalize_images(data, _mean, _std):
    """Standardise ``data`` in place, one slice of its third axis at a time.

    For every index ``k`` of ``_mean`` the slice ``data[:, :, k]`` first has
    ``_mean[k]`` subtracted and is then divided by ``_std[k]``. The array is
    modified directly and nothing is returned.

    Args:
        data: array indexed as ``data[:, :, k]``; overwritten with the result.
        _mean: sequence of offsets, one per slice.
        _std: sequence of divisors, indexed in step with ``_mean``.
    """
    for channel, offset in enumerate(_mean):
        # Two separate write-backs (centre, then scale) so each intermediate
        # result is stored in the array's own dtype before the next step.
        data[:, :, channel] = data[:, :, channel] - offset
        data[:, :, channel] = data[:, :, channel] / _std[channel]

def compute_image_mean(data):
    """Return the mean and sample standard deviation over the first three axes.

    Both statistics are reduced over axes 0, 1 and 2 of ``data`` in a single
    pass, so a 4-D input yields one value per entry of its last axis
    (presumably one per channel of an (N, H, W, C) stack -- confirm with the
    caller).

    The standard deviation must be taken over all three axes at once:
    applying ``np.std`` axis by axis would measure the spread of intermediate
    standard deviations rather than of the values themselves, and collapses
    to 0 whenever those intermediate values are equal.

    Args:
        data: array-like with at least three dimensions.

    Returns:
        Tuple ``(_mean, _std)``. Each has the shape of ``data`` with its first
        three axes removed; ``_std`` uses ``ddof=1``.
    """
    reduce_axes = (0, 1, 2)
    _mean = np.mean(data, axis=reduce_axes)
    _std = np.std(data, axis=reduce_axes, ddof=1)

    return _mean, _std

# Dataset

In [None]:
class NGValid(data.Dataset):
    """Dataset that loads every image and mask of two directories into memory.

    On construction all files of ``img_dir`` and ``mask_dir`` are read, negative
    label values are remapped to 2, and the mean / standard deviation returned
    by ``compute_image_mean`` are stored for use in ``__getitem__``.

    NOTE(review): ``__getitem__`` reads ``self.coords`` (through ``.iloc``),
    which is not assigned anywhere in this class; it has to be attached to the
    instance before items are requested.
    """

    def __init__(self, img_dir, mask_dir, output_path):
        """Load images and masks and compute normalisation statistics.

        Args:
            img_dir: directory whose entries are all read as images.
            mask_dir: directory whose entries are all read as masks.
            output_path: stored on the instance; not used further in this class.
        """
        self.img_dir = img_dir
        self.mask_dir = mask_dir
        # os.listdir returns entries in arbitrary order. Images and masks are
        # later addressed with the same index, so both listings are sorted to
        # make that pairing deterministic (assumes matching image and mask
        # file names sort identically -- confirm).
        self.images = sorted(os.listdir(img_dir))
        self.masks = sorted(os.listdir(mask_dir))

        self.output_path = output_path

        # data and label
        self.data, self.labels = self.load_images()

        if self.data.ndim == 4:  # if all images have the same shape
            self.num_channels = self.data.shape[-1]  # get the number of channels
        else:
            self.num_channels = self.data[0].shape[-1]  # get the number of channels

        self.num_classes = 2  # binary - two classes
        # negative classes will be converted into 2 so they can be ignored in the loss
        self.labels[np.where(self.labels < 0)] = 2

        print('num_channels and labels', self.num_channels, self.num_classes, np.bincount(self.labels.flatten()))

        self.mean, self.std = compute_image_mean(self.data)

    def load_images(self):
        """Read all listed images and masks.

        Images are cast to float64 and values below -1.0e+38 are set to 0.
        Masks are cast to ``int``.

        Returns:
            Tuple ``(images, masks)`` of ``np.ndarray`` built from the two lists.
        """
        images = []
        masks = []
        for img in self.images:
            temp_image = imageio.imread(os.path.join(self.img_dir, img)).astype(np.float64)
            # remove extreme negative values (probably NO_DATA values)
            temp_image[np.where(temp_image < -1.0e+38)] = 0

            images.append(temp_image)

        for msk in self.masks:
            temp_mask = imageio.imread(os.path.join(self.mask_dir, msk)).astype(int)
            # NOTE(review): after the integer cast no value can be below
            # -1.0e+38, so this filter never matches. Kept unchanged because
            # moving it before the cast would alter which label such pixels get.
            temp_mask[np.where(temp_mask < -1.0e+38)] = 0

            masks.append(temp_mask)

        return np.asarray(images), np.asarray(masks)

    def __getitem__(self, index):
        """Return the normalised sample addressed by row ``index`` of ``self.coords``.

        Returns:
            Tuple ``(img, label, cur_map, cur_x, cur_y)`` where ``img`` is a float
            tensor with axes reordered to (2, 0, 1) and ``label`` is a tensor.
        """
        # Columns 0, 1 and 2 of the coords table select the map and the two
        # image indices (presumably a pandas DataFrame -- confirm).
        cur_map, cur_x, cur_y = self.coords.iloc[index, 0], self.coords.iloc[index, 1], self.coords.iloc[index, 2]

        # Copies keep the in-place normalisation below from touching self.data.
        img = np.copy(self.data[cur_map][cur_x, cur_y, :])
        label = np.copy(self.labels[cur_map][cur_x, cur_y])

        # Normalization (in place).
        normalize_images(img, self.mean, self.std)

        # Move the last axis to the front.
        img = np.transpose(img, (2, 0, 1))

        # Turning to tensors.
        img = torch.from_numpy(img.copy())
        label = torch.from_numpy(label.copy())

        # Returning to iterator.
        return img.float(), label, cur_map, cur_x, cur_y

    def __len__(self):
        """Return the number of loaded images.

        NOTE(review): ``__getitem__`` indexes rows of ``self.coords``; confirm
        that table has one row per loaded image.
        """
        return len(self.data)

# debug

### parsing

In [66]:
import re

img_dir = '/content/drive/MyDrive/vali_exp/'
images = os.listdir(img_dir)

# Work on the first listed file name with the text '.tif' removed.
fn_parse = images[0].replace('.tif', '')
print(type(fn_parse))
print(fn_parse)

# Tokenise on '_': first token -> cur_map, second -> cur_x, last -> cur_y.
name_tokens = re.split("_", fn_parse)
cur_map, cur_x, cur_y = name_tokens[0], name_tokens[1], name_tokens[-1]

# Report type and value of each parsed field, in the order x, y, map.
for field_label, field_value in (("cur_x", cur_x), ("cur_y", cur_y), ("cur_map", cur_map)):
    print(type(field_value))
    print(field_label, field_value)

<class 'str'>
0_-526836.719036524_3353652.33760551
<class 'str'>
cur_x -526836.719036524
<class 'str'>
cur_y 3353652.33760551
<class 'str'>
cur_map 0


### Skeleton

In [None]:
import re

img_dir = '/content/drive/MyDrive/vali_exp/'
images = os.listdir(img_dir)

# Parallel accumulators: the loaded tiles and the three tokens parsed from
# each tile's file name (all tokens are kept as strings).
img_list, cur_maps, cur_xs, cur_ys = [], [], [], []

for img in images:
    # Read the tile as float64 and zero out extreme negative values
    # (probably NO_DATA values).
    temp_image = imageio.imread(os.path.join(img_dir, img)).astype(np.float64)
    temp_image[temp_image < -1.0e+38] = 0

    # File name without '.tif', split on '_':
    # first token -> map, second -> x, last -> y.
    fn_parse = img.replace('.tif', '')
    name_tokens = re.split("_", fn_parse)
    cur_map, cur_x, cur_y = name_tokens[0], name_tokens[1], name_tokens[-1]

    cur_maps.append(cur_map)
    cur_xs.append(cur_x)
    cur_ys.append(cur_y)
    img_list.append(temp_image)



### function

In [16]:
#img_dir = '/content/drive/MyDrive/vali_exp/'
#images = os.listdir(img_dir)

def load(img_dir, images):
    """Read every listed tile and split its file name into map / x / y tokens.

    Each file ``img_dir/<name>`` is read with ``imageio.imread`` and cast to
    float64; values below -1.0e+38 are replaced by 0. The name, with the text
    '.tif' removed, is split on '_': the first token is taken as the map id,
    the second as x and the last as y. All three are kept as strings.

    Args:
        img_dir: directory containing the tiles.
        images: iterable of file names inside ``img_dir``.

    Returns:
        Tuple ``(imgs, cur_maps, cur_xs, cur_ys)``: an ``np.ndarray`` built
        from the loaded tiles and three lists of string tokens, all in the
        order of ``images``.
    """
    img_list, cur_maps, cur_xs, cur_ys = [], [], [], []

    for file_name in images:
        tile = imageio.imread(os.path.join(img_dir, file_name + '')).astype(np.float64)
        # remove extreme negative values (probably NO_DATA values)
        tile[tile < -1.0e+38] = 0

        # Tokenise once, then pick the three fields of interest.
        tokens = re.split("_", str(file_name.replace('.tif', '')))
        map_id, x_token, y_token = str(tokens[0]), str(tokens[1]), str(tokens[-1])

        cur_maps.append(map_id)
        cur_xs.append(x_token)
        cur_ys.append(y_token)
        img_list.append(tile)

    return np.asarray(img_list), cur_maps, cur_xs, cur_ys



In [None]:
imgs, cur_maps, cur_xs, cur_ys = load(img_dir, images)

# Show the container type and element count of the image stack, then of the
# x tokens, followed by the x tokens themselves.
for loaded in (imgs, cur_xs):
    print(type(loaded))
    print(len(loaded))

print(cur_xs)