# Prepare DeepGlobe for our experiments

we need to prepare the deepglobe dataset for the training of our models

In [1]:
%load_ext autoreload

In [2]:
%autoreload
import numpy as np
import pandas as pd
import sys
import os
import glob
import imageio
import time
import shutil
import matplotlib.pyplot as plt

from scipy import ndimage, signal
from cv2 import medianBlur, cvtColor, COLOR_RGB2GRAY

sys.path.append("..")

from PIL import Image
from costum_arild.source.utils import image_processing_utils, gdal_utils
from costum_arild.source.data_processing import TrainingImage

2023-06-03 14:58:31.758224: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [3]:
def replace_rgb_to_class_deepglobe(img_arr, 
                                   unknown_zero_flag=False, 
                                   color_matrix=False):
    """
    Convers labels with RGB colors as value to number of class as value
    by default:
        0 : water
        5: unknown
    if unknown_zero_flag is set to ture:
        0 : unknown
        5: water
    """
    
    replaced_image_onehot = image_processing_utils.one_hot_encode(
        img_arr.astype(np.uint8), color_matrix)
    return image_processing_utils.one_hot_decode(replaced_image_onehot)

## 3d to 2d, rgb to label

we need to have 2d labels for the models we have

In [4]:
# list of valid colors of deepglobe
color_matrix = np.array([[0,255,255], #urban land
                         [255,255,0], #agrculture land
                         [255,0,255], #range land
                         [0,255,0], #forst land
                         [0,0,255], #water
                         [255,255,255], #barren land
                         [0,0,0]], # unknown
                        dtype=np.uint8)

color_matrix_float = color_matrix / 255

# list of all images
src_image_path = '/media/saeid/LaCie/Saeid/segmentation/dataset/deepglobe/CodaLab/land-train/land-train'
dest_folder_path = '/media/saeid/LaCie/Saeid/segmentation/dataset/deepglobe/GrayScaled/2448/label_2d'

In [9]:
for idx, image_path in enumerate(glob.glob(os.path.join(src_image_path, '*mask.png'))):
    start = time.time()
    if idx % 100 == 0:
        print(f'working on image {os.path.split(image_path)[-1]}')
    
    dest_path = os.path.join(dest_folder_path, os.path.split(image_path)[-1])
    # read image
    original_image_matrix = imageio.imread(image_path)
    if original_image_matrix.shape[-1] > 3:
        print('image has more than 3 channels, only first 3 channels are used')
        original_image_matrix = original_image_matrix[:,:,:3]
    
    # change rgb to 2d_class labels
    replaced_image_cleaned = replace_rgb_to_class_deepglobe(
        original_image_matrix, 
        color_matrix=color_matrix)
    
    # save to file
    replaced_image_png = Image.fromarray(replaced_image_cleaned.astype(np.uint8))
    replaced_image_png.save(dest_path)
    
    end = time.time()
    if idx % 100 == 0:
        print(f'the time takes {end - start}')

working on image 949235_mask.png
the time takes 1.0956294536590576
working on image 940229_mask.png
the time takes 0.9181716442108154
working on image 256189_mask.png
the time takes 0.9559581279754639
working on image 601966_mask.png
the time takes 0.7787680625915527
working on image 387018_mask.png
the time takes 0.8121402263641357
working on image 761189_mask.png
the time takes 0.8142452239990234
working on image 68078_mask.png
the time takes 0.8014872074127197
working on image 848649_mask.png
the time takes 0.8755347728729248
working on image 172854_mask.png
the time takes 0.8579421043395996


# Covert RGB to Grayscale

In [19]:
# list of all images
src_image_path = '/media/saeid/LaCie/Saeid/segmentation/dataset/deepglobe/CodaLab/land-train/land-train'
dest_folder_path = '/media/saeid/LaCie/Saeid/segmentation/dataset/deepglobe/GrayScaled/2448/image_grayscale'

all_image_paths = glob.glob(os.path.join(src_image_path, '*_sat.jpg'))

In [20]:
# 
for idx, img_path in enumerate(all_image_paths):
    # get the name
    img_name = os.path.split(img_path)[-1]
    # read the image
    img_matrix = image_processing_utils.read_png_file(img_path)
    # convert to grayscale
    img_matrix_gray = cvtColor(img_matrix, COLOR_RGB2GRAY)
    
    # save image
    image_processing_utils.save_to_png(img_array=img_matrix_gray, 
                                       img_path=os.path.join(dest_folder_path, 
                                                             f'{img_name}.png'))
    if idx % 100 == 0:
        print(f'working on image {img_name}')

working on image 164029_sat.jpg
working on image 990573_sat.jpg
working on image 940229_sat.jpg
working on image 21717_sat.jpg
working on image 835147_sat.jpg
working on image 45676_sat.jpg
working on image 204562_sat.jpg
working on image 584865_sat.jpg
working on image 358591_sat.jpg


# Move the val used in MagNet here as well

we read all the training file and move the images that are described to be val, into the validation folder

In [47]:
# load the csv
val_csv_path = '/home/saeid/phd/segmentation/MagNet-main/data/list/deepglobe/val.txt'

src_images = '/media/saeid/LaCie/Saeid/segmentation/dataset/deepglobe/GrayScaled/2448/train'

dst_images = '/media/saeid/LaCie/Saeid/segmentation/dataset/deepglobe/GrayScaled/2448/val_ish'
os.makedirs(dst_images, exist_ok=True)

In [49]:
with open(val_csv_path) as f:
    for file_line in f:
        img_src_path, lbl_src_path = file_line.replace('\n','').split('\t')
        img_src_name = img_src_path.replace('land-train', 'image_grayscale').replace('jpg', 'png')
        lbl_src_name = lbl_src_path.replace('land-train', 'label_2d')      
        
        # make the overall src path
        img_src_path = os.path.join(src_images, img_src_name)
        lbl_src_path = os.path.join(src_images, lbl_src_name)
        
        # make the overall dst path
        img_dst_path = os.path.join(dst_images, img_src_name)
        lbl_dst_path = os.path.join(dst_images, lbl_src_name)
        
        os.makedirs(img_dst_path, exist_ok=True)
        os.makedirs(lbl_dst_path, exist_ok=True)
        
        shutil.move(img_src_path, img_dst_path)
        shutil.move(lbl_src_path, lbl_dst_path)
        
#         break

In [50]:
lbl_src_path, lbl_dst_path

('/media/saeid/LaCie/Saeid/segmentation/dataset/deepglobe/GrayScaled/2448/train/label_2d/935193_mask.png',
 '/media/saeid/LaCie/Saeid/segmentation/dataset/deepglobe/GrayScaled/2448/val_ish/label_2d/935193_mask.png')

# devide the image

we want to do exactly as they did in MagNet
so we read the file and transfer the images to the correct place

In [53]:
all_image_path = '/media/saeid/LaCie/Saeid/segmentation/dataset/deepglobe/GrayScaled/2448/train/image_grayscale/'
all_label_path = '/media/saeid/LaCie/Saeid/segmentation/dataset/deepglobe/GrayScaled/2448/train/label_2d'

all_label_paths = glob.glob(os.path.join(all_label_path, '*.png'))
all_image_paths = glob.glob(os.path.join(all_image_path, '*.png'))

# get all the image names
all_label_names = [os.path.split(name)[-1] for name in all_label_paths]
all_image_names = [os.path.split(name)[-1] for name in all_image_paths]

all_label_paths.sort()
all_image_paths.sort()

all_label_names.sort()
all_image_names.sort()

dst_path = '/media/saeid/LaCie/Saeid/segmentation/dataset/deepglobe/GrayScaled/512/train'

In [54]:
# check if label and images are the same
for idx, _ in enumerate(all_label_paths):
    lbl_name = os.path.split(all_label_paths[idx])[-1].replace('mask', 'sat')
    img_name = os.path.split(all_image_paths[idx])[-1]
    
    if lbl_name != img_name:
        print(lbl_name, img_name)
        break
print(idx)

595


In [55]:
image_size = 512
# determines if we need to show messages or not
UNKNOWN_CLASS = 6
small_image_list = []
small_label_list = []
verbose = False

dst_path_img = os.path.join(dst_path, 'image', 'img')
dst_path_lbl = os.path.join(dst_path, 'label', 'img')

os.makedirs(dst_path_img, exist_ok=True)
os.makedirs(dst_path_lbl, exist_ok=True)

for idx, img_path in enumerate(all_image_paths):    
    if verbose:
        if idx % 100 == 0:
            print(f"working on image {all_image_names[idx].replace('.png', '')}")
    start = time.time()
    large_img_arr = image_processing_utils.read_png_file(image_path=all_image_paths[idx])
    large_lbl_arr = image_processing_utils.read_png_file(image_path=all_label_paths[idx])
#     large_lbl_arr_2d = image_processing_utils.replace_rgb_to_class(large_lbl_arr, 
#                                                                    unknown_zero_flag=True)

    current_time = time.time()
    if verbose:
        if idx % 100 == 0:
            print(f'loading images took: {current_time - start}')
    
    # now we dont need to rotate the image
    # rotate image by angle
    # rotated_lbl_arr_2d = image_processing_utils.rotate_image_skimage(angle=30, 
    #                                                                      img=large_lbl_arr_2d)
    
#     # we might need to swap the value of unknown class
#     UNKNOWN_CLASS = 0
#     large_lbl_arr_2d = image_processing_utils.swap_value_np(array=large_lbl_arr_2d, val1=5, val2=0)
#     current_time = time.time()
#     if verbose:
#         print(f'swapping classes took: {current_time - start}')
    
    image_processing_utils.devide_small_image(
        large_img_arr=large_img_arr, 
        large_lbl_arr_2d = large_lbl_arr, 
        image_size=image_size, 
        dst_path_lbl=dst_path_lbl, 
        dst_path_img=dst_path_img,
        unknown_class=UNKNOWN_CLASS, 
        large_image_name=all_image_names[idx])
    
    current_time = time.time()
    if verbose:
        if idx % 100 == 0:
            print(f'deviding took: {current_time - start}')


# checkin the images to make sure we have same data type

In [9]:
img_gray_path = '/media/saeid/LaCie/Saeid/segmentation/dataset/deepglobe/GrayScaled/2448/image_grayscale/119_sat.png'
img_rgb_path = '/media/saeid/LaCie/Saeid/segmentation/dataset/deepglobe/GrayScaled/MagNet/land-train/119_sat.jpg'

# read the images
grayscale_matrix = image_processing_utils.read_png_file(img_gray_path)
rgb_matrix = image_processing_utils.read_png_file(img_rgb_path)

In [13]:
np.mean(rgb_matrix), np.mean(grayscale_matrix)

(62.86483358208025, 67.42821153856423)

In [None]:
all_image_path = '/media/saeid/LaCie/Saeid/segmentation/dataset/deepglobe/GrayScaled/2448/train/image_grayscale/'
all_label_path = '/media/saeid/LaCie/Saeid/segmentation/dataset/deepglobe/GrayScaled/2448/train/label_2d'

all_label_paths = glob.glob(os.path.join(all_label_path, '*.png'))
all_image_paths = glob.glob(os.path.join(all_image_path, '*.png'))

# get all the image names
all_label_names = [os.path.split(name)[-1] for name in all_label_paths]
all_image_names = [os.path.split(name)[-1] for name in all_image_paths]

all_label_paths.sort()
all_image_paths.sort()

all_label_names.sort()
all_image_names.sort()

dst_path = '/media/saeid/LaCie/Saeid/segmentation/dataset/deepglobe/GrayScaled/512/train'

# convert PNG to JPG

In [16]:
png_imgs_path = '/media/saeid/LaCie/Saeid/segmentation/dataset/deepglobe/GrayScaled/2448/image_grayscale'
dst_path = '/media/saeid/LaCie/Saeid/segmentation/dataset/deepglobe/GrayScaled/MagNet/land-train/'

all_png_images = glob.glob(os.path.join(png_imgs_path, '*.png'))

for png_img in all_png_images:
    image_processing_utils.convert_png_to_jpg(src_img = png_img, dst_path=dst_path)