<a href="https://colab.research.google.com/github/AllanKamimura/AI/blob/master/satelite%20mask/cloud_dataset_clean.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import cv2
import csv
import concurrent
import numpy as np
import matplotlib.pyplot as plt

from PIL import Image
from concurrent.futures import ThreadPoolExecutor

In [None]:
# Folder that contains the extracted "38Clouds" directory.
# This is a placeholder: set it to a real path before running the cell.
download_path = None
if download_path is None:
    # Fail with an actionable message instead of the TypeError that
    # `None + "/38Clouds/Cloud"` would raise on the next line.
    raise ValueError("download_path is not set; point it at the folder that contains 38Clouds")

main_path  = download_path + "/38Clouds/Cloud"
red_path = main_path + "/red_channel"
blue_path = main_path + "/blue_channel"
green_path = main_path + "/green_channel"
output_path = main_path + "/images"

file_names_path = main_path + "/training_patches_38-Cloud.txt"

# One patch name per line; surrounding whitespace/newlines are stripped.
# The context manager closes the file even if reading fails.
with open(file_names_path , "r") as file1:
    file_names = [line.strip() for line in file1]

# The first line is discarded -- presumably a header row; verify against the file.
del file_names[0]
print(len(file_names))

In [None]:
def read_image(image_path):
    """Read an image file and return its first channel with a trailing axis.

    Args:
        image_path: path of the image file to load with ``cv2.imread``.

    Returns:
        A NumPy array holding channel 0 of the decoded image, shaped
        ``(rows, cols, 1)`` so several bands can be concatenated on the
        last axis.

    Raises:
        OSError: if ``cv2.imread`` cannot read the file.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # rather than raising; without this check the failure would surface
        # as an unrelated IndexError when the array is sliced below.
        raise OSError("could not read image: {}".format(image_path))
    # Slicing with 0:1 (instead of 0) keeps the channel axis in place.
    return image[:, :, 0:1]

In [None]:
def create_image(image_index):
    """Combine the red, green and blue band files of one patch into a JPEG.

    Args:
        image_index: patch name shared by the three band files, i.e. the
            part between the ``red_``/``green_``/``blue_`` prefix and the
            ``.TIF`` extension.

    Side effects:
        Writes ``<image_index>.jpeg`` into ``output_path``.
    """
    red = read_image(os.path.join(red_path, "red_" + image_index + ".TIF"))
    green = read_image(os.path.join(green_path, "green_" + image_index + ".TIF"))
    blue = read_image(os.path.join(blue_path, "blue_" + image_index + ".TIF"))
    # plt.imsave interprets the last axis of a 3-channel array as R, G, B,
    # so the bands must be stacked in exactly that order (the previous
    # red, blue, green order swapped two colour channels in the output).
    new_image = np.concatenate((red, green, blue), axis = -1)
    plt.imsave(os.path.join(output_path, image_index) + ".jpeg", new_image, vmin = 0, vmax = 255)

# Build every patch image with a pool of 5 worker threads.
with ThreadPoolExecutor(max_workers = 5) as executor:
    # Map each future back to the patch name it was submitted for, so a
    # failure can be reported against the right patch.
    future_image = {executor.submit(create_image, image_index): image_index for image_index in file_names}
    for future in concurrent.futures.as_completed(future_image):
        patch_name = future_image[future]
        try:
            # result() re-raises any exception raised inside the worker.
            future.result()
        except Exception as exc:
            # Report the patch tied to this future; the comprehension
            # variable `image_index` does not exist in this scope.
            print('%r generated an exception: %s' % (patch_name, exc))

In [None]:
# Number of entries currently in the output folder.
print(len(os.listdir(output_path )))

In [None]:
# Folders holding the composed patch images and their "gt_" mask files.
image_path, mask_path = (main_path + suffix for suffix in ("/images", "/masks"))

# Snapshot of the image folder's entries; each name is later passed to clean_dataset.
image_list = os.listdir(image_path)
def clean_dataset(image_index):
    """Delete an image/mask pair whose mask is almost uniformly dark or bright.

    The mask ``gt_<name>.TIF`` belonging to the image is loaded, and when
    the mean of its first channel is above 240 or below 10 the patch name
    is printed and both the mask and the ``.jpeg`` image are removed.

    Args:
        image_index: file name of an entry in ``image_path`` (name plus
            extension, e.g. ``"<patch>.jpeg"``).

    Raises:
        OSError: if the matching mask file cannot be read.
    """
    # splitext copes with names containing several dots (or none), where
    # unpacking the result of split(".") would raise ValueError.
    image_index = os.path.splitext(image_index)[0]
    mask_file = os.path.join(mask_path, "gt_" + image_index + ".TIF")
    image_file = os.path.join(image_path, image_index + ".jpeg")

    mask = cv2.imread(mask_file)
    if mask is None:
        # cv2.imread returns None instead of raising for unreadable files.
        raise OSError("could not read mask: {}".format(mask_file))

    # Compute the mean once; it is compared against both thresholds.
    mask_mean = mask[:, :, 0].mean()
    if mask_mean > 240 or mask_mean < 10:
        print(image_index)
        # os.remove replaces the IPython-only `!rm` shell escape: it is
        # plain Python and is not affected by spaces or shell
        # metacharacters in the path.
        for stale_file in (mask_file, image_file):
            try:
                os.remove(stale_file)
            except FileNotFoundError:
                # Like `rm` on a missing file: report it and carry on with
                # the other file of the pair.
                print("already missing: {}".format(stale_file))

In [None]:
# Check every image/mask pair with a pool of 5 worker threads.
with ThreadPoolExecutor(max_workers = 5) as executor:
    # Map each future back to the file name it was submitted for, so a
    # failure can be reported against the right file.
    future_image = {executor.submit(clean_dataset, image_index): image_index for image_index in image_list}
    for future in concurrent.futures.as_completed(future_image):
        file_name = future_image[future]
        try:
            # result() re-raises any exception raised inside the worker.
            future.result()
        except Exception as exc:
            # Report the file tied to this future; the comprehension
            # variable `image_index` does not exist in this scope.
            print(exc, file_name)

patch_54_3_by_8_LC08_L1TP_059014_20160620_20170221_01_T1
patch_544_24_by_15_LC08_L1TP_044010_20160220_20170224_01_T1
patch_541_23_by_13_LC08_L1TP_011002_20160620_20170323_01_T1
patch_55_3_by_11_LC08_L1TP_061017_20160720_20170223_01_T1
patch_55_3_by_11_LC08_L1TP_064017_20160420_20170223_01_T1
patch_416_18_by_8_LC08_L1TP_011002_20160620_20170323_01_T1
patch_55_3_by_11_LC08_L1TP_064014_20160420_20170223_01_T1
patch_55_3_by_13_LC08_L1TP_011247_20160620_20170323_01_T1
patch_55_3_by_15_LC08_L1TP_002054_20160520_20170324_01_T1
patch_55_3_by_9_LC08_L1TP_059014_20160620_20170221_01_T1
patch_55_3_by_9_LC08_L1TP_044010_20160220_20170224_01_T1
patch_56_3_by_10_LC08_L1TP_044010_20160220_20170224_01_T1
patch_56_3_by_10_LC08_L1TP_059014_20160620_20170221_01_T1
patch_56_3_by_12_LC08_L1TP_064017_20160420_20170223_01_T1
patch_56_3_by_14_LC08_L1TP_011247_20160620_20170323_01_T1
patch_57_3_by_13_LC08_L1TP_064017_20160420_20170223_01_T1
patch_57_3_by_15_LC08_L1TP_029040_20160720_20170222_01_T1
patch_57_3_b

In [None]:
# Entry counts of the image and mask folders, printed one per line.
print(len(os.listdir(image_path )))
print(len(os.listdir(mask_path )))

In [None]:
# Hard-coded dataset root; this rebinds main_path, image_path and mask_path.
main_path  = "/content/drive/MyDrive/Colab_Notebooks/Nasa/38Clouds/Cloud"
image_path = main_path + "/images"
mask_path = main_path + "/masks"

# Sorted listings of both folders, used below to compare entries position by position.
images = sorted(os.listdir(image_path))
masks = sorted(os.listdir(mask_path))

def check_list(image, mask):
    """Print both names when an image file and a mask file do not match.

    A mask matches when its name without extension equals ``"gt_"``
    followed by the image name without extension.

    Args:
        image: image file name, e.g. ``"<patch>.jpeg"``.
        mask: mask file name, e.g. ``"gt_<patch>.TIF"``.

    Returns:
        None. On a mismatch the expected mask stem and the actual mask
        stem are printed on separate lines; a match prints nothing.
    """
    # splitext only strips the final extension, so names with several dots
    # (or none) no longer break the two-value unpacking of split(".").
    expected_stem = "gt_" + os.path.splitext(image)[0]
    actual_stem = os.path.splitext(mask)[0]
    if expected_stem != actual_stem:
        print(expected_stem)
        print(actual_stem)

# Compare the two sorted listings position by position.
# zip() stops at the shorter list, so surplus entries in the longer one are not checked here.
for image, mask in zip(images,masks):
    check_list(image,mask)


In [None]:
# Lengths of the image and mask listings, printed side by side.
print(len(images), len(masks))