<a name='1'></a>
## Import Packages and load data

In [1]:
### -*- coding: utf-8 -*-
"""
Created on Mon May 24 13:26:13 2021

@author: kjsanche

Description: 
A function to load the 5 minute granules from MODIS channel 1 
(0.65 microns) and the contrail mask for ML with a CNN.

To do:
ASAP:
-create blacklist of images to exclude (if images are to big)
-separate testing data
-plot testing data vs mask



lower priority:
-optimize image shape by transforming back to original satellite swath projection
-save and set up to load data as tfrecord
-organize/markdown/comment code


Input:
Path   (string)

        
        
Output:
MODISCh1 (2D uint32)
MASK     (2D uint16)
"""


from matplotlib import pyplot as plt
%matplotlib inline
import numpy as np
import struct
import os
import glob
import cv2
from UNET_Functions import unet_model, summary
from Sat_contrail_read import Extract_RawDef, extract_img, extract_mask, extract_imglist, get_model_memory_usage
from scipy.spatial import ConvexHull
from scipy.ndimage import rotate
from format_input import *
import tensorflow as tf

sys_details = tf.sysconfig.get_build_info()
print(sys_details)
cudnn_version = sys_details["cudnn_version"]
cuda_version = sys_details["cuda_version"]

print('cuda version: ', cuda_version)
print('cudNN version: ',cudnn_version)
print('TF version: ', tf.version.VERSION)



#tf.compat.v1.disable_eager_execution()
#tf.compat.v1.experimental.output_all_intermediates(True) 
path = os.getcwd()
proj_path = os.path.normpath(path + os.sep + os.pardir) #get parent directory
image_path = '/home/kjsanche/Desktop/ExternalSSD/SatContrailData/' #os.path.join(proj_path, 'data/')
save_TFrecord_path = '/home/kjsanche/Desktop/ExternalSSD/SatContrailData/TFrecords/' 


image0065, image0380, image0680, image0850, image1100, image1200, image1330, AUX_list, mask_list = extract_imglist(image_path)

N = len(image0065)
print(N)
print(len(image0380))

OrderedDict([('cpu_compiler', '/home/builder/ktietz/aggregate/tensorflow_recipes/ci_cpu/tensorflow-base_1614583966145/_build_env/bin/x86_64-conda_cos6-linux-gnu-gcc'), ('cuda_compute_capabilities', ['compute_35', 'compute_52', 'compute_60', 'compute_61', 'compute_70', 'compute_75']), ('cuda_version', '10.1'), ('cudnn_version', '7'), ('is_cuda_build', True), ('is_rocm_build', False)])
cuda version:  10.1
cudNN version:  7
TF version:  2.4.1
5924
5924


## extract image dimensions

In [2]:
dim = Extract_RawDef(AUX_list)

## make dataset

In [3]:
img0065_filenames = tf.constant(image0065)
img0380_filenames = tf.constant(image0380)
img0680_filenames = tf.constant(image0680)
img0850_filenames = tf.constant(image0850)
img1100_filenames = tf.constant(image1100)
img1200_filenames = tf.constant(image1200)
img1330_filenames = tf.constant(image1330)

masks_filenames = tf.constant(mask_list)
print(img1100_filenames.shape)
print(masks_filenames.shape)
dataset = tf.data.Dataset.from_tensor_slices((img0065_filenames, img0380_filenames, img0680_filenames, img0850_filenames, img1100_filenames, img1200_filenames, img1330_filenames, masks_filenames, dim))
#print(dataset)

(5924,)
(5924,)


In [4]:
def process_path(img0065_path, img0380_path, img0680_path, img0850_path, img1100_path, img1200_path, img1330_path, mask_path,dim):
# convert binary files to matrix of integers

    img1 = extract_img(str(img0065_path.numpy().decode('ascii')),int(dim[0].numpy()),int(dim[1].numpy()))
    img2 = extract_img(str(img0380_path.numpy().decode('ascii')),int(dim[0].numpy()),int(dim[1].numpy()))
    img3 = extract_img(str(img0680_path.numpy().decode('ascii')),int(dim[0].numpy()),int(dim[1].numpy()))
    img4 = extract_img(str(img0850_path.numpy().decode('ascii')),int(dim[0].numpy()),int(dim[1].numpy()))
    img5 = extract_img(str(img1100_path.numpy().decode('ascii')),int(dim[0].numpy()),int(dim[1].numpy()))
    img6 = extract_img(str(img1200_path.numpy().decode('ascii')),int(dim[0].numpy()),int(dim[1].numpy()))
    img7 = extract_img(str(img1330_path.numpy().decode('ascii')),int(dim[0].numpy()),int(dim[1].numpy()))

    img = np.concatenate((img1, img2, img3, img4, img5, img6, img7), axis=2)
    
    mask = extract_mask(str(mask_path.numpy().decode('ascii')),int(dim[0].numpy()),int(dim[1].numpy()))
   
    return tf.convert_to_tensor(img,dtype=tf.float16), tf.convert_to_tensor(mask,dtype=tf.int8)

def preprocess(image, mask):
    input_image = tf.image.resize(image, (2048, 4096), method='nearest')
    #print(img)
    input_mask = tf.image.resize(mask, (2048, 4096), method='nearest')
    #input_image(input_image>0) = 1
    input_image = (input_image-25500) / np.float16(7500) # assuming range of 180-330 K (BT is multiplied by 100)

    return input_image, input_mask

def _set_shapes(img, mask):
    img.set_shape([4096, 4096, 7])
    mask.set_shape([4096, 4096, 1])

    return img, mask 

#print(dataset)
image_ds = dataset.map(lambda aa, bb, cc, dd, ee, ff, gg, hh, ii: tf.py_function(process_path, [aa, bb, cc, dd, ee, ff, gg, hh, ii], [tf.float16, tf.int8]))
image_ds = image_ds.map(_set_shapes)
print(image_ds)
#processed_image_ds = image_ds.map(preprocess)

#print(processed_image_ds)

<MapDataset shapes: ((4096, 4096, 7), (4096, 4096, 1)), types: (tf.float16, tf.int8)>


In [5]:
def display(display_list):
    plt.figure(figsize=(15, 15))

    title = ['Input Image', 'True Mask', 'Predicted Mask']

    for i in range(len(display_list)):
        plt.subplot(1, len(display_list), i+1)
        plt.title(title[i])
        print(i)
        print(display_list[i].shape)
        if i == 0:
            plt.imshow(np.float32(display_list[i][:,:,0]-display_list[i][:,:,1]))
        else:
            plt.imshow(np.float32(1*display_list[i]))
        plt.axis('off')
    plt.show()

The below code cell uses a lot of memory and therefore should not be used during training.

In [6]:
for image, mask in image_ds.take(1):
    sample_image, sample_mask = image, mask
print(sample_image.shape)



(4096, 4096, 7)


In [7]:
def rotate_img(image, angle):
    rotated_image =  rotate(np.float32(image), angle=angle, reshape=True, order=0)
    #zoom into minimum bounding rectangle
    y_min, y_max, x_min, x_max = [np.min(np.nonzero(rotated_image)[0][:]), np.max(np.nonzero(rotated_image)[0][:]), np.min(np.nonzero(rotated_image)[1][:]), np.max(np.nonzero(rotated_image)[1][:])]

    return rotated_image, y_min, y_max, x_min, x_max

In [8]:
#################################################################
# Check why sometimes images are full of nans
# double check mask names (used for save names)  align (masks and images align at least)
##################################################################

fileList=tf.io.gfile.glob([save_TFrecord_path + '*.tfrecords'])
cnt = 0

for image, mask in image_ds:
    sample_image, sample_mask = image, mask
    filename = mask_list[cnt][-36:-14] 
    cnt+=1
    
    #skip file if file already exists or nans/inf values are in images
    if save_TFrecord_path+'1_1' + filename + '.tfrecords' in fileList:
        continue
    if np.any(np.isnan(sample_image)) or np.any(np.isinf(sample_image)):
        #print('img')
        continue #sys.exit()
    if np.any(np.isnan(sample_mask)) or np.any(np.isinf(sample_mask)):
        #print('mask')
        continue #sys.exit()
        
        
        
    #find coordinates of all nonzero coordinates
    nonzero =np.asarray(np.nonzero(np.float32(sample_image[:,:,0]))).T

    #extract convexhull coordinates, first point and last point must be the same. 
    hull = ConvexHull(nonzero)
    hull = np.vstack((nonzero[hull.vertices,0],nonzero[hull.vertices,1])).T
    hull = np.vstack((hull,hull[0,:]))

    #calculate angle of rotation for minimum bounding rectangle
    angle, barea, bwidth, bheight, center_point, corner_points = minBoundingRect(hull)
    
    rotated_image, y_min, y_max, x_min, x_max = rotate_img(sample_image, angle=90-angle*180/np.pi)
    rotated_labels, _, _, _, _ = rotate_img(sample_mask, angle=90-angle*180/np.pi)
    
    w = x_max-x_min
    h = y_max-y_min
    
    #determine min/max dimensions
    maxCaseDim = np.max([w,h])
    minCaseDim = np.min([w,h])

    #rotate 90 degrees to make width the bigger side if needed
    if h > w:
        rotated_image, y_min, y_max, x_min, x_max = rotate_img(rotated_image, angle=90)
        rotated_image_zoom = rotated_image[y_min:y_max,x_min:x_max]
        rotated_labels, _, _, _, _ = rotate_img(rotated_labels, angle=90)
        rotated_labels_zoom = rotated_labels[y_min:y_max,x_min:x_max]
    else:
        rotated_image_zoom = rotated_image[y_min:y_max,x_min:x_max]
        rotated_labels_zoom = rotated_labels[y_min:y_max,x_min:x_max]
 
    #make images and mask have same h x w (2048,4096)
    mask = np.zeros((2048,4096,1), dtype = float)
    mask[0:rotated_labels_zoom.shape[0], 0:rotated_labels_zoom.shape[1]] += rotated_labels_zoom
    img_test = np.zeros((2048,4096,7), dtype = float)
    img_test[0:rotated_image_zoom.shape[0], 0:rotated_image_zoom.shape[1], :] += rotated_image_zoom

    
    #resize images to decreased resolution
    img_test = cv2.resize(img_test, dsize=(2048, 1024), interpolation=cv2.INTER_NEAREST)
    mask = cv2.resize(mask, dsize=(2048, 1024), interpolation=cv2.INTER_NEAREST)
    
    #add missing dimensions
    img_test = np.expand_dims(img_test, axis=0)
    mask = np.expand_dims(mask, axis=0)
    #print(mask.shape)
    #print(img_test.shape)
    
    #normalize img and set mask to 1
    mask[mask>1]=1
    mask = np.int8(mask)
    img_test = (img_test-25500) / np.float32(7500) # assuming range of 180-330 K (BT is multiplied by 100)
    img_test = np.float16(img_test)
    write_images_to_tfr_long(img_test, mask, filename=filename, max_files=1, out_dir=save_TFrecord_path)


    #display([sample_image,rotated_image_zoom])
        



/home/kjsanche/Desktop/ExternalSSD/SatContrailData/2018MYD/109/A2018109.1540/01__1km.raw
/home/kjsanche/Desktop/ExternalSSD/SatContrailData/2018MYD/109/A2018109.1540/20__1km.raw
/home/kjsanche/Desktop/ExternalSSD/SatContrailData/2018MYD/109/A2018109.1540/27__1km.raw
/home/kjsanche/Desktop/ExternalSSD/SatContrailData/2018MYD/109/A2018109.1540/29__1km.raw
/home/kjsanche/Desktop/ExternalSSD/SatContrailData/2018MYD/109/A2018109.1540/31__1km.raw
/home/kjsanche/Desktop/ExternalSSD/SatContrailData/2018MYD/109/A2018109.1540/32__1km.raw
/home/kjsanche/Desktop/ExternalSSD/SatContrailData/2018MYD/109/A2018109.1540/33__1km.raw
/home/kjsanche/Desktop/ExternalSSD/SatContrailData/2018MYD/109/A2018109.1540/MYD021KM-A2018109.1540.contrail-mask


ValueError: No points given

In [None]:
display([img_test,mask])