In [3]:
import random
import numpy as np
import os
import matplotlib.pyplot as plt
from PIL import Image
import cv2

import scipy
from scipy import ndimage

from features import rgb2gray

# This is a bit of magic to make matplotlib figures appear inline in the notebook
# rather than in a new window.
%matplotlib inline
plt.rcParams['figure.figsize'] = (30.0, 16.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Some more magic so that the notebook will reload external python modules;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
def parse_labels(label_arr):
    """    
    None = Normal
    1 = missing data (expected)
    2 = missing data (unexpected)
    3 = miscoloration
    4 = edge warping
    5 = eclipse (missing data)
    6 = eclipse (only miscoloration)
    """
    label = 0
    if len(label_arr) == 0:
        label = 0
    else:
        if 1 in label_arr or 2 in label_arr or 5 in label_arr: # missing data labels!
            label = 1
        else:
            label = 0
    return label

In [6]:
layer_name = 'VIIRS_SNPP_CorrectedReflectance_TrueColor'
img_extension = ".jpg"
data_dir = 'data/4326/'
labels_file = os.path.join(data_dir, layer_name + ".txt")
img_size = (4096, 2048)

In [2]:
# Check the labels file exists!
assert os.path.exists(labels_file), "Cannot find the {} file".format(layer_name + ".txt")
        
# Read in the file line by line
with open(labels_file) as f:
    file_lines = f.read().splitlines()
    num_total_img = len(file_lines)
    for line in file_lines:
        line_list = line.split()  
        split = line_list[0]
        datestring = line_list[1]
        label_arr = [int(item) for item in line_list[2:]]
        label = parse_labels(label_arr)
        
        # Construct and resize the image
        filename = os.path.join(data_dir, datestring, layer_name + img_extension)
        image = np.asarray(Image.open(filename).resize(img_size, Image.BILINEAR))
        
        # Grayscale the image
        imgray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Threshold the image to black and white
        _, im_thresh = cv2.threshold(imgray, 0, 255, cv2.THRESH_BINARY_INV)

        # Copy the thresholded image
        im_floodfill = im_thresh.copy()

        # Fill in very small holes
        small_kernel = np.ones((4,4),np.uint8)
        im_floodfill = cv2.erode(im_floodfill, small_kernel, iterations = 1)

        # Expand larger holes
        large_kernel = np.ones((30,30),np.uint8)
        im_floodfill = cv2.dilate(im_floodfill, large_kernel, iterations = 1)

        # Mask used to flood filling.
        # Notice the size needs to be 2 pixels than the image.
        h, w = im_floodfill.shape
        mask = np.zeros((h+2, w+2), np.uint8)

        # Floodfill from top and bottom
        for w_idx in range(w):
            cv2.floodFill(im_floodfill, mask, (w_idx,0), 0);   # flood fill top
            cv2.floodFill(im_floodfill, mask, (w_idx,h-1), 0); # flood fill bottom

        # Count the number of holes
        _, nr_objects = ndimage.label(im_floodfill)
        pre_ratio_missing = 1.0 - (im_thresh == 0).sum() / im_thresh.size
        post_ratio_missing = 1.0 - (im_floodfill == 0).sum() / im_thresh.size
        missing_data_flag = False
        if nr_objects == 0 and pre_ratio_missing > 0.165:
            missing_data_flag = True
            print("{}: Detected LARGE hole ({} missing data)".format(datestring, pre_ratio_missing))
        if nr_objects != 0:
            missing_data_flag = True
            print("{}: Detected SMALL hole ({} missing data holes)".format(datestring, nr_objects))
        
        if label == 0 and missing_data_flag: # False Positive
            print("^^^ Algorithm INCORRECTLY detected missing data (FP)")
        elif label == 1 and not missing_data_flag: # True Negative
            print("{}: Algorithm MISSED the hole detection (TN).".format(datestring))

2015-11-25: Detected LARGE hole (0.2586461305618286 missing data)
2015-12-11: Detected SMALL hole (3 missing data holes)
2015-12-21: Detected SMALL hole (1 missing data holes)
2015-12-23: Detected SMALL hole (1 missing data holes)
2016-01-09: Detected SMALL hole (3 missing data holes)
2016-01-11: Detected SMALL hole (1 missing data holes)
2016-01-14: Detected SMALL hole (1 missing data holes)
2016-01-18: Detected SMALL hole (1 missing data holes)
2016-01-19: Detected SMALL hole (1 missing data holes)
2016-01-30: Detected SMALL hole (1 missing data holes)
2016-02-06: Detected SMALL hole (1 missing data holes)
2016-02-07: Algorithm MISSED the hole detection (TN).
2016-02-18: Detected SMALL hole (1 missing data holes)
2016-03-09: Detected SMALL hole (2 missing data holes)
2016-03-18: Detected SMALL hole (1 missing data holes)
2016-03-31: Detected SMALL hole (2 missing data holes)
2016-04-01: Detected SMALL hole (2 missing data holes)
2016-04-02: Detected SMALL hole (2 missing data holes)
