In [1]:
# imports

import os
import sys

import cv2
import pandas as pd
import rasterio
from rasterio.plot import reshape_as_image

# make the parent directory importable so the local water_body_finder package resolves
sys.path.append('../')

from water_body_finder.feature_extraction import extract_variance
from water_body_finder.utilities import create_window

In [2]:
# define parameters

# step in pixels between sampled points; it is also passed to create_window as the
# size of the label window, must be odd number
data_resolution = 1

# size of largest window used in feature extraction, must be odd number
data_padding = 15

# NOTE(review): absolute, machine-specific paths -- consider deriving them from one
# configurable base directory.
# Raw strings keep every backslash literal, so no separator can be misread as an
# escape sequence.
image_data_directory = r"C:\personal\satalite-image-water-body-finder\data\image_data"
label_data_directory = r"C:\personal\satalite-image-water-body-finder\data\label_masks"
output_directory = r"C:\personal\satalite-image-water-body-finder\data\training_sets\training_set_2"

In [3]:
# training set layout: one empty list per output column, in CSV column order
training_set_format = {
    column: [] for column in ('variance_a', 'variance_b', 'label')
}

# define feature extraction functions
def extract_features(image_data, point, data_resolution, training_set,
                     small_window_size=5, large_window_size=None):
    """Append the variance features of one sample point to ``training_set``.

    Two windows are cut around ``point`` with ``create_window``; the
    ``extract_variance`` result of the small window is appended to
    ``training_set['variance_a']`` and that of the large window to
    ``training_set['variance_b']``.

    Parameters
    ----------
    image_data
        Image passed straight through to ``create_window``.
    point
        Sample location passed straight through to ``create_window``
        (the caller in this notebook passes ``[x, y]``).
    data_resolution
        Not used by this function; kept so existing calls keep working.
    training_set
        Dict holding the lists ``'variance_a'`` and ``'variance_b'``;
        mutated in place.
    small_window_size
        Window size used for ``'variance_a'`` (default 5).
    large_window_size
        Window size used for ``'variance_b'``; ``None`` (default) means the
        notebook-level ``data_padding`` setting.

    Returns
    -------
    None
    """
    if large_window_size is None:
        # looked up at call time so a re-run of the parameter cell is picked up
        large_window_size = data_padding

    window_a = create_window(image_data, point, small_window_size)
    window_b = create_window(image_data, point, large_window_size)

    # compute both values before appending so a failure cannot leave the two
    # columns with different lengths
    variance_a = extract_variance(window_a)
    variance_b = extract_variance(window_b)

    training_set['variance_a'].append(variance_a)
    training_set['variance_b'].append(variance_b)


In [4]:
# Build one training-set CSV per image file: sample points on a regular grid, label
# each point from the mask image, and extract features for a balanced subset.
#
# NOTE(review): os.listdir returns entries in arbitrary order, so which entry the
# "[1:]" slice skips is not guaranteed -- confirm why the first entry is skipped.
for filename in os.listdir(image_data_directory)[1:]:
    # name without extension; str.replace("tif", ...) would also rewrite a "tif"
    # occurring inside the name itself
    base_name = os.path.splitext(filename)[0]

    # load files; the context managers close each dataset once its pixels are read
    with rasterio.open(os.path.join(image_data_directory, filename)) as image_file:
        image_data = reshape_as_image(image_file.read())

    with rasterio.open(os.path.join(label_data_directory, base_name + ".jpg")) as mask_file:
        mask_data = reshape_as_image(mask_file.read())

    # number of sample positions along each axis, leaving data_padding pixels of
    # margin in total for the feature windows
    height = int(((image_data.shape[0]) / data_resolution) - data_padding / data_resolution)
    width = int(((image_data.shape[1]) / data_resolution) - data_padding / data_resolution)

    # shift of the first sample away from the image edge
    offset = round(data_padding/2)

    num_positive = 0
    num_negative = 0

    # fresh lists for every file: assigning training_set_format directly would share
    # its lists, so each CSV would also contain the rows of all earlier files
    training_set = {column: [] for column in training_set_format}

    for j in range(height):
        for i in range(width):
            y = j * data_resolution + offset
            x = i * data_resolution + offset

            # extract label: True when the window mean of the first mask channel is
            # below 127 (assumes the window is rows x cols x channels -- TODO confirm)
            label_window = create_window(mask_data, [x, y], data_resolution)
            label = label_window.mean(axis=0).mean(axis=0)[0] < 127

            # every positive sample is kept; a negative one only while negatives do
            # not outnumber positives
            if label or num_positive >= num_negative:

                # extract features
                extract_features(image_data, [x, y], data_resolution, training_set)
                training_set['label'].append(label)

                # keep track of positive to negative ratios in training data
                if label:
                    num_positive += 1
                else:
                    num_negative += 1

    # save to csv
    training_set_df = pd.DataFrame(training_set)
    training_set_df.to_csv(os.path.join(output_directory, base_name + ".csv"))


KeyboardInterrupt: 