In [2]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
from glob import glob

#### data cleaning

In [57]:
def load_dataset(dataset_config, relative_path = "raw"):
    """Collect the paths found three directory levels below a dataset folder.

    Parameters
    ----------
    dataset_config : dict
        Must provide ``"root"`` (base directory) and ``"relative_paths"``
        (mapping from a name such as ``"raw"`` to a sub-directory of the root).
    relative_path : str
        Key looked up in ``dataset_config["relative_paths"]``.

    Returns
    -------
    list of str
        Every path matching ``<root>/<sub-directory>/*/*/*``, in whatever
        order ``glob`` yields them (the list is not sorted).

    Raises
    ------
    KeyError
        If ``"root"``, ``"relative_paths"`` or the requested key is missing.
    """
    base_dir = dataset_config["root"]
    sub_dir = dataset_config["relative_paths"][relative_path]
    return glob(os.path.join(base_dir, sub_dir, "*/*/*"))

def resize(img_path, new_size, method = cv2.INTER_LINEAR):
    """Load an image from disk and return it at ``new_size`` in OpenCV channel order.

    Parameters
    ----------
    img_path : str
        File to read with ``plt.imread``.
    new_size : sequence of two ints
        Target ``(width, height)``, the order ``cv2.resize`` expects for ``dsize``.
    method : int
        OpenCV interpolation flag, forwarded to ``cv2.resize``.

    Returns
    -------
    numpy.ndarray
        The image with its first and third colour channels swapped, resampled
        only when its current size differs from ``new_size``.
    """
    img = plt.imread(img_path)
    if img.ndim == 3:
        # Swap channels on every path (not only when resizing) so the result
        # has the same channel order whether or not a resize was needed.
        # 2-D (single-channel) arrays have nothing to swap and are left alone.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # The size lives in ``img.shape`` as (rows, cols[, channels]); unpacking the
    # array itself would iterate over pixel rows instead.
    height, width = img.shape[:2]
    if (width, height) == tuple(new_size):
        return img
    return cv2.resize(img, new_size, interpolation = method)

def _plot_comparison(raw_img, alt_img):
    """Show two images stacked vertically: ``raw_img`` on top, ``alt_img`` below."""
    _, axes = plt.subplots(nrows=2, ncols=1)
    for axis, picture in zip(axes, (raw_img, alt_img)):
        axis.imshow(picture)

def save_to(relative_path, img_path, img):
    """Write ``img`` to the mirror of ``img_path`` under another dataset folder.

    The first component of ``img_path`` equal to ``"raw"`` is replaced by
    ``relative_path``; every other component (sub-folders and file name) is
    kept. Missing destination folders are created before writing, and a
    failed write is reported instead of being silently ignored.

    Parameters
    ----------
    relative_path : str
        Folder name substituted for the ``"raw"`` path component.
    img_path : str
        Path of the source image, split on ``os.path.sep``.
    img
        Image data handed unchanged to ``cv2.imwrite``.

    Returns
    -------
    None
        The outcome is only reported on stdout.
    """
    path_components = img_path.split(os.path.sep)
    img_name = path_components[-1].strip()
    try:
        raw_index = path_components.index("raw")
    except ValueError:
        # No "raw" component: there is no destination to derive from this path.
        print("Skipping", img_name)
        return
    path_components[raw_index] = relative_path
    out_path = os.path.sep.join(path_components)
    out_dir = os.path.dirname(out_path)
    if out_dir:
        # cv2.imwrite reports failure only through its return value, so make
        # sure the target folder exists before writing.
        os.makedirs(out_dir, exist_ok=True)
    if cv2.imwrite(out_path, img):
        print(img_name, "saved successfully!")
    else:
        print("Failed to save", img_name)

def normalize_scale(img):
    """Min-max rescale ``img`` to the range 0-255 and return it as float32.

    Parameters
    ----------
    img : numpy.ndarray
        Image to rescale; it is not modified.

    Returns
    -------
    numpy.ndarray
        Output of ``cv2.normalize`` with ``NORM_MINMAX`` bounds 0 and 255 and
        output depth ``CV_32F``.
    """
    # dst=None lets OpenCV allocate the output itself; a pre-made copy of the
    # input would only be used as the dst argument and never read afterwards.
    return cv2.normalize(
        img,
        None,
        alpha=0,
        beta=255,
        norm_type=cv2.NORM_MINMAX,
        dtype=cv2.CV_32F,
    )

def sanitise(dataset_config, new_size):
    """Resize, rescale and re-save every image listed for the ``"raw"`` dataset.

    Parameters
    ----------
    dataset_config : dict
        Configuration forwarded to ``load_dataset`` to list the raw images.
    new_size : tuple
        Target size forwarded to ``resize``.

    Returns
    -------
    None
        Each processed image is handed to ``save_to`` with the ``"cleaned"``
        folder name.
    """
    for img_path in load_dataset(dataset_config, "raw"):
        # resize() reads the file itself, so no separate read is needed here
        resized_img = resize(img_path, new_size)
        # rescale pixel values to the 0-255 range
        clean_img = normalize_scale(resized_img)
        save_to("cleaned", img_path, clean_img)
        

In [58]:
# Dataset layout: "root" is the base directory, "relative_paths" maps a
# dataset name to its sub-directory under that root.
DATASET_CONFIG = dict(
    root="../datasets",
    relative_paths=dict(raw="raw", cleaned="cleaned"),
)

# Target size passed through sanitise() to resize().
new_size = (256, 256)

# Run the cleaning pipeline over every raw image.
sanitise(DATASET_CONFIG, new_size)

10452_sat_08.jpg saved successfully!
10452_sat_18.jpg saved successfully!
111335_sat_00.jpg saved successfully!
111335_sat_01.jpg saved successfully!
111335_sat_02.jpg saved successfully!
111335_sat_03.jpg saved successfully!
111335_sat_04.jpg saved successfully!
111335_sat_07.jpg saved successfully!
111335_sat_08.jpg saved successfully!
111335_sat_10.jpg saved successfully!
111335_sat_12.jpg saved successfully!
111335_sat_13.jpg saved successfully!
111335_sat_15.jpg saved successfully!
111335_sat_23.jpg saved successfully!
111335_sat_24.jpg saved successfully!
111335_sat_25.jpg saved successfully!
111335_sat_26.jpg saved successfully!
111335_sat_27.jpg saved successfully!
111335_sat_35.jpg saved successfully!
111335_sat_36.jpg saved successfully!
111335_sat_37.jpg saved successfully!
111335_sat_44.jpg saved successfully!
111335_sat_45.jpg saved successfully!
111335_sat_46.jpg saved successfully!
111335_sat_47.jpg saved successfully!
111335_sat_48.jpg saved successfully!
111335_sat_54.

#### data pre-processing

In [13]:
def convert_to_gs(img):
    """Return the grayscale version of ``img``.

    The conversion uses OpenCV's ``COLOR_BGR2GRAY`` code, so ``img`` is
    interpreted as being in BGR channel order.
    """
    # NOTE(review): arrays loaded with plt.imread are presumably RGB, not BGR;
    # confirm how callers load the images passed in here.
    return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
