# What's in this Notebook?

Code that creates copies of all images at a different resolution  
and converts the mask images to single-channel grayscale images.

# Imports

In [None]:
from ml_project.utils import paths,files

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
from skimage.io import imread, imsave
from skimage.transform import resize
from skimage.color import rgb2gray

In [None]:
from tqdm.notebook import tqdm

# Load the dataset

In [None]:
# Read the samples table from the CSV at paths.FILE_SAMPLES; the cells below
# use its 'satellite_file' and 'mask_file' columns.
samples_df = pd.read_csv(paths.FILE_SAMPLES)

In [None]:
# Directories that the file names in samples_df are joined onto below.
path_satellite = paths.SATELLITE_IMAGES
path_mask = paths.MASK_IMAGES

In [None]:
# Join every relative file name onto its image directory.
samples_df['abs_satellite_file'] = samples_df['satellite_file'].map(
    lambda name: path_satellite / name
)
samples_df['abs_mask_file'] = samples_df['mask_file'].map(
    lambda name: path_mask / name
)

# Inspect different sizes and parameters

In [None]:
def plot_images(imgs, shape=None, img_height=10, labels=None):
    """Show several images in a grid of subplots without axis ticks.

    Parameters
    ----------
    imgs : sequence of array-like
        Images to draw; each one is passed to ``Axes.imshow``.
    shape : tuple of int, optional
        Grid layout as ``(rows, cols)``. Defaults to a single row with one
        column per image.
    img_height : number, optional
        Edge length (in figure inches) reserved for every grid cell.
    labels : sequence of str, optional
        Subplot titles, matched to ``imgs`` by position.
    """
    if shape is None:
        shape = (1, len(imgs))
    n_rows, n_cols = shape[0], shape[1]

    # figsize is (width, height): the width grows with the number of columns
    # and the height with the number of rows.
    figsize = (img_height * n_cols, img_height * n_rows)
    # squeeze=False always returns a 2-D array of axes, so a 1x1 grid can be
    # flattened just like any other layout.
    fig, axs = plt.subplots(n_rows, n_cols, figsize=figsize, squeeze=False)
    for i, (img, ax) in enumerate(zip(imgs, axs.flatten())):
        ax.imshow(img)
        ax.set_xticks([])
        ax.set_yticks([])
        if labels is not None:
            ax.set_title(labels[i])

    fig.tight_layout()
    plt.show()

In [None]:
def plot_different_sizes(img,sizes,fig_height=10,anti_aliasing=True):
    """Resize ``img`` to every square edge length in ``sizes`` and plot the results.

    Each resized copy is titled ``size=<edge length>``; ``fig_height`` is
    forwarded to ``plot_images`` as ``img_height`` and ``anti_aliasing`` is
    forwarded to ``resize``.
    """
    resized = []
    for size in sizes:
        target_shape = (size, size)
        resized.append(resize(img, target_shape, anti_aliasing=anti_aliasing))
    titles = [f'size={size}' for size in sizes]
    plot_images(resized, labels=titles, img_height=fig_height)
    

In [None]:
# Compare a wide range of target sizes on two randomly drawn samples.
sizes = [256, 128, 64, 32, 16, 8]

for i, sample in samples_df.sample(n=2).iterrows():
    # Load both images first, then draw the satellite row followed by the mask row.
    img_satellite, img_mask = (
        imread(sample[column])
        for column in ('abs_satellite_file', 'abs_mask_file')
    )
    plot_different_sizes(img=img_satellite, sizes=sizes, fig_height=10)
    plot_different_sizes(img=img_mask, sizes=sizes, fig_height=10)

In [None]:
# Restrict the comparison to the sizes that are actual candidates.
sizes = [256, 128, 64]

for i, sample in samples_df.sample(n=3).iterrows():
    # Load both images first, then draw the satellite row followed by the mask row.
    img_satellite, img_mask = (
        imread(sample[column])
        for column in ('abs_satellite_file', 'abs_mask_file')
    )
    plot_different_sizes(img=img_satellite, sizes=sizes, fig_height=20)
    plot_different_sizes(img=img_mask, sizes=sizes, fig_height=20)

In [None]:
# Resize with and without anti-aliasing, for satellite images and masks alike.
sizes = [256, 128, 64]

for i, sample in samples_df.sample(n=2).iterrows():
    img_satellite, img_mask = (
        imread(sample[column])
        for column in ('abs_satellite_file', 'abs_mask_file')
    )
    # Satellite image: anti-aliased row first, then the plain one.
    plot_different_sizes(img_satellite, sizes, fig_height=20, anti_aliasing=True)
    plot_different_sizes(img_satellite, sizes, fig_height=20, anti_aliasing=False)
    # Mask: same order.
    plot_different_sizes(img_mask, sizes, fig_height=20, anti_aliasing=True)
    plot_different_sizes(img_mask, sizes, fig_height=20, anti_aliasing=False)

# Inspect the channels and threshold 

In [None]:
# Does resizing a satellite image change its value range or its channels?
for i, sample in samples_df.sample(n=3).iterrows():
    img_satellite = imread(sample['abs_satellite_file'])
    img_satellite_resized = resize(
        img_satellite,
        output_shape=(128, 128),
        preserve_range=True,
        anti_aliasing=True,
    )
    # Print the maximum value and the array shape before and after resizing.
    print(f'before: max={np.max(img_satellite)} ; shape={img_satellite.shape}')
    print(f'after : max={np.max(img_satellite_resized)} ; shape={img_satellite_resized.shape}')

In [None]:
# Does resizing a mask image change its value range or its channels?
for i, sample in samples_df.sample(n=3).iterrows():
    img_mask = imread(sample['abs_mask_file'])
    img_mask_resized = resize(
        img_mask,
        output_shape=(128, 128),
        preserve_range=True,
        anti_aliasing=True,
    )
    # Print the maximum value and the array shape before and after resizing.
    print(f'before: max={np.max(img_mask)} ; shape={img_mask.shape}')
    print(f'after : max={np.max(img_mask_resized)} ; shape={img_mask_resized.shape}')

In [None]:
# Reduce a resized mask to one channel, binarize it, and report range/shape at each step.
for i, sample in samples_df.sample(n=3).iterrows():
    img_mask = imread(sample['abs_mask_file'])
    img_mask_resized = resize(
        img_mask,
        output_shape=(128, 128),
        preserve_range=True,
        anti_aliasing=True,
    ).astype(int)
    # Multi-channel masks keep only channel index 2; 2-D masks are used as they are.
    if img_mask.ndim > 2:
        img_mask_gray = img_mask_resized[:, :, 2]
    else:
        img_mask_gray = img_mask_resized
    # Values above 100 become 255, everything else 0.
    img_mask_threshold = (img_mask_gray > 100) * 255
    print(f'before: max={np.max(img_mask)} ; shape={img_mask.shape}')
    print(f'gray  : max={np.max(img_mask_gray)} ; shape={img_mask_gray.shape}')
    print(f'thresh: max={np.max(img_mask_threshold)} ; shape={img_mask_threshold.shape}')
    print(np.unique(img_mask_threshold))

In [None]:
# Show each processing stage of a mask side by side: resized -> one channel -> thresholded.
threshold = 100

for i, sample in samples_df.sample(n=3).iterrows():
    img_mask = imread(sample['abs_mask_file'])
    img_mask_resized = resize(
        img_mask,
        output_shape=(128, 128),
        preserve_range=True,
        anti_aliasing=True,
    ).astype(int)
    # Multi-channel masks keep only channel index 2; 2-D masks are used as they are.
    if img_mask.ndim > 2:
        img_mask_grayscale = img_mask_resized[:, :, 2]
    else:
        img_mask_grayscale = img_mask_resized
    img_mask_threshold = img_mask_grayscale > threshold
    labels = ['resized','grayscale',f'threshold={threshold}']
    imgs = [img_mask_resized, img_mask_grayscale, img_mask_threshold]
    plot_images(imgs, labels=labels, img_height=10)

# Preprocess all Images (resize+channels)

In [None]:
# Module-level settings read by preprocess_img below.
# anti_aliasing: forwarded to resize().
anti_aliasing = True
# px: edge length of the square output images.
px = 128
# mask_threshold: mask values above this become 255, the rest 0.
mask_threshold = 100

In [None]:
def preprocess_img(file_path, preprocessed_file_path, img_type):
    """Resize one image and save the result as an 8-bit file.

    The image is resized to ``px`` x ``px`` using the module-level settings
    ``px`` and ``anti_aliasing``. Masks are additionally reduced to a single
    channel and binarized with the module-level ``mask_threshold``.

    Parameters
    ----------
    file_path : path-like
        Image to read.
    preprocessed_file_path : path-like
        Where the processed image is written.
    img_type : str
        ``'mask'`` enables the single-channel/threshold step; any other value
        only resizes the image.
    """
    img = imread(file_path)
    img = resize(img, (px, px), anti_aliasing=anti_aliasing, preserve_range=True)
    if img_type == 'mask':
        if img.ndim > 2:
            # Multi-channel mask: keep only channel index 2.
            img = img[:, :, 2]
        # Binarize every mask, including ones that are already single-channel;
        # otherwise the interpolated values from resizing would be saved as-is.
        img = (img > mask_threshold) * 255
    img = img.astype(np.uint8)
    # check_contrast=False suppresses skimage's low-contrast check on save.
    imsave(preprocessed_file_path, img, check_contrast=False)

In [None]:
# Target locations of the preprocessed copies, using the same file names as the originals.
samples_df['abs_satellite_file_128'] = samples_df['satellite_file'].map(
    lambda name: paths.SATELLITE_IMAGES_128 / name
)
samples_df['abs_mask_file_128'] = samples_df['mask_file'].map(
    lambda name: paths.MASK_IMAGES_128 / name
)

In [None]:
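# NOTE: this loop writes the preprocessed copy of every image. It is left
# commented out (presumably so that re-running the notebook does not
# regenerate all files); uncomment it to (re)create them.
#for i,sample in tqdm(samples_df.iterrows(),total=samples_df.shape[0]):
#    preprocess_img(sample['abs_satellite_file'],sample['abs_satellite_file_128'],'satellite')
#    preprocess_img(sample['abs_mask_file'],sample['abs_mask_file_128'],'mask')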

# Inspect the Preprocessed Images

In [None]:
# Spot-check five random preprocessed pairs: print the mask's distinct values
# and shape, then show the satellite image next to its mask.
for i, sample in samples_df.sample(n=5).iterrows():
    img_satellite, img_mask = (
        imread(sample[column])
        for column in ('abs_satellite_file_128', 'abs_mask_file_128')
    )
    print(np.unique(img_mask))
    print(img_mask.shape)
    plot_images(
        [img_satellite, img_mask],
        labels=['satellite_128px','mask_binary_128px'],
        img_height=15,
    )