# Evaluate the models that are saved in the models folder

# Imports

The check for whether the notebook is running in Colab was removed.

In [None]:
!pip install -U scikit-image

In [None]:
#imports
from platform import python_version

#basic python stuff
import os
import json
from pathlib import Path

#basics from the SciPy Stack
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#colab stuff
from google.colab import drive

#data managing
from sklearn.model_selection import train_test_split
from skimage import io #read in images
from skimage.transform import resize

# alternative model
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from skimage.color import rgb2gray
from skimage.filters import median

# evaluation
import sklearn

#progress bar
from tqdm.notebook import tqdm

In [None]:
# settings

# Register tqdm with pandas so that Series/DataFrames gain `progress_apply`,
# which the image loading below uses to show a progress bar
tqdm.pandas(desc='Pandas_Progress')

In [None]:
# Report the interpreter version this notebook is running on
print("Python version =",python_version())

In [None]:
# get access to drive: mount Google Drive at /content/drive,
# the location the project files are read from further down
drive.mount('/content/drive')

In [None]:
# import local python files
import importlib.util

# Load binary_losses.py directly from its path on Drive and execute it as the
# module object `binary_losses`.
# Source of the file: https://github.com/maxvfischer/keras-image-segmentation-loss-functions
# NOTE(review): `binary_losses` is not referenced in the visible part of this notebook.
spec = importlib.util.spec_from_file_location("binary_losses", "/content/drive/MyDrive/ML_Project_Satellite_Images/binary_losses.py")
binary_losses = importlib.util.module_from_spec(spec)
spec.loader.exec_module(binary_losses)

# Hyperparameters

In [None]:
# seed used for drawing the sample ids and for the train/test split
random_state = 42

# training parameters
sample_size = 1000  # number of rows drawn from samples_df
test_size = 0.3  # passed to train_test_split as test_size
val_size = 0.3  # NOTE(review): not referenced in the visible part of this notebook

# data preprocessing
img_size = 128  # images and masks are resized to img_size x img_size
anti_aliasing = True  # forwarded to skimage.transform.resize
mask_threshold = 0.5  # mask values (after scaling by 1/255) above this become 1, all others 0

# model parameters
rnd_forest_estimators = 10  # n_estimators of the RandomForestClassifier
binary_search_depth = 15  # number of iterations of the gradient threshold search

# Retrieve the Dataset

In [None]:
# unzip data from Drive into /content/
# (-n: never overwrite files that already exist, -q: quiet output)
!unzip -n -q /content/drive/MyDrive/ML_Project_Satellite_Images/data/current_dataset.zip -d /content/

In [None]:
# Load samples.csv (one row of information per image) and index it by the 'id' column
samples_df = pd.read_csv('/content/dataset/samples.csv').set_index('id')
samples_df

In [None]:
# paths to the sat/mask folder
# (the trailing slash matters: file names are appended by plain string concatenation)
path_sat_folder = '/content/dataset/images/satellite/'
path_mask_folder = '/content/dataset/images/mask/'

In [None]:
# append absolute paths of the images to the dataframe
# (folder path + file name taken from the 'satellite_file' / 'mask_file' columns)
samples_df['abs_satellite_path'] = samples_df['satellite_file'].apply(lambda x: path_sat_folder+x)
samples_df['abs_mask_path'] = samples_df['mask_file'].apply(lambda x: path_mask_folder+x)

# Functions for Image reading and plotting

In [None]:
# function for image reading
def read_satellite_img(filepath):
  """Read a satellite image, resize it to img_size x img_size and divide it by 255.

  Uses the notebook-level settings `img_size` and `anti_aliasing`.
  """
  raw = io.imread(filepath)
  resized = resize(
      raw,
      output_shape=(img_size, img_size),
      anti_aliasing=anti_aliasing,
      preserve_range=True,  # keep the original value range so the division below does the scaling
  )
  return resized / 255.

def read_mask_img(filepath):
  """Read a mask image and return it as an img_size x img_size array of 0s and 1s.

  For images with more than two dimensions only channel index 2 is used.
  After resizing and dividing by 255, values above `mask_threshold` become 1
  and all other values become 0.
  """
  raw = io.imread(filepath)
  if raw.ndim > 2:
    raw = raw[:, :, 2]
  scaled = resize(
      raw,
      output_shape=(img_size, img_size),
      anti_aliasing=anti_aliasing,
      preserve_range=True,
  ) / 255.
  # binarise while keeping the floating point dtype of the scaled image
  return (scaled > mask_threshold).astype(scaled.dtype)

In [None]:
# function to load a batch of images
def load_img_batch(samples_df,ids):
  """Load the satellite images and masks of the given ids.

  Reads the paths from the 'abs_satellite_path' and 'abs_mask_path' columns of
  `samples_df` (with a progress bar) and returns the tuple
  (satellite_imgs, mask_imgs), each stacked along a new first axis.
  """
  def _load_column(column, reader):
    # read every file of the column, then stack the per-image arrays into one array
    loaded = samples_df.loc[ids, column].progress_apply(reader)
    return np.stack(loaded.to_numpy())

  return (
      _load_column('abs_satellite_path', read_satellite_img),
      _load_column('abs_mask_path', read_mask_img),
  )

In [None]:
# function to show some samples (with or without the predictions)
def show_sample(X, Y, samples_df, ids, Y_pred=None, threshold=None, sample_size=10, fig_height=6):
  """Plot randomly chosen samples, optionally together with their predictions.

  One figure is created per drawn sample. Its columns are: the image
  (titled with the sample's country), the given mask, the prediction
  (only if `Y_pred` is given) and the thresholded prediction (only if
  `threshold` is given as well).

  Args:
    X: array of images, indexed along the first axis.
    Y: array of masks, aligned with X.
    samples_df: dataframe with a 'country' column, indexed by sample id.
    ids: sample ids aligned with X (ids[i] belongs to X[i]).
    Y_pred: optional array of predictions aligned with X. It is not modified.
    threshold: optional cut-off; prediction values >= threshold are shown as 1,
      all others as 0.
    sample_size: number of samples to draw (with replacement).
    fig_height: height of each figure; the width is fig_height * number of columns.
  """
  # randint excludes `high`, so the drawn indices cover 0 .. X.shape[0]-1
  rnd_sample_indices = np.random.randint(low=0, high=X.shape[0], size=sample_size)
  if Y_pred is None:
    cols = 2
  elif threshold is not None:
    cols = 4
  else:
    cols = 3

  for i in rnd_sample_indices:
    fig, axs = plt.subplots(1,cols, figsize=(fig_height*cols,fig_height))
    axs[0].set_title(f'Country: {samples_df.loc[ids[i],"country"]}')
    axs[0].imshow(X[i])
    axs[1].set_title('Given Mask')
    axs[1].imshow(Y[i])
    if Y_pred is not None:
      axs[2].set_title('Prediction')
      axs[2].imshow(Y_pred[i])
      if threshold is not None:
        # binarise a copy: writing into Y_pred would change the caller's array and,
        # for an index drawn twice, also the 'Prediction' panel of the second figure
        Y_pred_binary = np.where(Y_pred[i] >= threshold, 1, 0)
        axs[3].set_title(f'Prediction with threshold = {threshold}')
        axs[3].imshow(Y_pred_binary)

    for ax in axs:
      ax.set_xticks([])
      ax.set_yticks([])
    fig.tight_layout()

In [None]:
def show_images(X,Ys,names,fig_height=4):
  """Plot every image of X next to the matching image of each array in Ys.

  Args:
    X: array of images, indexed along the first axis.
    Ys: one array or a list of arrays aligned with X (masks, predictions, ...).
    names: one title or a list of titles, one per entry of Ys.
    fig_height: height of each figure; the width is fig_height * number of columns.
  """
  if not isinstance(Ys,list):
    Ys = [Ys]
  if not isinstance(names,list):
    names = [names]
  cols = 1 + len(Ys)
  for i in range(X.shape[0]):
    # squeeze=False always yields a 2-D array of axes, so indexing also works
    # when Ys is empty and there is only a single column
    fig,axs = plt.subplots(1,cols,squeeze=False,figsize=(fig_height*cols,fig_height))
    axs = axs[0]
    axs[0].axis('off')
    axs[0].imshow(X[i])
    axs[0].set_title('Satellite')
    for ax,Y,name in zip(axs[1:],Ys,names):
      ax.axis('off')
      ax.imshow(Y[i])
      ax.set_title(name)
    plt.show()

# Split Training and Test Data

In [None]:
# Draw sample_size random rows (seeded with random_state) and keep only their index values
sample_ids = samples_df.sample(sample_size, random_state=random_state).index.to_numpy()
sample_ids.shape

In [None]:
# split in (train+val) and test
# NOTE(review): no separate validation split is made in the visible part of this notebook
train_ids, test_ids = train_test_split(sample_ids, test_size=test_size, random_state=random_state)

In [None]:
# Show how many ids ended up in each split
print('Training Shape: ',train_ids.shape)
print('Test Shape: ',test_ids.shape)

# Train the Model

In [None]:
# Read the satellite images (X_train) and masks (Y_train) of the training ids
X_train, Y_train = load_img_batch(samples_df,train_ids)

In [None]:
# Train the RandomForest for Color Based Segmentation
# Every pixel is one training sample: its 3 colour values are the features,
# the mask value at the same position is the label
X_train_flat = X_train.reshape(-1,3)
Y_train_flat = Y_train.reshape(-1)
# seed the forest as well, so that reruns of the notebook give the same model
rnd_forest = RandomForestClassifier(n_estimators=rnd_forest_estimators,random_state=random_state,verbose=3,n_jobs=-1)
rnd_forest.fit(X_train_flat, Y_train_flat)

In [None]:
# Get Train Accuracy for Color Based Segmentation
# (pixel-wise accuracy on the same pixels the forest was fitted on)
Y_train_pred_flat = rnd_forest.predict(X_train_flat)
print(f'Color Based Accuracy: {accuracy_score(Y_train_flat, Y_train_pred_flat)}')

In [None]:
# Define the Gradient Based Segmentation
def predict_with_gradient(X,grad_threshold):
  Y_grad = np.stack(np.gradient(X,axis=(1,2)))
  Y_grad = np.linalg.norm(Y_grad,axis=0)
  Y_grad = rgb2gray(Y_grad)
  Y_grad = Y_grad / np.max(Y_grad)
  Y_grad = (Y_grad < grad_threshold).astype(int)
  return Y_grad

In [None]:
# Train the Gradient Based Segmentation
# Find the best grad_threshold via Binary Search
def train_gradient_segmentation(X_train,Y_train,binary_search_depth):
  """Search a gradient threshold in [0, 1] by repeatedly halving the interval.

  Both interval ends are scored first. In every iteration the midpoint is
  scored and replaces the end with the lower accuracy (the upper end on ties
  is kept, i.e. the lower end moves up).

  Args:
    X_train: batch of training images passed to predict_with_gradient.
    Y_train: masks aligned with X_train.
    binary_search_depth: number of halving steps; 0 scores only the two ends.

  Returns:
    Tuple (accuracy, best_grad_threshold): a dict mapping every probed
    threshold to its pixel-wise training accuracy, and the probed threshold
    with the highest accuracy (always a key of that dict).
  """
  Y_train_flat = Y_train.flatten()
  accuracy = {}

  def score(threshold):
    # record the pixel-wise training accuracy reached with this threshold
    accuracy[threshold] = accuracy_score(Y_train_flat, predict_with_gradient(X_train,threshold).flatten())

  low = 0.
  high = 1.
  print('First Low...')
  score(low)
  print('First High...')
  score(high)
  for _ in tqdm(range(binary_search_depth),desc='Binary Search: '):
    middle = (low + high) / 2
    score(middle)
    # continue in the half that starts at the better scoring end
    if accuracy[low] > accuracy[high]:
      high = middle
    else:
      low = middle
  # return the best threshold seen, not merely the last midpoint that was probed
  best_grad_threshold = max(accuracy, key=accuracy.get)
  return accuracy, best_grad_threshold

In [None]:
# Run the threshold search on the training data; keeps the accuracy of every
# probed threshold and the threshold returned by the search
accuracies, grad_threshold = train_gradient_segmentation(X_train,Y_train,binary_search_depth)

In [None]:
# Display the accuracy recorded for every probed threshold
accuracies

In [None]:
# Training accuracy recorded for the threshold returned by the search
print(f'Gradient Based Segmentation: {accuracies[grad_threshold]}')

In [None]:
# Define the Combined Segmentation
def predict_combined(X,rnd_forest,grad_threshold,return_all=False):
  """Predict masks by combining the colour based and the gradient based segmentation.

  A pixel is set in the combined mask only if both segmentations mark it as 1.
  Each combined mask is then smoothed with a median filter and binarised again.

  Args:
    X: batch of images; expected layout (n_images, height, width, 3) -- TODO confirm.
    rnd_forest: fitted classifier that predicts a label per pixel from its 3 colour values.
    grad_threshold: threshold passed to predict_with_gradient.
    return_all: if True, also return the two intermediate predictions.

  Returns:
    The combined prediction, or the tuple
    (Y_pred_color, Y_pred_grad, Y_pred) if `return_all` is True.
  """
  print('Predict Colors...')
  # restore the batch layout from X itself instead of relying on the global img_size
  Y_pred_color = rnd_forest.predict(X.reshape(-1,3)).reshape(X.shape[:-1])
  print('Predict Gradients...')
  Y_pred_grad = predict_with_gradient(X,grad_threshold)
  print('Predict Combined...')
  Y_pred = ((Y_pred_color==1) & (Y_pred_grad==1)).astype(int)
  # Filter image by image: on the stacked 3-D array the default footprint of
  # `median` also spans the batch axis and would mix neighbouring images
  Y_pred = np.stack([median(single_mask) for single_mask in Y_pred])
  Y_pred = (Y_pred > 0.5).astype(int)
  if return_all:
    return Y_pred_color, Y_pred_grad, Y_pred
  else:
    return Y_pred

# Evaluate on Test Data

In [None]:
# Read the satellite images (X_test) and masks (Y_test) of the test ids
X_test, Y_test = load_img_batch(samples_df,test_ids)

In [None]:
# Display the gradient threshold that is used for the test predictions
grad_threshold

In [None]:
# Predict the test set; return_all=True also yields the colour-only and gradient-only masks
Y_pred_color, Y_pred_grad, Y_pred = predict_combined(X_test,rnd_forest,grad_threshold,return_all=True)

In [None]:
# Pixel-wise accuracy of the three predictions against the test masks
print(f'Test Accuracy Color: {accuracy_score(Y_test.flatten(), Y_pred_color.flatten())}')
print(f'Test Accuracy Gradient: {accuracy_score(Y_test.flatten(), Y_pred_grad.flatten())}')
print(f'Test Accuracy Combined: {accuracy_score(Y_test.flatten(), Y_pred.flatten())}')

In [None]:
# Show 20 randomly drawn test samples with all intermediate predictions
# NOTE: this rebinds the sample_size hyperparameter defined further up
sample_size = 20
sample_idxs = np.random.randint(low=0,high=X_test.shape[0], size=sample_size)
X = X_test[sample_idxs]
# The gradient masks are median filtered one image at a time: on the stacked 3-D
# array the default footprint of `median` would also mix neighbouring images
Ys = [Y_test[sample_idxs],Y_pred_color[sample_idxs],Y_pred_grad[sample_idxs],np.stack([median(single_mask) for single_mask in Y_pred_grad[sample_idxs]]),Y_pred[sample_idxs]]
names = ['Mask','Color Based','Gradient Based','Median Gradient','Combined']
show_images(X,Ys,names)

In [None]:
from skimage.color import rgb2gray

In [None]:
def predict_with_gradient(X,grad_threshold):
  """Threshold the absolute (not normalised) gradient magnitude of a batch of images.

  Returns a boolean array that is True where the single-channel gradient
  magnitude is below `grad_threshold`.

  NOTE: this definition replaces the earlier function of the same name for all
  cells that are executed after it.
  """
  per_axis_gradients = np.gradient(X,axis=(1,2))
  magnitude = np.linalg.norm(np.stack(per_axis_gradients),axis=0)
  return rgb2gray(magnitude) < grad_threshold

In [None]:
# Gradient segmentation of the test set with a hard-coded threshold of 0.01
# (not the grad_threshold found by the search), flattened for the accuracy computation
Y_grad = predict_with_gradient(X_test,0.01)
Y_grad_flat = Y_grad.flatten()

In [None]:
from sklearn.metrics import accuracy_score
# Y_test_flat is not defined anywhere earlier in this notebook; build it here
# so that the cell does not fail with a NameError
Y_test_flat = Y_test.flatten()
print(f'Accuracy: {accuracy_score(Y_test_flat, Y_grad_flat)}')

In [None]:
# Combine both
Y_both = Y_grad & (Y_pred==1)
Y_both_flat = Y_both.flatten()

In [None]:
# Flatten the test masks right here: a Y_test_flat variable is not defined earlier in this notebook
print(f'Accuracy: {accuracy_score(Y_test.flatten(), Y_both_flat)}')

In [None]:
# Show 10 randomly drawn test samples with the colour, gradient and AND-combined masks
# NOTE: this rebinds the sample_size hyperparameter defined further up
sample_size = 10
sample_idxs = np.random.randint(low=0,high=X_test.shape[0], size=sample_size)
X = X_test[sample_idxs]
# The panel titled 'Color RND Forest' shows the colour-only prediction Y_pred_color;
# Y_pred holds the combined prediction returned by predict_combined
Ys = [Y_test[sample_idxs],Y_pred_color[sample_idxs],Y_grad[sample_idxs],Y_both[sample_idxs]]
names = ['Mask','Color RND Forest','Gradient Threshold','Combined with and']
show_images(X,Ys,names)