# Evaluate the models that are saved in the models folder

# Imports

Note: the check for whether the notebook is running in Colab was removed.

In [None]:
!cat /proc/cpuinfo

In [None]:
!pip install -U scikit-learn

In [None]:
#imports
from platform import python_version

#basic python stuff
import os
import json
from pathlib import Path

#basics from the SciPy Stack
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#colab stuff
from google.colab import drive

#data managing
from sklearn.model_selection import train_test_split
from skimage import io #read in images
from skimage.transform import resize

# alternative model
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from skimage.color import rgb2gray
from skimage.filters import median
from sklearn.preprocessing import MinMaxScaler

# evaluation
import sklearn

#progress bar
from tqdm.notebook import tqdm

In [None]:
# settings

# implements progress_apply into pandas
tqdm.pandas(desc='Pandas_Progress')

In [None]:
print("Python version =",python_version())

In [None]:
# get access to drive
drive.mount('/content/drive')

# Hyperparameters

In [None]:
# seed used for both the dataset sampling and the train/test split
random_state = 42

# training parameters
sample_size = 1000  # number of rows drawn from samples_df
test_size = 0.3  # passed to train_test_split as test_size
val_size = 0.3  # NOTE(review): not referenced in the visible part of this notebook

# data preprocessing
img_size = 128  # images and masks are resized to (img_size, img_size)
anti_aliasing = True  # forwarded to skimage.transform.resize
mask_threshold = 0.5  # mask pixels above this value (after dividing by 255) become 1, others 0

# model parameters (forwarded to KNeighborsClassifier)
knn_n_neighbors = 20
knn_algorithm = 'kd_tree'
knn_leaf_size = 30

# Retrieve the Dataset

In [None]:
# unzip data
!unzip -n -q /content/drive/MyDrive/ML_Project_Satellite_Images/data/current_dataset.zip -d /content/

In [None]:
# load the image metadata from samples.csv and index it by the 'id' column
samples_df = pd.read_csv('/content/dataset/samples.csv').set_index('id')
samples_df

In [None]:
# paths to the sat/mask folder
path_sat_folder = '/content/dataset/images/satellite/'
path_mask_folder = '/content/dataset/images/mask/'

In [None]:
# add the absolute file paths of the satellite and mask images to the dataframe
samples_df['abs_satellite_path'] = samples_df['satellite_file'].map(lambda fname: path_sat_folder + fname)
samples_df['abs_mask_path'] = samples_df['mask_file'].map(lambda fname: path_mask_folder + fname)

# Functions for Image reading and plotting

In [None]:
# function for image reading
def read_satellite_img(filepath):
  """Read a satellite image, resize it and divide its values by 255.

  The image is resized to (img_size, img_size) with the global
  anti_aliasing setting; preserve_range=True keeps the original value
  range so that the division by 255 does the scaling.
  (Presumably the inputs are 8-bit images, giving values in [0, 1] - confirm.)
  """
  target_shape = (img_size, img_size)
  raw = io.imread(filepath)
  resized = resize(raw, output_shape=target_shape, anti_aliasing=anti_aliasing, preserve_range=True)
  return resized / 255.

def read_mask_img(filepath):
  """Read a mask image and turn it into a 0/1 array of size (img_size, img_size).

  If the file has more than two dimensions only the channel with index 2
  is kept. The image is resized, divided by 255 and every pixel above
  mask_threshold is set to 1, all others to 0.
  """
  raw = io.imread(filepath)
  if raw.ndim > 2:
    # keep a single channel of a multi-channel mask
    raw = raw[:, :, 2]
  target_shape = (img_size, img_size)
  scaled = resize(raw, output_shape=target_shape, anti_aliasing=anti_aliasing, preserve_range=True) / 255.
  # boolean comparison cast back to the float dtype gives exactly 1 and 0
  return (scaled > mask_threshold).astype(scaled.dtype)

In [None]:
# function to load a batch of images
def load_img_batch(samples_df,ids):
  """Load the satellite images and masks for the given ids as two stacked arrays.

  samples_df needs the columns 'abs_satellite_path' and 'abs_mask_path'.
  Returns the tuple (satellite_imgs, mask_imgs); the first axis of both
  arrays follows the order of ids.
  """
  readers = (
    ('abs_satellite_path', read_satellite_img),
    ('abs_mask_path', read_mask_img),
  )
  # read everything first (with a progress bar per column) ...
  loaded = [samples_df.loc[ids, column].progress_apply(reader) for column, reader in readers]
  # ... then turn each series of images into one array
  satellite_imgs, mask_imgs = [np.stack(series.to_numpy()) for series in loaded]
  return satellite_imgs, mask_imgs

In [None]:
# function to show some samples (with or without the predictions)
def show_sample(X, Y, samples_df, ids, Y_pred=None, threshold=None, sample_size=10, fig_height=6):
  """Plot randomly chosen samples, optionally together with their predictions.

  Every drawn sample gets one figure with the columns
  satellite image | given mask [| prediction [| thresholded prediction]].

  Parameters:
    X: array of images, indexed along the first axis.
    Y: array of masks, aligned with X.
    samples_df: dataframe with a 'country' column, indexed by sample id.
    ids: sample ids aligned with X (ids[i] belongs to X[i]).
    Y_pred: optional predictions aligned with X.
    threshold: optional cut-off; if given (together with Y_pred) an extra
      column shows the prediction binarised with `>= threshold`.
    sample_size: number of samples to draw (with replacement).
    fig_height: height of each figure; the width is fig_height * columns.

  Y_pred is never modified.
  """
  n_images = X.shape[0]
  if n_images == 0:
    # nothing to sample from
    return

  # randint excludes the upper bound, so this draws from 0 .. n_images-1
  # (np.random.random_integers is deprecated)
  rnd_sample_indices = np.random.randint(low=0, high=n_images, size=sample_size)

  if Y_pred is None:
    cols = 2
  elif threshold is None:
    cols = 3
  else:
    cols = 4

  for i in rnd_sample_indices:
    fig, axs = plt.subplots(1,cols, figsize=(fig_height*cols,fig_height))
    axs[0].set_title(f'Country: {samples_df.loc[ids[i],"country"]}')
    axs[0].imshow(X[i])
    axs[1].set_title('Given Mask')
    axs[1].imshow(Y[i])
    if Y_pred is not None:
      axs[2].set_title('Prediction')
      axs[2].imshow(Y_pred[i])
      if threshold is not None:
        # binarise a copy: writing into Y_pred would change the caller's data
        # and a sample that is drawn twice would show up already thresholded
        Y_pred_binary = (Y_pred[i] >= threshold).astype(float)
        axs[3].set_title(f'Prediction with threshold = {threshold}')
        axs[3].imshow(Y_pred_binary)

    for ax in axs:
      ax.set_xticks([])
      ax.set_yticks([])
    fig.tight_layout()

In [None]:
def show_images(X,Ys,names,fig_height=4):
  """Plot every image of X next to the matching image of each array in Ys.

  Parameters:
    X: array of satellite images, indexed along the first axis.
    Ys: one array or a list of arrays aligned with X (e.g. mask, prediction).
    names: one title or a list of titles, one per entry of Ys.
    fig_height: height of each figure; the width is fig_height * columns.

  One figure per image is created and shown immediately.
  """
  if not isinstance(Ys,list):
    Ys = [Ys]
  if not isinstance(names,list):
    names = [names]
  cols = 1 + len(Ys)
  for i in range(X.shape[0]):
    # squeeze=False always returns a 2-D array of axes; without it a single
    # column (empty Ys) yields a bare Axes object and axs[0] fails
    fig,axs = plt.subplots(1,cols,figsize=(fig_height*cols,fig_height),squeeze=False)
    axs = axs[0]
    axs[0].axis('off')
    axs[0].imshow(X[i])
    axs[0].set_title('Satellite')
    for j,(Y,name) in enumerate(zip(Ys,names), start=1):
      axs[j].axis('off')
      axs[j].imshow(Y[i])
      axs[j].set_title(name)
    plt.show()

# Split Training and Test Data

In [None]:
# draw sample_size random rows (seeded with random_state) and keep only their index values
sample_ids = samples_df.sample(sample_size, random_state=random_state).index.to_numpy()
sample_ids.shape

In [None]:
# split the sampled ids into a training and a test set
# NOTE(review): no separate validation split is made in the visible code
train_ids, test_ids = train_test_split(sample_ids, test_size=test_size, random_state=random_state)

In [None]:
print('Training Shape: ',train_ids.shape)
print('Test Shape: ',test_ids.shape)

# Train the Model

In [None]:
X_train, Y_train = load_img_batch(samples_df,train_ids)

In [None]:
def extract_features(X):
  """Build the per-pixel feature channels for a batch of images.

  X is indexed as (image, row, column, channel); rgb2gray is applied to it,
  so the last axis is expected to hold the RGB channels.

  The returned array has the same first three axes as X and these channels:
    - the original channels of X
    - the gray value
    - the gradient magnitude of every original channel
    - the gray value of those gradient magnitudes
  All channels are min-max scaled.

  NOTE: the scaler is fitted anew on every call, so the inputs of train()
  and predict() are each scaled with their own min/max.
  """
  # extract grays
  X_gray = np.expand_dims(rgb2gray(X), axis=3)
  # gradient along rows and columns for each channel, reduced to its magnitude
  X_grad = np.stack(np.gradient(X, axis=(1, 2)))
  X_grad = np.linalg.norm(X_grad, axis=0)
  # gray value of the per-channel gradient magnitudes
  X_grad_gray = np.expand_dims(rgb2gray(X_grad), axis=3)
  # combine all features along the channel axis
  X_combined = np.concatenate([X, X_gray, X_grad, X_grad_gray], axis=3)
  # flatten to (pixels, features); this has to use the combined array,
  # otherwise the extracted features are thrown away
  X_flat = X_combined.reshape(-1, X_combined.shape[3])
  # scale every feature to [0, 1]
  scaler = MinMaxScaler()
  X_flat = scaler.fit_transform(X_flat)
  # restore the image layout (taken from the data instead of the global img_size)
  return X_flat.reshape(X_combined.shape)

In [None]:
def train(X_train,Y_train):
  """Fit a k-nearest-neighbours classifier on the pixels of the training images.

  Features come from extract_features(X_train); every pixel is one training
  sample and the matching entry of Y_train is its label. The classifier is
  configured with the global knn_* hyperparameters and uses all CPU cores.
  Returns the fitted model.
  """
  print('Extracting Features...')
  features = extract_features(X_train)
  # one row per pixel, one column per feature channel
  pixel_features = features.reshape(-1, features.shape[3])
  pixel_labels = Y_train.flatten()
  # set up the KNN classifier
  knn_settings = {
    'n_neighbors': knn_n_neighbors,
    'algorithm': knn_algorithm,
    'leaf_size': knn_leaf_size,
    'n_jobs': -1,
  }
  model = KNeighborsClassifier(**knn_settings)

  print('Training Model...')
  model.fit(pixel_features, pixel_labels)
  return model

In [None]:
def predict(X,model):
  """Predict a mask for every image in X with the given fitted model.

  The images are converted with extract_features, classified pixel by pixel
  and the labels are arranged back into (image, img_size, img_size).
  """
  features = extract_features(X)
  n_channels = features.shape[3]
  pixel_labels = model.predict(features.reshape(-1, n_channels))
  return pixel_labels.reshape(-1, img_size, img_size)

In [None]:
model = train(X_train,Y_train)

In [None]:
Y_train_pred = predict(X_train, model)

In [None]:
# pixel-wise accuracy: masks and predictions are flattened, so every pixel counts as one sample
print(f'Train Accuracy: {accuracy_score(Y_train.flatten(), Y_train_pred.flatten())}')

In [None]:
# show a random selection of training images with their mask and prediction
# (own variable name, so the sample_size hyperparameter is not overwritten)
n_preview = 20
sample_idxs = np.random.randint(low=0, high=X_train.shape[0], size=n_preview)
X = X_train[sample_idxs]
Ys = [Y_train[sample_idxs], Y_train_pred[sample_idxs]]
names = ['Mask', 'Prediction']
show_images(X, Ys, names)

# Evaluate On Test Data

In [None]:
X_test, Y_test = load_img_batch(samples_df,test_ids)

In [None]:
Y_pred = predict(X_test,model)

In [None]:
# pixel-wise accuracy on the test images (masks and predictions are flattened)
print(f'Test Accuracy: {accuracy_score(Y_test.flatten(), Y_pred.flatten())}')

In [None]:
# show a random selection of test images with their mask and prediction
# (own variable name, so the sample_size hyperparameter is not overwritten)
n_preview = 20
sample_idxs = np.random.randint(low=0, high=X_test.shape[0], size=n_preview)
X = X_test[sample_idxs]
Ys = [Y_test[sample_idxs], Y_pred[sample_idxs]]
names = ['Mask', 'Prediction']
show_images(X, Ys, names)