# Evaluate the grid search

The zipped dataset (containing dataset/images/...) must be stored in your Google Drive under  
`ML_Project_Satellite_Images/data/current_dataset.zip`  
In addition, the grid_search notebook must have been run at least once.

# Imports

In [None]:
#imports
from platform import python_version

#basic python stuff
import os
import json
from pathlib import Path

#basics from the SciPy Stack
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size': 16})

#colab stuff
from google.colab import drive

#progress bar
from tqdm.notebook import tqdm

In [None]:
# settings

# Register tqdm's pandas integration so that `progress_apply` is available on
# pandas objects; the progress bars it creates are labelled 'Pandas_Progress'.
tqdm.pandas(desc='Pandas_Progress')

In [None]:
# Report which Python interpreter version this notebook is running on.
print(f"Python version = {python_version()}")

In [None]:
# Mount Google Drive at /content/drive so that files stored there can be read
# through the normal file system.
drive.mount('/content/drive')

# Read in the results

In [None]:
# Base names (without the .json extension) of the grid-search result files,
# generated per name prefix: runs 1-4 for 'nico', then runs 1-6 for 'samuel'.
file_names = [
  f'grid_search_{owner}_{run}'
  for owner, n_runs in (('nico', 4), ('samuel', 6))
  for run in range(1, n_runs + 1)
]

In [None]:
# Read every result file listed in file_names from Drive into its own
# DataFrame, keeping the order of file_names.
dfs = [
  pd.read_json(f'/content/drive/MyDrive/ML_Project_Satellite_Images/grid_search/{file_name}.json')
  for file_name in file_names
]

In [None]:
# Stack all per-file result tables into a single DataFrame with a fresh
# 0..n-1 index.
# pd.concat is used instead of a DataFrame.append loop: DataFrame.append was
# deprecated in pandas 1.4 and removed in pandas 2.0, and it copied the
# accumulated frame on every iteration.
results_df = pd.concat(dfs, ignore_index=True)

# Inspect the results

In [None]:
# Display the combined results table.
results_df

In [None]:
# Flatten the entries of the 'params' column into one column per key and place
# those columns next to the existing ones (rows are matched on the index,
# keeping only index values present in both frames).
params_df = pd.json_normalize(results_df['params'])
results_df = pd.concat([results_df, params_df], axis=1, join="inner")
results_df

In [None]:
# Hyperparameter columns that are examined below (duplicate check and plot axes).
param_names = [
  'filter_start',
  'filter_levels',
  'kernel_size',
  'kernel_initializer',
  'dropout_start',
  'learning_rate',
]

In [None]:
# Show the runs whose combination of param_names values already occurred in an
# earlier row (the first occurrence itself is not listed).
is_repeat = results_df.duplicated(subset=param_names)
results_df[is_repeat]

In [None]:
# Keep only the runs whose 'val_acc' exceeds 0.8; this subset feeds the plots.
# NOTE(review): the filter uses 'val_acc', while sorting and plotting use
# 'best_val_acc' — confirm that 'val_acc' is the intended column here.
eff_results_df = results_df[results_df['val_acc']>0.8]

In [None]:
# Display all runs ordered from highest to lowest best_val_acc.
results_df.sort_values(by='best_val_acc', ascending=False)

# Plot the results

In [None]:
# Plot best_val_acc against each hyperparameter, one panel per entry of
# param_names.
# The grid is sized from param_names (3 panels per row) instead of a fixed
# 2x3 layout, so a shorter list no longer raises IndexError and a longer list
# no longer silently loses panels. For 6 parameters the figure is unchanged.
n_cols = 3
n_rows = max(1, -(-len(param_names) // n_cols))  # ceiling division
# squeeze=False keeps axs two-dimensional even when there is a single row
fig, axs = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False)
panels = axs.flatten()
for ax, param_name in zip(panels, param_names):
  ax.plot(eff_results_df[param_name], eff_results_df['best_val_acc'], 'o')
  ax.set_xlabel(param_name)
  ax.set_ylabel('validation accuracy')
# hide the panels of the last row that have no parameter assigned
for ax in panels[len(param_names):]:
  ax.set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
# Pairwise scatter matrix of the hyperparameters: the panel in row i, column j
# plots param_names[j] on x against param_names[i] on y, and every point is
# coloured by that run's best_val_acc.
n_params = len(param_names)
fig, axs = plt.subplots(n_params, n_params, figsize=(15,10))
accuracy = eff_results_df['best_val_acc']
last_row = n_params - 1
for row, y_param in enumerate(param_names):
  for col, x_param in enumerate(param_names):
    ax = axs[row, col]
    sc = ax.scatter(eff_results_df[x_param], eff_results_df[y_param], c=accuracy, alpha=0.5)
    # only the bottom row keeps its x ticks and gets an x label
    if row == last_row:
      ax.set_xlabel(x_param)
    else:
      ax.set_xticks([])
    # only the left column keeps its y ticks and gets a y label
    if col == 0:
      ax.set_ylabel(y_param)
    else:
      ax.set_yticks([])

# Shrink the panel grid to the left 80% of the figure and put one colorbar in
# the freed strip; it is built from the last scatter, and every panel was drawn
# with the same colour values.
fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
fig.colorbar(sc, cax=cbar_ax)
cbar_ax.set_ylabel('validation accuracy')

# tight_layout() is deliberately not called here (it was disabled in this cell);
# presumably it would interfere with the manual layout above — TODO confirm.
plt.show()