# Classification accuracy assessment

Rainey Aberle

2022

In [None]:
# -----Import packages
import datetime
import glob
import json
import os
import pickle
import sys

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio as rio
import rioxarray as rxr
import scipy
import xarray as xr
from joblib import dump, load
from shapely.geometry import MultiPolygon, Polygon
from sklearn import metrics
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# -----Define paths in directory
site_name = 'LemonCreek'
# base directory (path to snow-cover-mapping/)
base_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/snow-cover-mapping/'
# path to classified points used to train and test classifiers
data_pts_path = '/Users/raineyaberle/Google Drive/My Drive/Research/PhD/snow_cover_mapping/classified-points/LemonCreek/'
# full path and file name to AOI shapefile
AOI_fn = data_pts_path + '../../study-sites/LemonCreek/AOIs/LemonCreek_RGI_outline.shp'
# full path and file name to DEM
DEM_fn = data_pts_path + '../../study-sites/LemonCreek/DEMs/LemonCreek_ArcticDEM_clip.tif'

# -----Determine settings
terrain_parameters = False # whether to use terrain parameters (elevation, slope, aspect) in classification
save_figures = True # whether to save output figures

# -----Add path to functions
# the project helpers live in base_path/functions/, so that folder must be on
# sys.path before pipeline_utils can be imported
sys.path.insert(1, base_path + 'functions/')
import pipeline_utils as f

# -----Load dataset characteristics dictionary
# use a context manager so the JSON file handle is closed after reading
with open(base_path + 'inputs-outputs/datasets_characteristics.json') as dataset_dict_file:
    dataset_dict = json.load(dataset_dict_file)

# -----Load classified points
# NOTE: this changes the working directory of the whole session; the file names
# collected below are therefore relative to data_pts_path
os.chdir(data_pts_path)
data_pts_fns = sorted(glob.glob('LemonCreek*.shp'))
# display the list of classified-point shapefiles as the cell output
data_pts_fns

In [None]:
# -----Load AOI as gpd.GeoDataFrame
AOI = gpd.read_file(AOI_fn)
# reproject the AOI to WGS to solve for the optimal UTM zone
AOI_WGS = AOI.to_crs('EPSG:4326')
# centroid of the first AOI feature as [x, y] in EPSG:4326
aoi_centroid_wgs = AOI_WGS.geometry[0].centroid
AOI_WGS_centroid = [aoi_centroid_wgs.xy[0][0],
                    aoi_centroid_wgs.xy[1][0]]
# grab the optimal UTM zone EPSG code
epsg_UTM = f.convert_wgs_to_utm(AOI_WGS_centroid[0], AOI_WGS_centroid[1])
print('Optimal UTM CRS = EPSG:' + str(epsg_UTM))
# CRS string reused for every reprojection below
utm_crs = 'EPSG:' + str(epsg_UTM)

# -----Load DEM as Xarray DataSet
# reproject AOI to UTM
AOI_UTM = AOI.to_crs(utm_crs)
# load DEM as xarray DataSet
DEM = xr.open_dataset(DEM_fn)
DEM = DEM.rename({'band_data': 'elevation'})
# reproject the DEM to the optimal UTM zone
DEM = DEM.rio.reproject(utm_crs)
DEM = DEM.rio.write_crs(utm_crs)
# remove unnecessary data (possible extra bands from ArcticDEM or other DEM):
# keep only the first slice along the leading dimension when there are more than 2 dims
if DEM.elevation.ndim > 2:
    DEM['elevation'] = DEM.elevation[0]

# -----Plot
fig, ax = plt.subplots(1, 1, figsize=(6,6))
# DEM coordinates are divided by 1e3 so the axes read in km (see axis labels)
dem_x_km = DEM.x.data / 1e3
dem_y_km = DEM.y.data / 1e3
dem_im = ax.imshow(DEM.elevation.data, cmap='terrain',
                   extent=(np.min(dem_x_km), np.max(dem_x_km),
                           np.min(dem_y_km), np.max(dem_y_km)))
# collect the polygon parts of the first AOI feature; a MultiPolygon contributes
# one outline per member polygon, any other geometry type draws no outline
aoi_geom = AOI_UTM.geometry[0]
if isinstance(aoi_geom, Polygon):
    aoi_parts = [aoi_geom]
elif isinstance(aoi_geom, MultiPolygon):
    aoi_parts = list(aoi_geom.geoms)
else:
    aoi_parts = []
# draw the exterior ring of each part in black
for aoi_part in aoi_parts:
    ring_x, ring_y = aoi_part.exterior.coords.xy
    ax.plot([x_m/1e3 for x_m in ring_x],
            [y_m/1e3 for y_m in ring_y], '-k')
ax.grid()
ax.set_xlabel('Easting [km]')
ax.set_ylabel('Northing [km]')
fig.colorbar(dem_im, ax=ax, shrink=0.5, label='Elevation [m]')
plt.show()

## PlanetScope

In [None]:
# -----Load trained classifier and feature columns
clf_fn = base_path+'inputs-outputs/PlanetScope_classifier_all_sites.joblib'
clf = load(clf_fn)
feature_cols_fn = base_path+'inputs-outputs/PlanetScope_feature_columns.json'
# context manager closes the JSON file handle after reading
with open(feature_cols_fn) as feature_cols_file:
    feature_cols = json.load(feature_cols_file)

# -----Subset dataset dictionary
dataset = 'PlanetScope'
ds_dict = dataset_dict[dataset]

# -----Set up testing data
# load image file names
im_fns = sorted(glob.glob(data_pts_path + 'LemonCreek_'+dataset+'*.tif'))
# load data point file names
# NOTE(review): these patterns are not restricted to the current dataset; images and
# point files are paired purely by sorted position, so the folder must hold exactly
# one snow / no-snow shapefile per image, in matching order -- confirm
data_pts_snow_fns = sorted(glob.glob(data_pts_path+'*_snow.shp'))
data_pts_no_snow_fns = sorted(glob.glob(data_pts_path+'*_no-snow.shp'))

# the AOI and DEM do not change between images, so the elevation polygons used
# for the radiometric adjustment are created once instead of once per image
polygons_top, polygons_bottom = f.create_aoi_elev_polys(AOI_UTM, DEM)

# per-image data frames, concatenated once after the loop
data_pts_list = []
# loop through PlanetScope images
for i, im_fn in enumerate(im_fns):

    # Load image
    # the image date is taken from the 8 characters following 'LemonCreek_' in the
    # matching snow-point file name (assumed to be YYYYMMDD -- TODO confirm).
    # np.datetime64 does not accept the compact 'YYYYMMDD' form as a calendar date
    # (it reads the digits as a year), so the string is parsed explicitly.
    im_date = data_pts_snow_fns[i].split('LemonCreek_')[1][0:8]
    im_dt = np.datetime64(datetime.datetime.strptime(im_date, '%Y%m%d'))
    im_xr = xr.open_dataset(im_fn)

    # Adjust image radiometry
    im_adj, im_adj_method = f.planetscope_adjust_image_radiometry(im_xr, im_dt, polygons_top, polygons_bottom, dataset_dict, skip_clipped=False)

    # load classified points
    # no-snow
    data_pts_no_snow = gpd.read_file(data_pts_no_snow_fns[i]) # read file
    data_pts_no_snow['class'] = 0 # determine class ID
    # snow
    data_pts_snow = gpd.read_file(data_pts_snow_fns[i]) # read file
    data_pts_snow['class'] = 1 # determine class ID
    # combine data pts, reproject to image CRS
    data_pts = pd.concat([data_pts_snow, data_pts_no_snow])
    data_pts = data_pts.to_crs(im_xr.rio.crs)
    # drop the 'id' column, then any rows containing NaN
    data_pts = data_pts.drop('id', axis=1)
    data_pts = data_pts.dropna()

    # grab x and y sample points (first coordinate of the first part of each geometry)
    pts_x = [geom.geoms[0].coords.xy[0][0] for geom in data_pts.geometry]
    pts_y = [geom.geoms[0].coords.xy[1][0] for geom in data_pts.geometry]
    # sample image values at data points; the last feature column is skipped here
    # and NDSI is computed after the loop (assumes NDSI is the last entry of
    # feature_cols -- TODO confirm)
    for band in feature_cols[0:-1]:
        data_pts[band] = [im_adj[band].sel(x=pt_x, y=pt_y, method='nearest').data[0]
                          for pt_x, pt_y in zip(pts_x, pts_y)]

    data_pts_list.append(data_pts)

# concatenate to full dataframe
if not data_pts_list:
    raise FileNotFoundError('No ' + dataset + ' images found in ' + data_pts_path)
data_pts_full = pd.concat(data_pts_list)

# Add NDSI column: normalized difference of the two bands listed in ds_dict['NDSI_bands']
ndsi_band_1 = data_pts_full[ds_dict['NDSI_bands'][0]]
ndsi_band_2 = data_pts_full[ds_dict['NDSI_bands'][1]]
data_pts_full['NDSI'] = (ndsi_band_1 - ndsi_band_2) / (ndsi_band_1 + ndsi_band_2)

# Remove rows with no data and reduce memory usage in data pts
data_pts_full = data_pts_full.dropna().reset_index(drop=True)
data_pts_full = f.reduce_memory_usage(data_pts_full, verbose=False)

# -----Test the trained classifier
# features
X = data_pts_full[feature_cols]
# target variable
y = data_pts_full['class']
# Predict class values using trained classifier
y_pred = clf.predict(X)
# Adjust outputs to only test snow and no-snow:
# snow = 1, 2 -> 1; no-snow = 3, 4, 5 -> 0
y_pred = (y_pred <= 2).astype(int)
# Calculate overall accuracy
accuracy = metrics.accuracy_score(y, y_pred)
# Calculate Kappa score
K = metrics.cohen_kappa_score(y, y_pred)
# Calculate confusion matrix; labels are fixed so the matrix is always 2x2,
# even if only one class occurs in the test points
CM = metrics.confusion_matrix(y, y_pred, labels=[0, 1])
# rows are true classes and columns are predicted classes, so with snow (1) as the
# positive class the flattened order is TN, FP, FN, TP
tn, fp, fn, tp = CM.ravel()
# print results
print('n = '+str(len(y_pred)))
print('Overall accuracy = '+str(accuracy))
print('Kappa score = '+str(K))
print('Confusion matrix: ')
print(CM)

# -----Save results to file
# counts are stored as strings because NumPy integers are not JSON serializable
results = {'Dataset': dataset,
           'Overall accuracy': accuracy,
           'Kappa score': K,
           'Confusion matrix': {
               'TP': str(tp),
               'TN': str(tn),
               'FP': str(fp),
               'FN': str(fn)
           },
           'N': len(y_pred)
          }
results_fn = base_path + 'inputs-outputs/classification_performace_metrics_'+dataset+'.json'
# context manager guarantees the file is flushed and closed
with open(results_fn, 'w') as results_file:
    json.dump(results, results_file)
print('Performance metrics saved to file: '+results_fn)

## Landsat

In [None]:
# -----Load trained classifier and feature columns
clf_fn = base_path+'inputs-outputs/Landsat_classifier_all_sites.joblib'
clf = load(clf_fn)
feature_cols_fn = base_path+'inputs-outputs/Landsat_feature_columns.json'
# context manager closes the JSON file handle after reading
with open(feature_cols_fn) as feature_cols_file:
    feature_cols = json.load(feature_cols_file)

# -----Subset dataset dictionary
dataset = 'Landsat'
ds_dict = dataset_dict[dataset]

# -----Set up testing data
# load image file names
im_fns = sorted(glob.glob(data_pts_path + 'LemonCreek_'+dataset+'*.tif'))
# load data point file names
# NOTE(review): these patterns are not restricted to the current dataset; images and
# point files are paired purely by sorted position, so the folder must hold exactly
# one snow / no-snow shapefile per image, in matching order -- confirm
data_pts_snow_fns = sorted(glob.glob(data_pts_path+'*_snow.shp'))
data_pts_no_snow_fns = sorted(glob.glob(data_pts_path+'*_no-snow.shp'))

# values that are identical for every image
target_crs = 'EPSG:' + str(epsg_UTM)
band_names = list(ds_dict['refl_bands'].keys())
# raster band numbers start at 1; map them onto the dataset's band names in order
band_rename = {band_idx + 1: name for band_idx, name in enumerate(band_names)}

# per-image data frames, concatenated once after the loop
data_pts_list = []
# loop through Landsat images
for i, im_fn in enumerate(im_fns):

    # load image as xarray.DataArray
    im_da = rxr.open_rasterio(im_fn)
    # reproject to optimal UTM zone
    im_da = im_da.rio.reproject(target_crs)
    im_crs = 'EPSG:' + str(im_da.rio.crs.to_epsg())
    # convert to xarray.DataSet with one variable per band
    im_ds = im_da.to_dataset('band')
    im_ds = im_ds.rename(band_rename)
    # account for image scalar and no data values
    im_ds = xr.where(im_ds != ds_dict['no_data_value'],
                     im_ds / ds_dict['image_scalar'], np.nan)
    # expand dimensions to include time
    # 'system-time_start' is divided by 1000 in the original code, i.e. treated as
    # epoch milliseconds; build the timestamp directly so it does not depend on the
    # machine's local timezone (presumably the epoch value is UTC -- confirm)
    im_dt = np.datetime64(int(im_da.attrs['system-time_start']), 'ms')
    im_ds = im_ds.expand_dims({'time': [im_dt]})
    # set CRS
    im_ds.rio.write_crs(im_crs, inplace=True)

    # load classified points
    # no-snow
    data_pts_no_snow = gpd.read_file(data_pts_no_snow_fns[i]) # read file
    data_pts_no_snow['class'] = 0 # determine class ID
    # snow
    data_pts_snow = gpd.read_file(data_pts_snow_fns[i]) # read file
    data_pts_snow['class'] = 1 # determine class ID
    # combine data pts, reproject to image CRS
    data_pts = pd.concat([data_pts_snow, data_pts_no_snow])
    data_pts = data_pts.to_crs(im_crs)
    # drop the 'id' column, then any rows containing NaN
    data_pts = data_pts.drop('id', axis=1)
    data_pts = data_pts.dropna()
    # grab x and y sample points (first coordinate of the first part of each geometry)
    pts_x = [geom.geoms[0].coords.xy[0][0] for geom in data_pts.geometry]
    pts_y = [geom.geoms[0].coords.xy[1][0] for geom in data_pts.geometry]
    # sample image values at data points; the last feature column is skipped here
    # and NDSI is computed after the loop (assumes NDSI is the last entry of
    # feature_cols -- TODO confirm)
    for band in feature_cols[0:-1]:
        data_pts[band] = [im_ds[band].sel(x=pt_x, y=pt_y, method='nearest').data[0]
                          for pt_x, pt_y in zip(pts_x, pts_y)]
    data_pts_list.append(data_pts)

# concatenate to data_pts_full
if not data_pts_list:
    raise FileNotFoundError('No ' + dataset + ' images found in ' + data_pts_path)
data_pts_full = pd.concat(data_pts_list)

# add NDSI column: normalized difference of the two bands listed in ds_dict['NDSI_bands']
ndsi_band_1 = data_pts_full[ds_dict['NDSI_bands'][0]]
ndsi_band_2 = data_pts_full[ds_dict['NDSI_bands'][1]]
data_pts_full['NDSI'] = (ndsi_band_1 - ndsi_band_2) / (ndsi_band_1 + ndsi_band_2)
# remove rows with no data
data_pts_full = data_pts_full.dropna().reset_index(drop=True)
# reduce memory usage in data pts
data_pts_full = f.reduce_memory_usage(data_pts_full)

# -----Test the trained classifier
# features
X = data_pts_full[feature_cols]
# target variable
y = data_pts_full['class'].values
# Predict class values using trained classifier
y_pred = clf.predict(X)
# Adjust outputs to only test snow and no-snow:
# snow = 1, 2 -> 1; no-snow = 3, 4, 5 -> 0
y_pred = (y_pred <= 2).astype(int)
# Calculate overall accuracy
accuracy = metrics.accuracy_score(y, y_pred)
# Calculate Kappa score
K = metrics.cohen_kappa_score(y, y_pred)
# Calculate confusion matrix; labels are fixed so the matrix is always 2x2,
# even if only one class occurs in the test points
CM = metrics.confusion_matrix(y, y_pred, labels=[0, 1])
# rows are true classes and columns are predicted classes, so with snow (1) as the
# positive class the flattened order is TN, FP, FN, TP
tn, fp, fn, tp = CM.ravel()
# print results
print('n = '+str(len(y_pred)))
print('Overall accuracy = '+str(accuracy))
print('Kappa score = '+str(K))
print('Confusion matrix: ')
print(CM)

# -----Save results to file
# counts are stored as strings because NumPy integers are not JSON serializable
results = {'Dataset': dataset,
           'Overall accuracy': accuracy,
           'Kappa score': K,
           'Confusion matrix': {
               'TP': str(tp),
               'TN': str(tn),
               'FP': str(fp),
               'FN': str(fn)
           },
           'N': len(y_pred)
          }
results_fn = base_path + 'inputs-outputs/classification_performace_metrics_Landsat.json'
# context manager guarantees the file is flushed and closed
with open(results_fn, 'w') as results_file:
    json.dump(results, results_file)
print('Performance metrics saved to file: '+results_fn)

In [None]:
# Overlay the classified points on an RGB view of the image.
# Uses im_ds, ds_dict and data_pts as left behind by the previously executed cell,
# i.e. the last image processed and its points.
fig, ax = f.plot_xr_rgb_image(im_ds, ds_dict['RGB_bands'])
snow_geoms = data_pts.loc[data_pts['class']==1, 'geometry']
no_snow_geoms = data_pts.loc[data_pts['class']==0, 'geometry']
# coordinates are divided by 1e3 (presumably m -> km to match the axes drawn by
# f.plot_xr_rgb_image -- confirm)
# snow points: cyan dots
snow_x = [geom.geoms[0].coords.xy[0][0]/1e3 for geom in snow_geoms]
snow_y = [geom.geoms[0].coords.xy[1][0]/1e3 for geom in snow_geoms]
ax.plot(snow_x, snow_y, '.c')
# no-snow points: hollow magenta circles
no_snow_x = [geom.geoms[0].coords.xy[0][0]/1e3 for geom in no_snow_geoms]
no_snow_y = [geom.geoms[0].coords.xy[1][0]/1e3 for geom in no_snow_geoms]
ax.plot(no_snow_x, no_snow_y, 'o', markerfacecolor='none', markeredgecolor='m')
plt.show()

## Sentinel-2 SR

In [None]:
# -----Load trained classifier and feature columns
clf_fn = base_path+'inputs-outputs/Sentinel-2_SR_classifier_all_sites.joblib'
clf = load(clf_fn)
feature_cols_fn = base_path+'inputs-outputs/Sentinel-2_SR_feature_columns.json'
# context manager closes the JSON file handle after reading
with open(feature_cols_fn) as feature_cols_file:
    feature_cols = json.load(feature_cols_file)

# -----Subset dataset dictionary
dataset = 'Sentinel-2_SR'
ds_dict = dataset_dict[dataset]

# -----Set up testing data
# load image file names
im_fns = sorted(glob.glob(data_pts_path + 'LemonCreek_'+dataset+'*.tif'))
# load data point file names
# NOTE(review): these patterns are not restricted to the current dataset; images and
# point files are paired purely by sorted position, so the folder must hold exactly
# one snow / no-snow shapefile per image, in matching order -- confirm
data_pts_snow_fns = sorted(glob.glob(data_pts_path+'*_snow.shp'))
data_pts_no_snow_fns = sorted(glob.glob(data_pts_path+'*_no-snow.shp'))

# values that are identical for every image
target_crs = 'EPSG:' + str(epsg_UTM)
band_names = list(ds_dict['refl_bands'].keys())
# raster band numbers start at 1; map them onto the dataset's band names in order
band_rename = {band_idx + 1: name for band_idx, name in enumerate(band_names)}

# per-image data frames, concatenated once after the loop
data_pts_list = []
# loop through Sentinel-2 SR images
for i, im_fn in enumerate(im_fns):

    # load image as xarray.DataArray
    im_da = rxr.open_rasterio(im_fn)
    # reproject to optimal UTM zone
    im_da = im_da.rio.reproject(target_crs)
    im_crs = 'EPSG:' + str(im_da.rio.crs.to_epsg())
    # convert to xarray.DataSet with one variable per band
    im_ds = im_da.to_dataset('band')
    im_ds = im_ds.rename(band_rename)
    # account for image scalar and no data values
    im_ds = xr.where(im_ds != ds_dict['no_data_value'],
                     im_ds / ds_dict['image_scalar'], np.nan)
    # expand dimensions to include time
    # 'system-time_start' is divided by 1000 in the original code, i.e. treated as
    # epoch milliseconds; build the timestamp directly so it does not depend on the
    # machine's local timezone (presumably the epoch value is UTC -- confirm)
    im_dt = np.datetime64(int(im_da.attrs['system-time_start']), 'ms')
    im_ds = im_ds.expand_dims({'time': [im_dt]})
    # set CRS
    im_ds.rio.write_crs(im_crs, inplace=True)

    # load classified points
    # no-snow
    data_pts_no_snow = gpd.read_file(data_pts_no_snow_fns[i]) # read file
    data_pts_no_snow['class'] = 0 # determine class ID
    # snow
    data_pts_snow = gpd.read_file(data_pts_snow_fns[i]) # read file
    data_pts_snow['class'] = 1 # determine class ID
    # combine data pts, reproject to image CRS
    data_pts = pd.concat([data_pts_snow, data_pts_no_snow])
    data_pts = data_pts.to_crs(im_crs)
    # drop the 'id' column, then any rows containing NaN
    data_pts = data_pts.drop('id', axis=1)
    data_pts = data_pts.dropna()
    # grab x and y sample points (first coordinate of the first part of each geometry)
    pts_x = [geom.geoms[0].coords.xy[0][0] for geom in data_pts.geometry]
    pts_y = [geom.geoms[0].coords.xy[1][0] for geom in data_pts.geometry]
    # sample image values at data points; the last feature column is skipped here
    # and NDSI is computed after the loop (assumes NDSI is the last entry of
    # feature_cols -- TODO confirm)
    for band in feature_cols[0:-1]:
        data_pts[band] = [im_ds[band].sel(x=pt_x, y=pt_y, method='nearest').data[0]
                          for pt_x, pt_y in zip(pts_x, pts_y)]
    data_pts_list.append(data_pts)

# concatenate to data_pts_full
if not data_pts_list:
    raise FileNotFoundError('No ' + dataset + ' images found in ' + data_pts_path)
data_pts_full = pd.concat(data_pts_list)

# add NDSI column: normalized difference of the two bands listed in ds_dict['NDSI_bands']
ndsi_band_1 = data_pts_full[ds_dict['NDSI_bands'][0]]
ndsi_band_2 = data_pts_full[ds_dict['NDSI_bands'][1]]
data_pts_full['NDSI'] = (ndsi_band_1 - ndsi_band_2) / (ndsi_band_1 + ndsi_band_2)
# remove rows with no data
data_pts_full = data_pts_full.dropna().reset_index(drop=True)
# reduce memory usage in data pts
data_pts_full = f.reduce_memory_usage(data_pts_full, verbose=False)

# -----Test the trained classifier
# features
X = data_pts_full[feature_cols]
# target variable
y = data_pts_full['class'].values
# Predict class values using trained classifier
y_pred = clf.predict(X)
# Adjust outputs to only test snow and no-snow:
# snow = 1, 2 -> 1; no-snow = 3, 4, 5 -> 0
y_pred = (y_pred <= 2).astype(int)
# Calculate overall accuracy
accuracy = metrics.accuracy_score(y, y_pred)
# Calculate Kappa score
K = metrics.cohen_kappa_score(y, y_pred)
# Calculate confusion matrix; labels are fixed so the matrix is always 2x2,
# even if only one class occurs in the test points
CM = metrics.confusion_matrix(y, y_pred, labels=[0, 1])
# rows are true classes and columns are predicted classes, so with snow (1) as the
# positive class the flattened order is TN, FP, FN, TP
tn, fp, fn, tp = CM.ravel()
# print results
print('n = '+str(len(y_pred)))
print('Overall accuracy = '+str(accuracy))
print('Kappa score = '+str(K))
print('Confusion matrix: ')
print(CM)

# -----Save results to file
# counts are stored as strings because NumPy integers are not JSON serializable
results = {'Dataset': dataset,
           'Overall accuracy': accuracy,
           'Kappa score': K,
           'Confusion matrix': {
               'TP': str(tp),
               'TN': str(tn),
               'FP': str(fp),
               'FN': str(fn)
           },
           'N': len(y_pred)
          }
results_fn = base_path + 'inputs-outputs/classification_performace_metrics_'+dataset+'.json'
# context manager guarantees the file is flushed and closed
with open(results_fn, 'w') as results_file:
    json.dump(results, results_file)
print('Performance metrics saved to file: '+results_fn)

## Sentinel-2 TOA

In [None]:
# -----Load trained classifier and feature columns
clf_fn = base_path+'inputs-outputs/Sentinel-2_TOA_classifier_all_sites.joblib'
clf = load(clf_fn)
feature_cols_fn = base_path+'inputs-outputs/Sentinel-2_TOA_feature_columns.json'
# context manager closes the JSON file handle after reading
with open(feature_cols_fn) as feature_cols_file:
    feature_cols = json.load(feature_cols_file)

# -----Subset dataset dictionary
dataset = 'Sentinel-2_TOA'
ds_dict = dataset_dict[dataset]

# -----Set up testing data
# load image file names
im_fns = sorted(glob.glob(data_pts_path + 'LemonCreek_'+dataset+'*.tif'))
# load data point file names
# NOTE(review): these patterns are not restricted to the current dataset; images and
# point files are paired purely by sorted position, so the folder must hold exactly
# one snow / no-snow shapefile per image, in matching order -- confirm
data_pts_snow_fns = sorted(glob.glob(data_pts_path+'*_snow.shp'))
data_pts_no_snow_fns = sorted(glob.glob(data_pts_path+'*_no-snow.shp'))

# values that are identical for every image
target_crs = 'EPSG:' + str(epsg_UTM)
band_names = list(ds_dict['refl_bands'].keys())
# raster band numbers start at 1; map them onto the dataset's band names in order
band_rename = {band_idx + 1: name for band_idx, name in enumerate(band_names)}

# per-image data frames, concatenated once after the loop
data_pts_list = []
# loop through Sentinel-2 TOA images
for i, im_fn in enumerate(im_fns):

    # load image as xarray.DataArray
    im_da = rxr.open_rasterio(im_fn)
    # reproject to optimal UTM zone
    im_da = im_da.rio.reproject(target_crs)
    im_crs = 'EPSG:' + str(im_da.rio.crs.to_epsg())
    # convert to xarray.DataSet with one variable per band
    im_ds = im_da.to_dataset('band')
    im_ds = im_ds.rename(band_rename)
    # account for image scalar and no data values
    im_ds = xr.where(im_ds != ds_dict['no_data_value'],
                     im_ds / ds_dict['image_scalar'], np.nan)
    # expand dimensions to include time
    # 'system-time_start' is divided by 1000 in the original code, i.e. treated as
    # epoch milliseconds; build the timestamp directly so it does not depend on the
    # machine's local timezone (presumably the epoch value is UTC -- confirm)
    im_dt = np.datetime64(int(im_da.attrs['system-time_start']), 'ms')
    im_ds = im_ds.expand_dims({'time': [im_dt]})
    # set CRS
    im_ds.rio.write_crs(im_crs, inplace=True)

    # load classified points
    # no-snow
    data_pts_no_snow = gpd.read_file(data_pts_no_snow_fns[i]) # read file
    data_pts_no_snow['class'] = 0 # determine class ID
    # snow
    data_pts_snow = gpd.read_file(data_pts_snow_fns[i]) # read file
    data_pts_snow['class'] = 1 # determine class ID
    # combine data pts, reproject to image CRS
    data_pts = pd.concat([data_pts_snow, data_pts_no_snow])
    data_pts = data_pts.to_crs(im_crs)
    # drop the 'id' column, then any rows containing NaN
    data_pts = data_pts.drop('id', axis=1)
    data_pts = data_pts.dropna()
    # grab x and y sample points (first coordinate of the first part of each geometry)
    pts_x = [geom.geoms[0].coords.xy[0][0] for geom in data_pts.geometry]
    pts_y = [geom.geoms[0].coords.xy[1][0] for geom in data_pts.geometry]
    # sample image values at data points; the last feature column is skipped here
    # and NDSI is computed after the loop (assumes NDSI is the last entry of
    # feature_cols -- TODO confirm)
    for band in feature_cols[0:-1]:
        data_pts[band] = [im_ds[band].sel(x=pt_x, y=pt_y, method='nearest').data[0]
                          for pt_x, pt_y in zip(pts_x, pts_y)]
    data_pts_list.append(data_pts)

# concatenate to data_pts_full
if not data_pts_list:
    raise FileNotFoundError('No ' + dataset + ' images found in ' + data_pts_path)
data_pts_full = pd.concat(data_pts_list)

# add NDSI column: normalized difference of the two bands listed in ds_dict['NDSI_bands']
ndsi_band_1 = data_pts_full[ds_dict['NDSI_bands'][0]]
ndsi_band_2 = data_pts_full[ds_dict['NDSI_bands'][1]]
data_pts_full['NDSI'] = (ndsi_band_1 - ndsi_band_2) / (ndsi_band_1 + ndsi_band_2)
# remove rows with no data
data_pts_full = data_pts_full.dropna().reset_index(drop=True)
# reduce memory usage in data pts
data_pts_full = f.reduce_memory_usage(data_pts_full, verbose=False)

# -----Test the trained classifier
# features
X = data_pts_full[feature_cols]
# target variable
y = data_pts_full['class'].values
# Predict class values using trained classifier
y_pred = clf.predict(X)
# Adjust outputs to only test snow and no-snow:
# snow = 1, 2 -> 1; no-snow = 3, 4, 5 -> 0
y_pred = (y_pred <= 2).astype(int)
# Calculate overall accuracy
accuracy = metrics.accuracy_score(y, y_pred)
# Calculate Kappa score
K = metrics.cohen_kappa_score(y, y_pred)
# Calculate confusion matrix; labels are fixed so the matrix is always 2x2,
# even if only one class occurs in the test points
CM = metrics.confusion_matrix(y, y_pred, labels=[0, 1])
# rows are true classes and columns are predicted classes, so with snow (1) as the
# positive class the flattened order is TN, FP, FN, TP
tn, fp, fn, tp = CM.ravel()
# print results
print('n = '+str(len(y_pred)))
print('Overall accuracy = '+str(accuracy))
print('Kappa score = '+str(K))
print('Confusion matrix: ')
print(CM)

# -----Save results to file
# counts are stored as strings because NumPy integers are not JSON serializable
results = {'Dataset': dataset,
           'Overall accuracy': accuracy,
           'Kappa score': K,
           'Confusion matrix': {
               'TP': str(tp),
               'TN': str(tn),
               'FP': str(fp),
               'FN': str(fn)
           },
           'N': len(y_pred)
          }
results_fn = base_path + 'inputs-outputs/classification_performace_metrics_'+dataset+'.json'
# context manager guarantees the file is flushed and closed
with open(results_fn, 'w') as results_file:
    json.dump(results, results_file)
print('Performance metrics saved to file: '+results_fn)