In [None]:
#############################################
# Setup your environment for classification #
#############################################

# import necessary libraries
from Death_to_Kappa import *
from osgeo import gdal, ogr
from geo_utils import create_raster_from_vector
import glob, os
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import metrics
import seaborn as sns
import numpy as np
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools
import copy

## First Quality Assessment for your Land Cover Classifications

In [None]:
##################################################
# Setup the labels of your classes and their IDs #
##################################################

# In this case we use the CORINE Land Cover nomenclature with a 3 digit class
# code. Codes and names are kept side by side as (code, name) pairs so the two
# lists derived below can never drift out of alignment.
# NOTE: the previous name list was one entry short (the level-1 class
# 500 'Water bodies' was missing), which shifted the names of all 5xx classes
# by one and left code 523 without a name.
corine_classes = [
    (-1, 'Error'),
    (0, 'Zero Class'),
    (100, 'Artificial'),
    (110, 'Urban fabric'),
    (111, 'Cont. urban fabric'),
    (112, 'Disc. urban fabric'),
    (120, 'Industrial/Commercial/Transport units'),
    (121, 'Industrial/commercial units'),
    (122, 'Road/rail networks, associated land'),
    (123, 'Port areas'),
    (124, 'Airport'),
    (130, 'Mine/dump/construction sites'),
    (131, 'Mineral extraction sites'),
    (132, 'Dump sites'),
    (133, 'Construction sites'),
    (140, 'Artificial/non-agricultural vegetated areas'),
    (141, 'Green urban areas'),
    (142, 'Sport/Leisure facilities'),
    (200, 'Agricultural'),
    (210, 'Arable land'),
    (211, 'Non-irrigated arable land'),
    (212, 'Permanently irrigated land'),
    (213, 'Rice fields'),
    (220, 'Permanent crops'),
    (221, 'Vineyards'),
    (222, 'Fruit trees/berry plantations'),
    (223, 'olive groves'),
    (230, 'Pastures'),
    (231, 'Pastures'),
    (240, 'Heterogenous agricultural areas'),
    (241, 'Annual crops/Permanent crops'),
    (242, 'Complex cultivation patterns'),
    (243, 'agricultur/significant areas of natural veg.'),
    (244, 'Agro-forestry areas'),
    (300, 'Forest/semi natural'),
    (310, 'Forest'),
    (311, 'Broad-leaved forest'),
    (312, 'Coniferous forest'),
    (313, 'Mixed forest'),
    (320, 'Scrub/herbaceous veg.'),
    (321, 'Natural grasslands'),
    (322, 'Moors/heathland'),
    (323, 'Sclerophyllous veg.'),
    (324, 'Transitional woodland-shrub'),
    (330, 'Open spaces w/ little veg.'),
    (331, 'Beaches/Dunes/Sands'),
    (332, 'Bare rocks'),
    (333, 'Sparsely vegetated areas'),
    (334, 'Burnt areas'),
    (335, 'Glaciers and perpetual snow'),
    (400, 'Wetlands'),
    (410, 'Inland wetlands'),
    (411, 'Inland marshes'),
    (412, 'Peat bogs'),
    (420, 'Maritime wetlands'),
    (421, 'Salt marshes'),
    (422, 'Salines'),
    (423, 'Intertidal flats'),
    (500, 'Water bodies'),
    (510, 'Inland waters'),
    (511, 'Water courses'),
    (512, 'Water bodies'),
    (520, 'Marine waters'),
    (521, 'Coastal lagoons'),
    (522, 'Estuaries'),
    (523, 'Sea/ocean'),
]

# label_con[0] holds the class codes, label_con[1] the matching class names
# (same layout as before, so the cells below keep working unchanged).
label_con = [[code for code, _ in corine_classes],
             [name for _, name in corine_classes]]
label_l = len(label_con[0])
assert label_l == len(label_con[1]), 'every class code needs exactly one name'

# list every code together with its name (including the last entry)
for code, name in zip(label_con[0], label_con[1]):
    print(code, ':', name)

In [None]:
# Collect the classified rasters in alphabetical order; every later cell
# iterates over this list.
classified = sorted(glob.glob('./Output_Data/WW/Classified/classified*.tif'))
# show the files that have been found
classified

In [None]:
# Extract a title from every file name: the file's base name up to its first
# dot. os.path.basename is used instead of splitting on '/' because glob joins
# the last path component with the platform separator (a backslash on
# Windows), which a plain '/' split would not remove.
titles = [os.path.basename(path).split('.')[0] for path in classified]
# list titles that have been found
titles

In [None]:
# add path to reference data
# The path is defined once so that the lookup table and the raster that is
# actually loaded cannot get out of sync.
reference_path = './Training_Data/WW/WW_LVL3.tif'

# coverage name -> path of the reference raster
test_paths = {'Wielkopolska': reference_path}

ds_st = gdal.Open(reference_path)
if ds_st is None:
    # Unless GDAL exceptions are enabled, gdal.Open() signals failure by
    # returning None; fail here with a clear message instead of an
    # AttributeError on the next line.
    raise RuntimeError('GDAL could not open reference raster: ' + reference_path)

# reference class codes as one flat array (one value per pixel)
test_st = ds_st.ReadAsArray().flatten()

# coverage name -> flattened reference labels
test_labels = {'Wielkopolska': test_st}

In [None]:
####################################################
# Generate confusion matrices and create heat maps #
####################################################

# Set to True to merge classes before the comparison:
# 221..229 -> 220, 321 -> 333 and 324 -> 310.
aggregate = False

coverage_name = 'Wielkopolska'

# choose your Corine Level of Classification
level = 'LEVEL_3'

heatmap_dir = './Output_Data/WW/ConfusionMatrixImages'
os.makedirs(heatmap_dir, exist_ok=True)   # savefig does not create folders

# File names look like 'classified_WW_<name>.tif'; only <name> is used in the
# output file name.
title_prefix = 'classified_WW_'

# class code -> class name
code_to_name = dict(zip(label_con[0], label_con[1]))

# The reference labels are the same for every map, so prepare them once.
test = test_labels[coverage_name]
if aggregate:
    # Work on a copy: recoding in place would silently alter the shared
    # reference array that the other cells read from test_labels.
    test = test.copy()
    test[(test > 220) & (test < 230)] = 220
    test[test == 321] = 333
    test[test == 324] = 310

for path in classified:
    ds = gdal.Open(path)
    if ds is None:
        raise RuntimeError('GDAL could not open classified raster: ' + path)
    predicted = ds.ReadAsArray().flatten()

    if aggregate:
        predicted[(predicted > 220) & (predicted < 230)] = 220
        predicted[predicted == 321] = 333
        predicted[predicted == 324] = 310

    # Sorted union of all class codes occurring in reference or prediction;
    # this is also the row/column order of the confusion matrix.
    label_code = np.union1d(test, predicted)
    # Codes without a known name are shown as the bare code instead of
    # breaking the tick labels.
    label = [code_to_name.get(int(code), str(code)) for code in label_code]

    title = os.path.basename(path).split('.')[0]

    # rows: reference (true) classes, columns: predicted classes
    mat = metrics.confusion_matrix(test, predicted, labels=label_code)

    # Normalise every row by the number of reference pixels of that class.
    # Classes that only occur in the prediction have an empty row and become
    # NaN (left blank by the heat map); the warning for that is expected.
    with np.errstate(divide='ignore', invalid='ignore'):
        norm_mat = mat.astype('float') / mat.sum(axis=1)[:, np.newaxis]

    fig, ax = plt.subplots(figsize=(10, 10))
    # transposed so that the x axis shows the true and the y axis the
    # predicted label
    sns.heatmap(norm_mat.T, square=True, annot=True, fmt='.2f', cbar=False,
                xticklabels=label, yticklabels=label, ax=ax)

    ax.set_title('Confusion Matrix', fontsize=25)
    ax.set_xlabel('true label', fontsize=18)
    ax.set_ylabel('predicted label', fontsize=18)

    # str.strip() removes a *set of characters* from both ends, not a prefix,
    # and therefore mangled names such as 'classified_WW_svm'; cut the prefix
    # explicitly instead.
    title2 = title[len(title_prefix):] if title.startswith(title_prefix) else title
    fig.savefig(heatmap_dir + '/WW_CMHM_' + title2 + '.jpeg', bbox_inches="tight")

    # show the figure, then release it so that a long list of maps does not
    # keep every figure in memory
    plt.show()
    plt.close(fig)
    

In [None]:
#######################################################
# Generate confusion matrices and create ascii tables #
#######################################################

# Set to True to merge classes before the comparison:
# 221..229 -> 220, 321 -> 333 and 324 -> 310.
aggregate = False

coverage_name = 'Wielkopolska'

# choose your Corine Level of Classification
level = 'LEVEL_3'

csv_dir = './Output_Data/WW/ConfusionMatrixCSV'
os.makedirs(csv_dir, exist_ok=True)   # to_csv does not create folders

# File names look like 'classified_WW_<name>.tif'; only <name> is used in the
# output file name.
title_prefix = 'classified_WW_'

# class code -> class name
code_to_name = dict(zip(label_con[0], label_con[1]))

# The reference labels are the same for every map, so prepare them once.
reference = test_labels[coverage_name]
valid = reference != 0        # Do not consider zero class
test = reference[valid]       # boolean indexing returns a copy, so the
                              # recoding below leaves test_labels untouched
if aggregate:
    test[(test > 220) & (test < 230)] = 220
    test[test == 321] = 333
    test[test == 324] = 310

for path in classified:
    ds = gdal.Open(path)
    if ds is None:
        raise RuntimeError('GDAL could not open classified raster: ' + path)
    # keep only the pixels that have a (non-zero) reference label
    predicted = ds.ReadAsArray().flatten()[valid]

    if aggregate:
        predicted[(predicted > 220) & (predicted < 230)] = 220
        predicted[predicted == 321] = 333
        predicted[predicted == 324] = 310

    # Sorted union of all class codes occurring in reference or prediction.
    label_code = np.union1d(test, predicted)
    # Codes without a known name are listed as the bare code.
    label = [code_to_name.get(int(code), str(code)) for code in label_code]

    row_label = label + ['column total', 'producer\'s accuracy']
    column_label = label + ['row total', 'user\'s accuracy']
    print(label)

    title = os.path.basename(path).split('.')[0]

    # ctab comes from Death_to_Kappa; presumably it cross-tabulates the map
    # (rows) against the reference (columns) -- TODO confirm against the module.
    cm = np.asarray(ctab(predicted, test))
    size = cm.shape[0]
    if size != len(label):
        raise ValueError('confusion matrix has %d classes but %d labels were found for %s'
                         % (size, len(label), path))

    # float64 instead of float32: pixel counts above 2**24 are no longer
    # exactly representable in float32.
    cm_tot = np.zeros((size + 2, size + 2), dtype=np.float64)
    cm_tot[:size, :size] = cm
    cm_tot[size, :size] = cm.sum(axis=0)    # column totals
    cm_tot[:size, size] = cm.sum(axis=1)    # row totals

    # compute user's and producer's accuracy
    correct = np.diag(cm).astype(np.float64)
    # classes with an empty row/column yield NaN; that is expected, so the
    # division warnings are silenced
    with np.errstate(divide='ignore', invalid='ignore'):
        # user's accuracy: correct pixels / row total
        cm_tot[:size, size + 1] = correct / cm_tot[:size, size]
        # producer's accuracy: correct pixels / column total
        cm_tot[size + 1, :size] = correct / cm_tot[size, :size]
        # overall accuracy in the bottom-right corner
        cm_tot[size + 1, size + 1] = correct.sum() / cm.sum()

    # str.strip() removes a *set of characters* from both ends, not a prefix,
    # and therefore mangled names such as 'classified_WW_svm'; cut the prefix
    # explicitly instead.
    title2 = title[len(title_prefix):] if title.startswith(title_prefix) else title
    cmdf = pd.DataFrame(cm_tot, index=row_label, columns=column_label)
    cmdf.to_csv(csv_dir + '/WW_CM_' + title2 + '.csv')

## Statistics


In [None]:
########################################################
# Generate report for each maps that has been assessed #
########################################################

# Passed on to kstat(). It is set once here; it used to be reset to False
# inside the loop, which made this switch ineffective.
aggregate = False

coverage_name = 'Wielkopolska'
level = 'LEVEL_3'

# reference raster the maps are compared against
path_to_reference = test_paths[coverage_name]

stats_dir = './Output_Data/WW/Stats'
os.makedirs(stats_dir, exist_ok=True)   # to_csv does not create folders

# File names look like 'classified_WW_<name>.tif'; only <name> is used in the
# output file name.
title_prefix = 'classified_WW_'

for path in classified:

    # get name of participant (file name up to its first dot)
    title = os.path.basename(path).split('.')[0]

    # kstat comes from Death_to_Kappa and is given the paths of the map and
    # of the reference raster; its result is written to CSV below.
    df = kstat(path, path_to_reference, perCategory = False, aggregate = aggregate)
    print(title,': ',df)

    # str.strip() removes a *set of characters* from both ends, not a prefix,
    # and therefore mangled names such as 'classified_WW_svm'; cut the prefix
    # explicitly instead.
    title2 = title[len(title_prefix):] if title.startswith(title_prefix) else title
    df.to_csv(stats_dir + '/WW_QR_' + title2 + '.csv')
    