In [None]:
### Validate classification outputs (shapefiles) against validation points (shapefile). ###
### The output is a text file containing the confusion matrix, the overall accuracy, and Cohen's kappa. ###

from rtree import index
import fiona
from shapely.geometry import shape
import os
from sklearn.metrics import cohen_kappa_score
import ogr

# Arguments: classified shapefile, validation points, number of classes, path to store result, attribute name of the classified input, attribute name of the validation input
def validate_classi(infile, vali_points, no_classes, outpath, c_field, v_field):
    """Validate a classified polygon shapefile against validation points.

    Every validation point is compared with each polygon of ``infile`` that
    contains it. The polygon's predicted class and the point's validation
    class are tallied in a confusion matrix (rows: predicted class, columns:
    validation class; class ``k`` is stored at index ``k - 1``). Cohen's kappa
    and the overall accuracy are printed, and together with the matrix written
    to ``<outpath>/<base name of infile>_validation_confmat.csv``.

    The overall accuracy is the number of agreeing point/polygon pairs divided
    by the total number of validation points, so points that lie in no polygon
    lower the accuracy.

    Args:
        infile: Path of the classified shapefile (polygons).
        vali_points: Path of the validation shapefile (points).
        no_classes: Number of classes, i.e. the size of the confusion matrix.
        outpath: Directory in which the report is stored; an empty string
            means the current working directory.
        c_field: Attribute of ``infile`` that holds the predicted class.
        v_field: Attribute of ``vali_points`` that holds the validation class.
    """
    # basename/splitext also work when a directory name contains a dot
    # (e.g. './results/map.shp'), where splitting on the first '.' does not.
    filename = os.path.splitext(os.path.basename(infile))[0]
    confmat = [[0] * no_classes for _ in range(no_classes)]
    pred = []
    vali = []
    correct = 0
    idx = index.Index()
    with fiona.open(infile, 'r') as polygons, fiona.open(vali_points, 'r') as points:
        # Spatial index over the polygon bounding boxes for fast candidate lookup.
        for poly in polygons:
            idx.insert(int(poly['id']), shape(poly['geometry']).bounds)

        no_val = len(points)
        print(no_val)
        for point in points:
            p = shape(point['geometry'])
            for fid in idx.intersection(p.coords[0]):
                candidate = polygons[fid]
                # The index only compares bounding boxes; confirm with the geometry.
                if not p.within(shape(candidate['geometry'])):
                    continue
                pred_class = candidate['properties'][c_field]
                vali_class = point['properties'][v_field]
                # Classes below 1 (0 is what generalize_classes writes for
                # unmapped classes) have no matrix cell. Indexing with them
                # would wrap around to the last row/column.
                if pred_class >= 1 and vali_class >= 1:
                    confmat[pred_class - 1][vali_class - 1] += 1
                pred.append(pred_class)
                vali.append(vali_class)
                if pred_class == vali_class:
                    correct += 1

    # Both scores are undefined without data; report NaN instead of failing.
    kappa = cohen_kappa_score(pred, vali) if pred else float('nan')
    acc = correct / no_val if no_val else float('nan')
    print(kappa, acc)

    matrixfile = os.path.join(outpath, filename + '_validation_confmat.csv')
    # Best effort: drop a stale report before writing the new one.
    try:
        os.remove(matrixfile)
    except OSError:
        pass

    with open(matrixfile, 'w') as thefile:
        # Header row with the class numbers, then one row per predicted class.
        header = ''.join(str(c) + ' ' for c in range(1, no_classes + 1))
        thefile.write('  ' + header + '\n')
        for c, row in enumerate(confmat, start=1):
            cells = ''.join(str(count) + ' ' for count in row)
            thefile.write(str(c) + ' ' + cells + '\n')
        thefile.write("\nCohen's Kappa Coefficient: " + str(kappa) + '\n')
        thefile.write('Overall accuracy: ' + str(acc))

# Change the classes of the result; e.g., merge forest and vegetation class
def generalize_classes(infile, oldclasses, newclasses, cfield):
    """Write a copy of a shapefile with an additional, remapped class attribute.

    The copy is stored as ``<infile without extension>_gen.shp``. Every feature
    keeps its geometry and attributes and receives an integer attribute
    ``new_id``: the entry of ``newclasses`` at the position where the feature's
    ``cfield`` value occurs in ``oldclasses`` (the last position if it occurs
    several times), or 0 if the value is not listed. Afterwards the spatial
    reference of ``infile`` is written to ``<infile without extension>_gen.prj``.

    Args:
        infile: Path of the shapefile to read.
        oldclasses: Class values to replace.
        newclasses: Replacement values, aligned with ``oldclasses``.
        cfield: Attribute of ``infile`` that holds the class to remap.

    Raises:
        ValueError: If ``oldclasses`` and ``newclasses`` differ in length.
    """
    if len(oldclasses) != len(newclasses):
        raise ValueError('oldclasses and newclasses must have the same length')

    # splitext only strips the extension, so dots in directory names survive.
    filename = os.path.splitext(infile)[0]
    print(filename)
    # Later duplicates overwrite earlier ones, i.e. the last match wins.
    mapping = dict(zip(oldclasses, newclasses))

    with fiona.open(infile, 'r') as ifile:
        # Copy the properties mapping as well; a shallow schema copy would
        # add 'new_id' to the schema of the open input collection.
        properties = ifile.schema['properties'].copy()
        properties['new_id'] = 'int'
        schema = dict(ifile.schema)
        schema['properties'] = properties
        with fiona.open(filename + '_gen.shp', 'w', 'ESRI Shapefile', schema) as outfile:
            for element in ifile:
                # Work on a copy so the feature that was read stays untouched.
                attributes = dict(element['properties'])
                attributes['new_id'] = mapping.get(element['properties'][cfield], 0)
                outfile.write({
                    'properties': attributes,
                    'geometry': element['geometry'],
                })

    # Set the original projection system
    esri = ogr.GetDriverByName('ESRI Shapefile')
    ref = esri.Open(infile)
    ref_layer = ref.GetLayer()
    spatial_ref = ref_layer.GetSpatialRef()
    # No spatial reference on the input: nothing to copy, so no .prj is written.
    if spatial_ref is not None:
        with open(filename + '_gen.prj', 'w') as prj_file:
            prj_file.write(spatial_ref.ExportToWkt())

if __name__ == '__main__':
    # Path of the classification result (shapefile); fill in before running.
    classified = ''
    #classi_out = ''
    # Path of the validation points (shapefile); fill in before running.
    #vali = ''
    vali_out = ''

    # Optional preprocessing: remap classes before validating. The first list
    # holds the old classes, the second the new ones; both must have the same
    # length.
    #generalize_classes(classified, [1, 2, 3, 4], [1, 2, 3, 3], 'predicted')
    #generalize_classes(vali, [1, 2, 3, 4, 5, 6, 7], [1, 1, 2, 3, 3, 3, 3], 'id')

    # Compare the 'predicted' attribute of the classification with the
    # 'new_id' attribute of the validation points, using three classes.
    validate_classi(
        infile=classified,
        vali_points=vali_out,
        no_classes=3,
        outpath='',
        c_field='predicted',
        v_field='new_id',
    )