# Prepare input and training data for vector classifier
First, select suitable training areas from the segmentation output and classify them. Next, run the following script on both the entire input feature dataset and the training dataset to add the attributes that will be used for feature classification.

In [None]:
### Add data from a .tif file to a polygon shapefile. The average for each polygon area is calculated. ###
### Optionally, add area and circumference of each polygon as an attribute. ###

from rasterstats import zonal_stats
from osgeo import gdal
from osgeo import ogr
import os
import errno
import fiona
from shapely.geometry import mapping
from shapely.geometry import shape
import shutil
import linecache

# https://stackoverflow.com/questions/10840533/most-pythonic-way-to-delete-a-file-which-may-not-exist
def silentremove(filename):
    """Recursively delete the directory tree at ``filename`` if it exists.

    A missing path is not an error; any other ``OSError`` raised by
    ``shutil.rmtree`` is propagated to the caller.
    """
    try:
        shutil.rmtree(filename)
    except OSError as err:
        # errno.ENOENT means "no such file or directory": nothing to delete.
        if err.errno == errno.ENOENT:
            return
        # Anything else (permissions, not a directory, ...) is a real failure.
        raise

def _band_means(datapath, vectorpath, tempfolder, filename):
    """Return, for every raster band, the list of per-polygon mean values.

    Each band of ``datapath`` is copied to a single-band Float32 GeoTIFF in
    ``tempfolder`` (with NaN registered as the nodata value) and passed to
    ``zonal_stats`` together with ``vectorpath``. The result is a list with
    one entry per band; each entry holds one mean per polygon, in the order
    in which the polygons are stored in ``vectorpath``.
    """
    data = gdal.Open(datapath)
    if data is None:
        raise RuntimeError('Could not open raster: ' + str(datapath))

    driver = gdal.GetDriverByName('GTiff')
    x_size = data.RasterXSize
    y_size = data.RasterYSize
    geotrans = data.GetGeoTransform()
    proj = data.GetProjection()

    band_means = []
    for i in range(1, data.RasterCount + 1):
        bandname = os.path.join(tempfolder, filename + str(i) + '.tif')
        band_arr = data.GetRasterBand(i).ReadAsArray()
        dataset = driver.Create(bandname, x_size, y_size, 1, gdal.GDT_Float32)
        dataset.GetRasterBand(1).SetNoDataValue(float('nan'))
        dataset.SetGeoTransform(geotrans)
        dataset.SetProjection(proj)
        dataset.GetRasterBand(1).WriteArray(band_arr)
        dataset.FlushCache()
        # Dropping the reference closes the file so zonal_stats can read it.
        dataset = None
        stats = zonal_stats(vectorpath, bandname)
        band_means.append([f['mean'] for f in stats])

    # Release the source raster handle.
    data = None
    return band_means


def add_data_to_shp(datapath, vectorpath, folder, suffix = '_data', sh = True):
    """Add per-polygon raster band means to a copy of a polygon shapefile.

    For every band ``i`` of the raster, a float attribute ``mean<i>`` holding
    the mean raster value inside each polygon is added. The result is written
    to ``<folder>/<vector name><suffix>.shp``; the input files are not
    modified.

    Parameters:
        datapath: path of the raster (e.g. a .tif file) providing the data.
        vectorpath: path of the polygon shapefile to which data is added.
        folder: folder in which the result (and a temporary working folder
            named ``<raster name>_temp``) is created.
        suffix: text appended to the vector file name to form the output name.
        sh: if true, also add the attributes ``area`` and ``circ`` (polygon
            area and boundary length as computed by shapely).

    Raises:
        RuntimeError: if the raster cannot be opened by GDAL.
    """
    # splitext/basename keep working for paths such as './data/x.tif',
    # where splitting on the first '.' would yield an empty name.
    filename = os.path.splitext(os.path.basename(datapath))[0]
    v_name = os.path.splitext(os.path.basename(vectorpath))[0]
    tempfolder = os.path.join(folder, filename + '_temp')
    out_base = os.path.join(folder, v_name + suffix)

    silentremove(tempfolder)
    os.makedirs(tempfolder)
    try:
        # Means are kept in memory and indexed by polygon position, so the
        # value written for polygon k is always the statistic of polygon k.
        band_means = _band_means(datapath, vectorpath, tempfolder, filename)

        with fiona.open(vectorpath, 'r') as polygon:
            # Copy the schema (including its properties mapping) so that the
            # schema of the source collection is left untouched.
            schema = polygon.schema.copy()
            schema['properties'] = polygon.schema['properties'].copy()
            for band_no in range(1, len(band_means) + 1):
                schema['properties']['mean' + str(band_no)] = 'float'
            if sh:
                schema['properties']['area'] = 'float'
                schema['properties']['circ'] = 'float'

            with fiona.open(out_base + '.shp', 'w', driver='ESRI Shapefile',
                            schema=schema) as output:
                for index, poly in enumerate(polygon):
                    sh_poly = shape(poly['geometry'])
                    # Work on a copy; the source feature is not modified.
                    props = dict(poly['properties'])
                    for band_no, means in enumerate(band_means, start=1):
                        props['mean' + str(band_no)] = means[index]
                    if sh:
                        props['area'] = sh_poly.area
                        props['circ'] = sh_poly.length
                    output.write({'properties': props,
                                  'geometry': mapping(sh_poly)})

        # Set the original projection system
        esri = ogr.GetDriverByName('ESRI Shapefile')
        ref = esri.Open(vectorpath)
        spatialRef = ref.GetLayer().GetSpatialRef() if ref is not None else None
        # Without a spatial reference on the input there is nothing to copy.
        if spatialRef is not None:
            with open(out_base + '.prj', 'w') as prj_file:
                prj_file.write(spatialRef.ExportToWkt())
        # Release the OGR data source.
        spatialRef = None
        ref = None
    finally:
        # remove the folder with temporary data, also when an error occurred
        shutil.rmtree(tempfolder)

if __name__ == '__main__':
    # --- Settings to fill in before running the script ---
    # Path of the raster file whose values are added to the polygons
    data = ''
    # Path of the polygon shapefile that receives the new attributes
    vector = ''
    # Output folder for the resulting shapefile
    path = ''
    add_data_to_shp(
        data,
        vector,
        path,
        suffix='_zonalstats',
        sh=False,
    )