# Learn from raster and compare spatial Cross-Validation with a simple random selection per class



In [1]:
from MuseoToolBox import learnAndPredict
from MuseoToolBox import vectorTools
import numpy as np

## Select an algorithm from sklearn
Here we select `RandomForestClassifier` from `sklearn.ensemble`.
We define the `param_grid` (the parameter values to try during the Cross-Validation) according to the [parameters of RandomForestClassifier](http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html).

In [2]:
from sklearn.ensemble import RandomForestClassifier
# Candidate RandomForestClassifier parameter values, passed as `param_grid`
# to learnFromRaster further down.
param_grid = {
    'n_estimators': [10, 100],
    'max_features': [1, 3],
}

## Define the variables

In [3]:
# Inputs shared by the cells below (relative paths).
inRaster = "../data/map.tif"              # raster used for learning and prediction
inVector = "../data/train_withROI.gpkg"   # vector file the samples are read from
inField = "Class"                         # field of inVector holding the class values
inStand = "uniqueFID"                     # field given to standCV (presumably a stand identifier - confirm)

## Setup the Cross-Validation

In [4]:
# Stand-based Cross-Validation, built from the `inStand` field
standMethod = vectorTools.samplingMethods.standCV(
    inStand,
    SLOO=False,
    maxIter=5,
    seed=12,
)
standSelection = vectorTools.sampleSelection(inVector, inField, standMethod)
standCV = standSelection.getCrossValidationForScikitLearn()

# Random Cross-Validation, same number of iterations and same seed
randomMethod = vectorTools.samplingMethods.randomCV(nIter=5, seed=12)
randomSelection = vectorTools.sampleSelection(inVector, inField, randomMethod)
randomCV = randomSelection.getCrossValidationForScikitLearn()

## Setup learnAndPredict
`learnAndPredict` is initialised here without arguments; the algorithm and its parameters (such as *oob_score=True* if you want to be able to save it) are given later, when calling `learnFromRaster`.

In [5]:
# Initialize learning: create the learnAndPredict object that the cells below
# train (learnFromRaster) and use for prediction (predictFromRaster, predictFromArray).

model = learnAndPredict()

## Train and compare performances between CV

In [6]:
# Train once per Cross-Validation strategy and print the per-fold scores so
# that the stand-based and the random strategies can be compared.
# NOTE(review): learnFromRaster is called on the same `model` object at each
# pass, so after the loop `model` presumably holds the fit obtained with the
# last strategy of the list (randomCV) - confirm before relying on it.
for cv in [standCV, randomCV]:
    model.learnFromRaster(inRaster, inVector, inField,
                          classifier=RandomForestClassifier(),
                          param_grid=param_grid, cv=cv)

    # `matrix` is returned alongside kappa and OA but is not displayed here.
    matrix, kappa, OA = model.getStatsFromCV(kappa=True, OA=True)

    # One line per Cross-Validation iteration.
    for foldKappa, foldOA in zip(kappa, OA):
        print('Kappa : '+str(foldKappa)+' | OA : '+str(foldOA))

    meanKappa = round(np.mean(kappa)*100, 2)
    stdKappa = round(np.std(kappa)*100, 2)
    # Report the number of iterations actually evaluated rather than a
    # hard-coded 5, so the message stays right if maxIter/nIter is changed.
    print('Mean kappa for {} iter using {} CV is : {}% (+-{})'.format(
        len(kappa), cv.name, meanKappa, stdKappa))
    print(40*"=")


Values from 'Class' field will be extracted
Reading raster values...  [########################################]100%
best n_estimators : 100
best max_features : 1
Kappa : 0.781454876227 | OA : 0.886395100055
Kappa : 0.900845870156 | OA : 0.936419673262
Kappa : 0.748293156879 | OA : 0.862282410608
Kappa : 0.851420455035 | OA : 0.907166009019
Kappa : 0.797427893782 | OA : 0.873121520365
Mean kappa for 5 iter using standCV CV is : 81.59% (+-5.4)
Values from 'Class' field will be extracted
Reading raster values...  [########################################]100%
best n_estimators : 100
best max_features : 1
Kappa : 0.946601014013 | OA : 0.969276077526
Kappa : 0.948277928181 | OA : 0.970231225375
Kappa : 0.945229938517 | OA : 0.968440323158
Kappa : 0.946558075988 | OA : 0.969236279699
Kappa : 0.949580476913 | OA : 0.970907788435
Mean kappa for 5 iter using randomPerClass CV is : 94.72% (+-0.15)


# Predict a raster
When predicting, you can also save the confidence per class, or the maximum confidence over all the classes (that is, the confidence of the predicted class).

In [7]:
# Predict the whole raster and write three files: the predicted classes,
# the confidence of the predicted class and the confidence per class.
model.predictFromRaster(
    inRaster,
    outRaster='/tmp/predict.tif',
    outConfidence='/tmp/predictProba.tif',
    outConfidencePerClass='/tmp/predictProbaPerClass.tif',
)

Prediction...  [########################################]100%
Saved /tmp/predict.tif using function predictFromArray
Saved /tmp/predictProbaPerClass.tif using function predictConfidencePerClassFromArray
Saved /tmp/predictProba.tif using function predictConfidenceOfPredictedClassFromArray


# Predict an array
You can also predict labels directly from an array, provided it has the same number of bands as your raster / original X.

In [8]:
# Read the values stored in the vector file (fields prefixed with 'band_');
# presumably Y holds the labels and X the band values - confirm.
Y, X = vectorTools.readValuesFromVector(inVector, inField, bandPrefix='band_')

# Rows 50 to 99 of X serve as a small example array.
Xsample = X[50:100, :]

# Predicted labels for the example rows
Xpred = model.predictFromArray(Xsample)
print(Xpred)

# Confidence of the predicted class for the same rows
Xconfidence = model.predictConfidenceOfPredictedClassFromArray(Xsample)
print(Xconfidence)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1]
[ 100.  100.  100. ...,   79.  100.  100.]
