In [1]:
%matplotlib inline

In [2]:
import itertools
import numpy as np
import gc
import os, shutil
import collections
import matplotlib.pylab as plt
import matplotlib as mpl

from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn import metrics
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.externals import joblib
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support

from imblearn.under_sampling import RandomUnderSampler
from imblearn.ensemble import EasyEnsemble
from sklearn.neighbors import KDTree
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import check_random_state

In [3]:
# Display names handed to the classification report as `target_names`.
# Presumably listed in ascending class-label order (0 = Powerline ... 8 = Tree);
# confirm against the dataset's label definition.
class_name = [
    'Powerline',
    'Low Vegetation',
    'Impervious Surface',
    'Car',
    'Fence',
    'Roof',
    'Facade',
    'Shrub',
    'Tree',
]

In [93]:
def feature_extraction(cloud):
    """Assemble the per-point feature matrix from a raw point-cloud table.

    The returned matrix concatenates, column-wise:

    * columns 1..33 of `cloud` (the FPFH descriptor, per the original naming),
    * columns 38..40 of `cloud` (the normal vector, per the original naming),
    * one relative-height column: each point's last-column value minus the
      last-column value of its nearest ground point, where "nearest" is the
      Euclidean distance measured in the third-to-last and second-to-last
      columns.

    Rows whose first column equals 1 after integer truncation are used as the
    ground points. The last three columns are presumably x, y, z coordinates
    -- confirm against the file format.

    Parameters
    ----------
    cloud : numpy.ndarray
        2-D array with one row per point.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per input point (33 + 3 + 1 columns when
        `cloud` has at least 41 columns).
    """
    n_points = cloud.shape[0]
    coords = cloud[:, -3:]

    # Descriptor and normal-vector column ranges are copied through unchanged.
    descriptor = cloud[:, 1:34]
    normals = cloud[:, 38:41]

    # Ground points: rows flagged with 1 in the first column.
    ground_mask = cloud[:, 0].astype(int) == 1
    ground_coords = coords[ground_mask]

    # Nearest ground point for every point, searched in the horizontal plane only.
    ground_tree = KDTree(ground_coords[:, :2], metric='euclidean')
    nearest = ground_tree.query(coords[:, :2], k=1, return_distance=False)

    # k=1 gives an (n_points, 1) index array; take its only column.
    height_above_ground = coords[:, 2] - ground_coords[nearest[:, 0], 2]
    height_above_ground = height_above_ground.reshape(n_points, 1)

    return np.hstack((descriptor, normals, height_above_ground))

In [94]:
# Read both point-cloud tables; the first 11 lines of each file are skipped.
train_data = np.loadtxt(
    './Vaihingen/fpfh_ground/Vaihingen3D_Traininig_fpfh_2_ground.txt',
    skiprows=11,
)
test_data = np.loadtxt(
    './Vaihingen/fpfh_ground/Vaihingen3D_EVAL_WITH_REF_fpfh_2_ground.txt',
    skiprows=11,
)

# Per-point feature matrices built by feature_extraction().
train_data_feature, test_data_feature = map(feature_extraction, (train_data, test_data))

# Column 34 of each table is used as the class label.
train_data_class, test_data_class = train_data[:, 34], test_data[:, 34]

In [108]:
# BRF (presumably "balanced random forest"): draw the under-sampled training
# subsets, one per base learner.
def resample_data(train_feature, train_class, count_sampleset, multiplier=None, random_state=42):
    """Under-sample the training set into `count_sampleset` random subsets.

    For every class, the number of samples kept in each subset is
    ``int(class_count * multiplier[label])``.

    Parameters
    ----------
    train_feature : array-like
        Feature matrix, one row per sample.
    train_class : array-like
        Class label of every sample.
    count_sampleset : int
        Number of subsets to draw (passed to EasyEnsemble as ``n_subsets``).
    multiplier : dict, optional
        Maps class label -> fraction of that class to keep. Defaults to the
        table originally hard-coded here. Every label present in
        `train_class` must have an entry, otherwise a KeyError is raised.
    random_state : int, RandomState instance or None, optional
        Seed forwarded to EasyEnsemble so the subsets are reproducible across
        runs. The default of 42 matches the seed train_brf() uses for its
        trees; pass None to get a different draw on every call.

    Returns
    -------
    The value of ``EasyEnsemble.fit_sample``: the resampled features and the
    resampled labels (presumably stacked per subset -- see the
    imbalanced-learn documentation).
    """
    if multiplier is None:
        multiplier = {0: 1.0, 1: 0.1, 2: 0.1, 3: 1.0, 4: 1.0, 5: 0.1, 6: 1.0, 7: 0.5, 8: 0.1}

    # Build the per-class targets in a fresh dict instead of overwriting the
    # Counter while iterating over it.
    class_counts = collections.Counter(train_class)
    target_stats = {
        label: int(count * multiplier[label])
        for label, count in class_counts.items()
    }

    ee = EasyEnsemble(ratio=target_stats, random_state=random_state, n_subsets=count_sampleset)
    return ee.fit_sample(train_feature, train_class)

# Number of base learners; the same value is used as the number of
# under-sampled subsets requested from resample_data().
count_learnbase = 50
X_resampled, y_resampled = resample_data(
    train_feature=train_data_feature,
    train_class=train_data_class,
    count_sampleset=count_learnbase,
)

In [110]:
def train_brf(X_resampled, y_resampled, count_learnbase):
    """Fit one decision tree per resampled subset.

    Parameters
    ----------
    X_resampled : sequence
        ``X_resampled[i]`` is the feature matrix of subset ``i``.
    y_resampled : sequence
        ``y_resampled[i]`` holds the labels of subset ``i``.
    count_learnbase : int
        Number of trees to train; subsets ``0 .. count_learnbase - 1`` are
        used, so both sequences must hold at least that many subsets.

    Returns
    -------
    list
        The fitted DecisionTreeClassifier objects, in subset order.
    """
    # A fixed master seed makes the per-tree seeds, and therefore the trees,
    # identical on every run for the same data.
    master_rng = check_random_state(42)
    tree_seeds = (master_rng.rand(count_learnbase) * 1000000000).astype('int')

    clf_estimator = []
    for i in range(count_learnbase):
        # 'sqrt' is exactly what 'auto' meant for classifiers; 'auto' was
        # deprecated in scikit-learn 1.1 and removed in 1.3.
        tmp_clf = DecisionTreeClassifier(max_features='sqrt', random_state=int(tree_seeds[i]))
        tmp_clf.fit(X_resampled[i], y_resampled[i])
        clf_estimator.append(tmp_clf)

    return clf_estimator

# Train the ensemble: one tree per resampled subset.
clf_estimator = train_brf(
    X_resampled=X_resampled,
    y_resampled=y_resampled,
    count_learnbase=count_learnbase,
)

In [118]:
def predict_brf(clf_estimator, X):
    """Predict class labels by majority vote over an ensemble of classifiers.

    Parameters
    ----------
    clf_estimator : iterable
        Fitted classifiers; each must expose ``predict(X)`` returning one
        label per sample. Labels are cast to ``int`` and must be non-negative
        (``np.bincount`` rejects negative values).
    X : array-like
        Samples to classify; passed unchanged to every classifier.

    Returns
    -------
    numpy.ndarray
        1-D integer array with the most frequent label for every sample.
        Ties go to the smallest label (``np.argmax`` returns the first
        maximum).

    Raises
    ------
    ValueError
        If `clf_estimator` is empty.
    """
    estimators = list(clf_estimator)
    if not estimators:
        raise ValueError('clf_estimator must contain at least one classifier')

    # Stack all predictions in one call. The result is always 2-D
    # (n_estimators, n_samples), even for a single estimator, so the vote
    # below runs per sample instead of collapsing every sample into one
    # scalar. It also avoids re-copying the growing matrix on each iteration.
    votes = np.vstack([clf.predict(X) for clf in estimators]).astype('int')

    # Column-wise mode: count label occurrences and keep the most frequent.
    predict = np.apply_along_axis(lambda x: np.argmax(np.bincount(x)), axis=0, arr=votes)
    return predict

In [120]:
# Majority-vote prediction of the ensemble on the evaluation features.
test_predict = predict_brf(
    clf_estimator=clf_estimator,
    X=test_data_feature,
)

In [121]:
# Per-class precision / recall / F1 on the evaluation set, as a text table.
print(
    classification_report(
        y_true=test_data_class,
        y_pred=test_predict,
        target_names=class_name,
    )
)

                    precision    recall  f1-score   support

         Powerline       0.52      0.14      0.22       600
    Low Vegetation       0.61      0.55      0.58     98690
Impervious Surface       0.80      0.72      0.75    101986
               Car       0.70      0.45      0.55      3708
             Fence       0.25      0.18      0.21      7422
              Roof       0.89      0.80      0.84    109048
            Facade       0.25      0.64      0.36     11224
             Shrub       0.33      0.67      0.44     24818
              Tree       0.79      0.68      0.73     54226

       avg / total       0.72      0.68      0.69    411722



In [182]:
# Per-class metrics (no `average` argument) ...
precision_recall_fscore = precision_recall_fscore_support(
    y_true=test_data_class,
    y_pred=test_predict,
)
# ... and the same metrics averaged over classes with average='weighted'.
precision_recall_fscore_average = precision_recall_fscore_support(
    y_true=test_data_class,
    y_pred=test_predict,
    average='weighted',
)

In [185]:
# Build the metrics table: one row each for the first three entries of the
# score tuples (precision, recall, f-score); the fourth entry (support) is
# left out. Each row holds the per-class values followed by the weighted
# average.
# The averaged scores live in `precision_recall_fscore_average`; the name
# `average` previously used here is not defined anywhere in the notebook, so
# the cell failed with a NameError on a fresh kernel.
measure = np.vstack([
    np.append(precision_recall_fscore[i], precision_recall_fscore_average[i])
    for i in range(3)
])

In [187]:
# Inspect the dimensions of the stacked metrics array.
measure.shape

(3, 10)

In [195]:
# Alias, not a copy: `a` refers to the same array object as `measure`.
a = measure

In [196]:
# Stack the metrics array with itself along a new third axis.
b = np.dstack((a, a))

In [197]:
# Inspect the dimensions of the depth-stacked array.
b.shape

(3, 10, 2)

In [210]:
# Write the metrics array to 'measure.npy' in the current working directory.
np.save('measure.npy', measure)

In [211]:
# Read the array back from 'measure.npy' to check the save/load round trip.
c = np.load('measure.npy')

In [213]:
# Element-wise difference between the reloaded and the in-memory array;
# all zeros means the round trip preserved every value.
c - measure

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])