In [21]:
import numpy as np
import pptk
from pathlib import Path

from data_utils.MastersDataset import MastersDataset

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, jaccard_score

# Random Forests
## Get training data

In [22]:
# Root folder of the pre-split dataset.
# NOTE(review): this is an absolute, machine-specific path — it has to be
# adjusted before the notebook can run anywhere else.
data_path = Path(
    '/home/luc/PycharmProjects/Pointnet_Pointnet2_pytorch/data/PatrickData/Church/MastersFormat/hand_selected_reversed'
)

# Build the training split first, then the validation split, both with
# sample_all_points=True.
train_data, val_data = (
    MastersDataset(split_name, data_path, sample_all_points=True)
    for split_name in ('train', 'validate')
)

100%|██████████| 1/1 [00:00<00:00,  1.17it/s]


Sorting by x axis...3.10s


split y-axis: 100%|██████████| 12/12 [00:04<00:00,  2.80it/s]
Fill batches: 100%|██████████| 126/126 [00:08<00:00, 15.53it/s]
100%|██████████| 1/1 [00:02<00:00,  2.47s/it]


Sorting by x axis...8.00s


split y-axis: 100%|██████████| 52/52 [00:12<00:00,  4.26it/s]
Fill batches: 100%|██████████| 364/364 [00:47<00:00,  7.74it/s]


## Train Classifier

In [23]:
# Fixed seed for the forest's internal randomness, so that re-running the
# notebook fits the same model and reproduces the metrics reported below.
RANDOM_SEED = 42

classifier = RandomForestClassifier(
    n_estimators=32,
    max_depth=32,
    min_samples_split=20,
    n_jobs=-1,  # use every available core
    random_state=RANDOM_SEED,
    verbose=1,
)

# Stack the per-segment feature arrays row-wise and join the per-segment
# label arrays into single training inputs.
# NOTE(review): presumably one row / one label per point — confirm against
# MastersDataset.
X, y = np.vstack(train_data.data_segment), np.hstack(train_data.labels_segment)
classifier.fit(X=X, y=y)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  32 out of  32 | elapsed:  3.5min finished


RandomForestClassifier(max_depth=32, min_samples_split=20, n_estimators=32,
                       n_jobs=-1, verbose=1)

### Training performance

In [24]:
# Predict on the training features themselves to measure how well the forest
# fits the data it was trained on.
train_preds = classifier.predict(X)

# Evaluating the algorithm
print(f"Confusion matrix:\n{confusion_matrix(y, train_preds)}")
print(f"Classification_report:\n{classification_report(y, train_preds)}")
print(f"Accuracy score: {accuracy_score(y, train_preds)}")
# f1_score is called with its default averaging mode here.
print(f"F1 score: {f1_score(y, train_preds)}")
print(f"Feature importances: {classifier.feature_importances_}")
# average=None yields one IoU value per class.
print(f"IoU/JaccardScore: {jaccard_score(y, train_preds, average=None)}")
# The `average` option is case-sensitive: it must be 'macro' ('Macro' is
# rejected with a ValueError), giving the unweighted mean of per-class IoU.
print(f"mIoU/JaccardScore: {jaccard_score(y, train_preds, average='macro')}")

[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  32 out of  32 | elapsed:    4.6s finished


Confusion matrix:
[[6474654     107]
 [   2734  158025]]
Classification_report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   6474761
           1       1.00      0.98      0.99    160759

    accuracy                           1.00   6635520
   macro avg       1.00      0.99      1.00   6635520
weighted avg       1.00      1.00      1.00   6635520

Accuracy score: 0.999571849681713
F1 score: 0.9910909997459948
Feature importances: [0.13046538 0.26589841 0.55609022 0.04754599]


In [25]:
# Open an interactive pptk viewer on the training points. The first three
# feature columns are passed as the point positions; the remaining arguments
# are per-point values to display.
v = pptk.viewer(
    X[:, :3],
    X[:, 3],           # fourth feature column
    y,                 # ground-truth labels
    train_preds,       # predicted labels
    train_preds != y,  # True where the prediction is wrong
)

In [26]:
# Stack the validation segments row-wise and classify them with the
# forest fitted on the training split.
preds = classifier.predict(
    X=np.vstack(val_data.data_segment),
)

[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  32 out of  32 | elapsed:   17.7s finished


In [27]:
# Score the validation predictions against the ground-truth labels.
all_labels = np.hstack(val_data.labels_segment)

val_confusion = confusion_matrix(all_labels, preds)
print(f"confusion_matrix:\n{val_confusion}")

val_report = classification_report(all_labels, preds)
print(f"classification_report:\n{val_report}")

val_accuracy = accuracy_score(all_labels, preds)
print(f"accuracy score: {val_accuracy}")

# Importances belong to the fitted model, so they match the training cell.
importances = classifier.feature_importances_
print(f"Feature importances: {importances}")

confusion_matrix:
[[13945175  1892495]
 [  170122   736656]]
classification_report:
              precision    recall  f1-score   support

           0       0.99      0.88      0.93  15837670
           1       0.28      0.81      0.42    906778

    accuracy                           0.88  16744448
   macro avg       0.63      0.85      0.67  16744448
weighted avg       0.95      0.88      0.90  16744448

accuracy score: 0.8768178562828706
Feature importances: [0.13046538 0.26589841 0.55609022 0.04754599]


In [28]:
# Open an interactive pptk viewer on the validation points: positions come
# from the first three feature columns, followed by per-point values.
v = pptk.viewer(
    np.vstack(val_data.data_segment)[:, :3],
    all_labels,           # ground-truth labels
    preds,                # predicted labels
    preds != all_labels,  # True where the prediction is wrong
)