In [1]:
import numpy as np
import pptk
from pathlib import Path

from data_utils.MastersDataset import MastersDataset

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, jaccard_score

# Random Forests
## Get training data

In [2]:
data_path = Path('/home/luc/PycharmProjects/Pointnet_Pointnet2_pytorch/data/PatrickData/Church/MastersFormat/hand_selected_reversed')

train_data = MastersDataset('train', data_path, sample_all_points=True)
val_data = MastersDataset('validate', data_path, sample_all_points=True)
X, y = np.vstack(train_data.data_segment), np.hstack(train_data.labels_segment)

100%|██████████| 1/1 [00:00<00:00,  1.20it/s]


Sorting by x axis...3.12s


split y-axis: 100%|██████████| 12/12 [00:04<00:00,  2.64it/s]
Fill batches: 100%|██████████| 126/126 [00:08<00:00, 14.96it/s]
100%|██████████| 1/1 [00:02<00:00,  2.10s/it]


Sorting by x axis...7.90s


split y-axis: 100%|██████████| 52/52 [00:11<00:00,  4.43it/s]
Fill batches: 100%|██████████| 364/364 [00:47<00:00,  7.69it/s]


In [3]:
def perf_report(clf, X, y):
    preds = clf.predict(X)
    # Evaluating the algorithm
    print(f"Confusion matrix:\n{confusion_matrix(y, preds)}")
    print(f"Classification_report:\n{classification_report(y, preds)}")
    print(f"Accuracy score: {accuracy_score(y, preds)}")
    print(f"F1 score: {f1_score(y, preds)}")
    print(f"Feature importances: {clf.feature_importances_}")
    print(f"IoU/JaccardScore: {jaccard_score(y, preds, average=None)}")
    print(f"mIoU/JaccardScore: {jaccard_score(y, preds, average='Macro')}")
    return preds

## Train Classifier
### n_estimators=32, max_depth=32, min_samples_split=20

In [4]:
classifier = RandomForestClassifier(n_estimators=32, max_depth=32, min_samples_split=20, n_jobs=-1)
classifier.fit(X=X, y=y)
print("Training Performance Report:")
train_preds = perf_report(classifier, X=X, y=y)

print("Validation Performance Report")
val_labels = np.hstack(val_data.labels_segment)
val_preds = perf_report(classifier, X=np.vstack(val_data.data_segment), y=val_labels)


Training Performance Report:
Confusion matrix:
[[6474454     112]
 [   2846  158108]]
Classification_report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   6474566
           1       1.00      0.98      0.99    160954

    accuracy                           1.00   6635520
   macro avg       1.00      0.99      1.00   6635520
weighted avg       1.00      1.00      1.00   6635520

Accuracy score: 0.9995542173032408
F1 score: 0.9907323278211885
Feature importances: [0.11937701 0.27933054 0.55619878 0.04509367]
IoU/JaccardScore: [0.99954334 0.98163486]


ValueError: average has to be one of (None, 'micro', 'macro', 'weighted', 'samples')

In [None]:
v_t = pptk.viewer(X[:,:3],X[:,3], y, train_preds, train_preds!=y)

v_v= pptk.viewer(np.vstack(val_data.data_segment)[:,:3], val_labels, val_preds, val_preds != val_labels)

### Balanced class weights

In [None]:
classifier = RandomForestClassifier(n_estimators=32, max_depth=32, min_samples_split=20,class_weight='balanced', n_jobs=-1)
classifier.fit(X=X, y=y)
print("Training Performance Report:")
train_preds = perf_report(classifier, X=X, y=y)
v_t = pptk.viewer(X[:,:3],X[:,3], y, train_preds, train_preds!=y)

print("Validation Performance Report")
val_labels = np.hstack(val_data.labels_segment)
val_preds = perf_report(classifier, X=np.vstack(val_data.data_segment), y=val_labels)
v= pptk.viewer(np.vstack(val_data.data_segment)[:,:3], val_labels, val_preds, val_preds != val_labels)

In [None]:
v_t = pptk.viewer(X[:,:3],X[:,3], y, train_preds, train_preds!=y)

v_v= pptk.viewer(np.vstack(val_data.data_segment)[:,:3], val_labels, val_preds, val_preds != val_labels)

### Balanced subsample class weights

In [None]:
classifier = RandomForestClassifier(n_estimators=32, max_depth=32, min_samples_split=20,class_weight='balanced_subsample', n_jobs=-1)
classifier.fit(X=X, y=y)
print("Training Performance Report:")
train_preds = perf_report(classifier, X=X, y=y)
v_t = pptk.viewer(X[:,:3],X[:,3], y, train_preds, train_preds!=y)

print("Validation Performance Report")
val_labels = np.hstack(val_data.labels_segment)
val_preds = perf_report(classifier, X=np.vstack(val_data.data_segment), y=val_labels)
v= pptk.viewer(np.vstack(val_data.data_segment)[:,:3], val_labels, val_preds, val_preds != val_labels)

In [None]:
v_t = pptk.viewer(X[:,:3],X[:,3], y, train_preds, train_preds!=y)

v_v= pptk.viewer(np.vstack(val_data.data_segment)[:,:3], val_labels, val_preds, val_preds != val_labels)