In [2]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.utils import Bunch
from joblib import dump

In [3]:
from skimage.feature import local_binary_pattern
def lbp_histogram(image, radius, bins):
    # compute the LBP histogram of the image
    n_points = 8 * radius
    lbp = local_binary_pattern(image, n_points, radius, method='uniform')
    hist, _ = np.histogram(lbp, bins= bins, density=True)
    return np.ravel(hist)

In [4]:
def random_forest_model(dataSet, testData_percentage, max_depth):
    img_train, img_test, label_train, label_test = train_test_split(dataSet.data, dataSet.target, test_size=testData_percentage, random_state=0)
    classifier = RandomForestClassifier(max_depth=max_depth, random_state=0)
    
    #fitting classifier model
    classifier.fit(img_train, label_train)
    
    # classification_report outputs classification metrics
    # such as precision, recall and F1 score
    pred_result = classifier.predict(img_train)
    print('Classification Training Report: \n', classification_report(label_train, pred_result))
    
    # confusion_matrix outputs how many samples are correctly or incorrectly classified
    print('Train Confusion Matrix: \n', confusion_matrix(label_train, pred_result), "\n")

    # accuracy computes classification accuracy
    print('Train Accuracy: ', accuracy_score(label_train, pred_result), '\n')
    
    # testing with validate data
    validate_result = classifier.predict(img_test)
    print('Classification Testing Report: \n', classification_report(label_test, validate_result, zero_division=0))
    # `confusion_matrix` outputs how many samples are correctly or incorrectly classified
    print('Test Confusion Matrix: \n', confusion_matrix(label_test, validate_result), "\n")
    # `accuracy` computes classification accuracy
    print('Test Accuracy: ', accuracy_score(label_test, validate_result))

    # export model
    dump(classifier, 'random_forest_model.joblib')

In [5]:
# load data from .npz file
npdata = np.load(f'1776_datasetA_200_40.npz')
print(npdata['data'].shape)

number_image = 1776
radius = 1
bins = 8

features=np.zeros(shape=(number_image, bins)) 
#add lbp to features
for i in range(0,number_image):
     features[i] = lbp_histogram(npdata['data'][i], radius, bins)
        
#add depth as features
features = np.insert(features, bins, npdata['depth'], axis=1)
print(features.shape)

(1776, 200, 200)
(1776, 9)


In [6]:
# normalize lbp
for i in range(number_image):
    norm = np.linalg.norm(features[i, :bins])
    for j in range(bins):
        features[i, j] = features[i, j] / norm
        
norm = np.linalg.norm(features[:, bins])
for i in range(number_image):
    features[i, bins] = features[i, bins] / norm
    
# collect data
dataset = Bunch(data = features, target=npdata['label'])
print(dataset.data.shape)

(1776, 9)


In [7]:
random_forest_model(dataset, 0.8, 2)

Classification Training Report: 
               precision    recall  f1-score   support

           0       0.97      0.70      0.81       213
           1       0.68      0.96      0.80       142

    accuracy                           0.81       355
   macro avg       0.82      0.83      0.81       355
weighted avg       0.85      0.81      0.81       355

Train Confusion Matrix: 
 [[149  64]
 [  5 137]] 

Train Accuracy:  0.8056338028169014 

Classification Testing Report: 
               precision    recall  f1-score   support

           0       0.93      0.76      0.84       830
           1       0.73      0.91      0.81       591

    accuracy                           0.83      1421
   macro avg       0.83      0.84      0.82      1421
weighted avg       0.85      0.83      0.83      1421

Test Confusion Matrix: 
 [[633 197]
 [ 51 540]] 

Test Accuracy:  0.8254750175932442
