In [None]:
import cv2
import sklearn
from sklearn.cluster import KMeans
import scipy.cluster.vq as vq
import numpy as np
import pandas as pd
import pickle
import numpy as np
import random
import scipy.io
import os
from PIL import Image
import matplotlib.pyplot as plt
import time

# NOTE(review): DSIFT_STEP_SIZE is not referenced by any code visible in this
# notebook excerpt; presumably a leftover from a dense-SIFT variant — confirm.
DSIFT_STEP_SIZE = 4

"""We use 10 classes, 15 images per class, randomly selected, for training, and
15 other images per class, for testing. Feature descriptors d are given. They are multi-scaled
dense SIFT features, and their dimension is 128 (d = 128)."""

NUMBER_IMAGES_CLASS = 15      # the number of images per class
# NOTE(review): SIFT_type is not read by the functions visible here;
# extract_sift_descriptors runs a keypoint detector (detectAndCompute) regardless
# of this value.
SIFT_type = 'dense'           # The type of SIFT feature  : dense or sparse
# Seeds Python's `random` module, which load_data uses to shuffle file names.
# It does not seed NumPy or scikit-learn.
random.seed(777)
# Root directory containing one sub-directory per class.
# Presumably a Google Drive mount inside Colab — adjust for other environments.
data_path = "/content/drive/MyDrive/Caltech_101/101_ObjectCategories"
# Class sub-directories read by load_data; the list index is used as the integer label.
dir_name = ['tick', 'trilobite', 'umbrella', 'watch', 'water_lilly', 'wheelchair', 'wild_cat', 'windsor_chair', 'wrench', 'yin_yang']

def load_data(path = data_path):
    """Load a balanced train/test split of grayscale 256x256 images.

    For every class listed in ``dir_name`` the ``.jpg`` files found in
    ``path/<class>`` are shuffled with the ``random`` module and the first
    ``2 * NUMBER_IMAGES_CLASS`` readable images are kept: the first half goes
    to the training set, the second half to the test set.

    Parameters
    ----------
    path : str
        Root directory holding one sub-directory per class. Defaults to the
        module-level ``data_path``.

    Returns
    -------
    train_data, train_label, test_data, test_label : list
        Images are 2-D uint8 arrays as returned by ``cv2.resize``; labels are
        the index of the class inside ``dir_name``.

    Raises
    ------
    ValueError
        If a class directory does not contain enough readable ``.jpg`` images.
    """
    train_data = []
    test_data = []
    train_label = []
    test_label = []

    per_split = NUMBER_IMAGES_CLASS
    needed = 2 * per_split

    for label, class_name in enumerate(dir_name):
        # Use the caller-supplied root instead of silently falling back to the
        # global data_path.
        class_dir = os.path.join(path, class_name)

        # os.listdir order is filesystem dependent; sort first so that the
        # seeded shuffle yields the same selection on every machine.
        img_files = sorted(f for f in os.listdir(class_dir) if f.endswith(".jpg"))
        random.shuffle(img_files)

        loaded = 0
        for file_name in img_files:
            if loaded == needed:
                break
            img = cv2.imread(os.path.join(class_dir, file_name))
            if img is None:
                # cv2.imread returns None for unreadable/corrupt files; skip
                # them and take the next shuffled file instead of crashing.
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img = cv2.resize(img, (256,256))
            if loaded < per_split:
                train_data.append(img)
                train_label.append(label)
            else:
                test_data.append(img)
                test_label.append(label)
            loaded += 1

        if loaded < needed:
            raise ValueError(
                "class '{}' has only {} readable .jpg images in {}; {} are required".format(
                    class_name, loaded, class_dir, needed))

    return train_data, train_label, test_data, test_label

def extract_sift_descriptors(img):
    """Detect SIFT keypoints in ``img`` and return their descriptors.

    The image is resized to 256x256 before detection. The detector uses
    contrast threshold 0.01, edge threshold 5 and sigma 1.3.

    Parameters
    ----------
    img : numpy.ndarray
        Image accepted by ``cv2.resize`` / ``SIFT.detectAndCompute``
        (the notebook passes grayscale images from ``load_data``).

    Returns
    -------
    numpy.ndarray
        Array with one 128-dimensional descriptor per keypoint. When no
        keypoint is detected an empty ``(0, 128)`` float32 array is returned
        instead of ``None`` so callers can stack the result unconditionally.
    """
    img = cv2.resize(img, (256,256))

    # Configure the detector through the factory arguments rather than setters.
    sift = cv2.SIFT_create(contrastThreshold=0.01, edgeThreshold=5, sigma=1.3)

    keypoints, descriptor = sift.detectAndCompute(img, None)
    if descriptor is None:
        # OpenCV reports "no keypoints" as None descriptors.
        return np.empty((0, 128), dtype=np.float32)
    return descriptor


def build_codebook(X, voc_size, random_state=None):
    """Cluster all descriptors with k-means and return the cluster centres.

    Parameters
    ----------
    X : iterable of array-like
        Per-image descriptor arrays, each of shape ``(n_i, d)``. Entries that
        are ``None`` or have no rows are ignored.
    voc_size : int
        Number of visual words (k-means clusters).
    random_state : int or None, optional
        Forwarded to ``KMeans`` to make the codebook reproducible. ``None``
        (default) keeps the original unseeded behaviour.

    Returns
    -------
    numpy.ndarray
        Codebook of shape ``(voc_size, d)``.

    Raises
    ------
    ValueError
        If ``X`` contains no descriptors at all.
    """
    # np.vstack needs a real sequence; passing a generator is deprecated and
    # rejected by recent NumPy releases.
    descriptor_blocks = [np.asarray(d) for d in X if d is not None and len(d) > 0]
    if not descriptor_blocks:
        raise ValueError("no descriptors available to build the codebook")

    features = np.vstack(descriptor_blocks)
    print("size of descriptors : {}".format(features.shape[0]))
    kmeans = KMeans(n_clusters=voc_size, random_state=random_state)
    kmeans.fit(features)
    # cluster_centers_ is already (voc_size, d); squeezing it would collapse a
    # one-word codebook to 1-D and break codebook.shape[0] for callers.
    codebook = kmeans.cluster_centers_
    return codebook


def input_vector_encoder(feature, codebook):
    """Encode a set of descriptors as a normalised visual-word histogram.

    Each descriptor is assigned to its nearest codebook entry with
    ``scipy.cluster.vq.vq`` and the assignments are histogrammed over
    unit-width bins, one per codeword.

    Parameters
    ----------
    feature : array-like of shape (n, d) or None
        Descriptors of one image.
    codebook : numpy.ndarray of shape (k, d)
        Visual vocabulary.

    Returns
    -------
    word_hist : numpy.ndarray of shape (k,)
        Fraction of descriptors assigned to each codeword. All zeros when
        ``feature`` is ``None`` or empty.
    bin_edges : numpy.ndarray of shape (k + 1,)
        Integer bin edges ``0 .. k``.
    """
    n_words = codebook.shape[0]
    bins = np.arange(n_words + 1)

    if feature is None or len(feature) == 0:
        # Nothing to quantise; avoid the 0/0 that density normalisation would produce.
        return np.zeros(n_words, dtype=float), bins

    code, _ = vq.vq(feature, codebook)
    # `normed` was deprecated and later removed from np.histogram; with
    # unit-width bins `density=True` gives the same fraction per codeword.
    word_hist, bin_edges = np.histogram(code, bins=bins, density=True)
    return word_hist, bin_edges

In [None]:
def bow_encoder(num):
    """Run the full bag-of-words pipeline for a vocabulary of ``num`` words.

    Loads the train/test images with ``load_data``, extracts SIFT descriptors,
    builds a codebook from the training descriptors and encodes every image as
    a visual-word histogram.

    Parameters
    ----------
    num : int
        Vocabulary (codebook) size.

    Returns
    -------
    train_encoded : numpy.ndarray
        One histogram per training image.
    train_label : numpy.ndarray
    test_encoded : numpy.ndarray
        One histogram per test image.
    test_label : numpy.ndarray
    codewords : numpy.ndarray
        Left bin edges of the histogram (the codeword indices).
    encoding_time : float
        Seconds spent on descriptor extraction, codebook construction and
        encoding. Image loading is not included.
    """
    VOC_SIZE = num

    train_data, train_label, test_data, test_label = load_data(data_path)
    # Timing starts after the images are loaded.
    start_time = time.perf_counter()

    train_descriptor = [extract_sift_descriptors(img) for img in train_data]
    test_descriptor = [extract_sift_descriptors(img) for img in test_data]

    codebook = build_codebook(train_descriptor, voc_size=VOC_SIZE)

    train_encoded = []
    codewords = []
    for x in train_descriptor:
        hist, bin_edges = input_vector_encoder(x, codebook)
        train_encoded.append(hist)
        # Every image shares the same bin edges; keep the last ones seen.
        codewords = bin_edges

    test_encoded = [input_vector_encoder(x, codebook)[0] for x in test_descriptor]

    encoding_time = time.perf_counter() - start_time

    print("\n")
    return (np.asarray(train_encoded), np.asarray(train_label),
            np.asarray(test_encoded), np.asarray(test_label),
            np.array(codewords[:-1]), encoding_time)

In [None]:
import time
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

### split function : axis-aligned
def RF_axis_aligned(train_data, train_label, test_data, test_label, num_trees, max_depth, num_feature, random_state=None):
    """Train and evaluate a random forest with axis-aligned (single-feature) splits.

    Parameters
    ----------
    train_data, train_label : array-like
        Training samples and their labels.
    test_data, test_label : array-like
        Test samples and their labels.
    num_trees : int
        Number of trees (``n_estimators``).
    max_depth : int
        Maximum depth of each tree.
    num_feature : int
        Number of features considered at each split (``max_features``).
    random_state : int or None, optional
        Seed forwarded to ``RandomForestClassifier``. With the default ``None``
        repeated calls with identical arguments can give different results.

    Returns
    -------
    training_time : float
        Seconds spent in ``fit``.
    testing_time : float
        Seconds spent in ``predict``.
    accuracy : float
    confusion : numpy.ndarray
        Confusion matrix of true test labels versus predictions.
    prediction : numpy.ndarray
        Predicted label for each test sample.
    """
    rf = RandomForestClassifier(n_estimators = num_trees, criterion = "entropy",
                                max_depth = max_depth, max_features = num_feature,
                                bootstrap = True, n_jobs = -1,
                                random_state = random_state)

    # perf_counter is monotonic, so the intervals are not affected by clock adjustments.
    fit_start = time.perf_counter()
    rf.fit(train_data, train_label)
    training_time = time.perf_counter() - fit_start

    predict_start = time.perf_counter()
    prediction = rf.predict(test_data)
    testing_time = time.perf_counter() - predict_start

    accuracy = accuracy_score(test_label, prediction)
    confusion = confusion_matrix(test_label, prediction)

    return training_time, testing_time, accuracy, confusion, prediction

In [None]:
# Build the bag-of-words encoding with a 2000-word codebook and print the
# elapsed encoding time (seconds) returned by bow_encoder.
train_encoded_2000, train_label_2000, test_encoded_2000, test_label_2000, codewords_2000, encoding_time_2000 = bow_encoder(2000)
print(encoding_time_2000)

# Random forest on that encoding: num_trees=1000, max_depth=300, num_feature=1000.
# Prints training time, testing time, accuracy, predictions, true test labels
# and the confusion matrix.
training_time_2000, testing_time_2000, accuracy_2000, confusion_2000, prediction_2000 = RF_axis_aligned(train_encoded_2000, train_label_2000, test_encoded_2000, test_label_2000, 1000, 300, 1000)
print(training_time_2000, testing_time_2000, accuracy_2000, prediction_2000, np.array(test_label_2000), confusion_2000)

SIFT feature extraction
Train/Test split: 150/150
Codebook Size: 2000
Building the codebook, it will take some time
size of descriptors : 138517


  features = np.vstack((descriptor for descriptor in X))


Bag of words encoding


  word_hist, bin_edges = np.histogram(code, bins=range(codebook.shape[0] + 1), normed=True)




3869.3343122005463
43.77228856086731 0.3564338684082031 0.5733333333333334 [8 8 7 1 6 8 0 8 2 2 0 8 0 0 8 1 2 1 6 1 1 1 1 6 1 1 1 1 0 1 2 2 2 2 8 2 4
 4 2 2 2 6 1 8 6 3 3 3 4 3 3 7 3 4 8 3 3 8 0 8 4 4 4 4 4 6 5 6 4 5 5 4 4 9
 4 3 5 0 5 1 5 5 3 7 7 5 5 4 4 3 6 1 6 6 4 4 6 6 6 1 1 3 6 6 6 7 7 7 7 7 8
 7 7 7 7 7 7 2 7 7 8 8 4 2 8 8 5 8 8 8 4 8 9 8 8 9 9 2 9 1 9 9 6 9 1 9 3 2
 9 8] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9] [[ 4  1  2  0  0  0  1  1  6  0]
 [ 1 11  1  0  0  0  2  0  0  0]
 [ 0  1  8  0  2  0  2  0  2  0]
 [ 1  0  0  8  2  0  0  1  3  0]
 [ 0  0  0  0  9  3  2  0  0  1]
 [ 1  1  0  3  2  6  0  2  0  0]
 [ 0  3  0  1  2  0  9  0  0  0]
 [ 0  0  1  0  0  0  0 13  1  0]
 [ 0  0  1  0  2  1  0  0 10  1]
 [ 0  2  2  1 

In [None]:
# Random forest on the 2000-word encoding: num_trees=1000, max_depth=300, num_feature=500.
# Prints training time, testing time, accuracy, predictions, true test labels and the confusion matrix.
training_time_2000, testing_time_2000, accuracy_2000, confusion_2000, prediction_2000 = RF_axis_aligned(train_encoded_2000, train_label_2000, test_encoded_2000, test_label_2000, 1000, 300, 500)
print(training_time_2000, testing_time_2000, accuracy_2000, prediction_2000, np.array(test_label_2000), confusion_2000)

22.229820013046265 0.3844168186187744 0.5866666666666667 [8 8 7 1 0 8 0 3 2 9 0 8 0 0 8 1 2 1 1 1 1 1 1 1 1 1 1 1 0 1 2 2 9 9 8 2 4
 4 8 2 2 6 3 8 6 3 3 3 4 3 3 7 3 6 8 3 3 8 5 3 2 4 4 4 4 6 5 6 4 6 4 4 4 9
 4 3 5 3 5 6 5 5 8 7 7 5 3 0 5 0 6 1 6 6 4 4 6 6 6 1 6 3 6 6 6 7 7 7 7 7 8
 7 7 7 7 7 8 2 7 7 8 8 4 2 8 8 5 8 8 8 4 8 9 8 2 9 9 9 9 1 9 9 1 9 1 9 9 2
 9 8] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9] [[ 5  1  1  1  0  0  0  1  5  1]
 [ 1 13  1  0  0  0  0  0  0  0]
 [ 0  0  5  1  2  0  2  0  3  2]
 [ 0  0  0  9  1  1  1  1  2  0]
 [ 0  0  1  0  9  1  3  0  0  1]
 [ 2  0  0  3  0  6  1  2  1  0]
 [ 0  2  0  1  2  0 10  0  0  0]
 [ 0  0  1  0  0  0  0 12  2  0]
 [ 0  0  2  0  2  1  0  0  9  1]
 [ 0  3  1  0  0  0  0  0  1 10]]


In [None]:
# Random forest on the 2000-word encoding: num_trees=1000, max_depth=200, num_feature=2000.
# Prints training time, testing time, accuracy, predictions, true test labels and the confusion matrix.
training_time_2000, testing_time_2000, accuracy_2000, confusion_2000, prediction_2000 = RF_axis_aligned(train_encoded_2000, train_label_2000, test_encoded_2000, test_label_2000, 1000, 200, 2000)
print(training_time_2000, testing_time_2000, accuracy_2000, prediction_2000, np.array(test_label_2000), confusion_2000)

81.34903120994568 0.31794095039367676 0.62 [8 8 7 1 0 8 0 8 2 9 0 8 0 0 8 1 2 1 6 1 1 1 1 1 1 1 1 1 1 1 2 2 2 9 8 2 4
 4 2 2 2 6 0 8 6 3 3 8 6 3 3 7 3 2 8 3 3 3 0 8 4 4 4 4 4 6 5 4 6 5 4 4 4 9
 4 3 5 3 5 1 5 5 3 7 7 5 5 0 5 6 6 1 6 6 4 4 6 6 6 1 6 3 6 6 6 7 7 7 7 7 8
 7 7 7 8 7 7 2 7 7 8 8 2 2 8 8 5 8 8 8 5 8 9 8 8 9 9 9 9 1 9 9 6 9 1 9 9 9
 9 8] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9] [[ 5  1  1  0  0  0  0  1  6  1]
 [ 0 13  1  0  0  0  1  0  0  0]
 [ 1  0  7  0  2  0  2  0  2  1]
 [ 1  0  1  8  0  0  1  1  3  0]
 [ 0  0  0  0 10  2  2  0  0  1]
 [ 1  1  0  3  0  7  1  2  0  0]
 [ 0  2  0  1  2  0 10  0  0  0]
 [ 0  0  1  0  0  0  0 12  2  0]
 [ 0  0  2  0  0  2  0  0 10  1]
 [ 0  2  0  0  0  0  1  0  1 11]]


In [None]:
# Random forest on the 2000-word encoding: num_trees=1000, max_depth=300, num_feature=2000.
# Prints training time, testing time, accuracy, predictions, true test labels and the confusion matrix.
training_time_2000, testing_time_2000, accuracy_2000, confusion_2000, prediction_2000 = RF_axis_aligned(train_encoded_2000, train_label_2000, test_encoded_2000, test_label_2000, 1000, 300, 2000)
print(training_time_2000, testing_time_2000, accuracy_2000, prediction_2000, np.array(test_label_2000), confusion_2000)

78.81885313987732 0.2972595691680908 0.6066666666666667 [8 8 7 1 0 8 0 8 2 9 0 8 0 0 8 1 1 1 1 1 3 1 1 1 1 1 1 1 0 1 2 2 9 9 8 2 4
 4 2 2 2 6 3 8 6 3 3 3 4 3 3 7 3 4 8 3 3 8 0 8 3 4 4 4 4 6 5 4 7 4 4 4 4 9
 4 3 5 3 5 1 5 5 8 7 7 5 5 0 1 6 6 1 6 6 4 4 6 6 6 1 6 3 6 6 6 7 7 7 7 7 8
 7 7 7 7 7 7 2 7 7 8 8 2 2 8 8 5 8 8 8 4 8 9 8 8 9 9 2 9 1 9 9 6 9 3 9 9 9
 9 8] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9] [[ 5  1  1  0  0  0  0  1  6  1]
 [ 1 13  0  1  0  0  0  0  0  0]
 [ 0  0  6  1  2  0  2  0  2  2]
 [ 1  0  0  8  2  0  0  1  3  0]
 [ 0  0  0  1 10  1  1  1  0  1]
 [ 1  2  0  2  0  6  1  2  1  0]
 [ 0  2  0  1  2  0 10  0  0  0]
 [ 0  0  1  0  0  0  0 13  1  0]
 [ 0  0  2  0  1  1  0  0 10  1]
 [ 0  1  1  1  0  0  1  0  1 10]]


In [None]:
############################################################################################################################################
# Random forest on the 2000-word encoding: num_trees=500, max_depth=100, num_feature=2000.
# Prints training time, testing time, accuracy, predictions, true test labels and the confusion matrix.
training_time_2000, testing_time_2000, accuracy_2000, confusion_2000, prediction_2000 = RF_axis_aligned(train_encoded_2000, train_label_2000, test_encoded_2000, test_label_2000, 500, 100, 2000)
print(training_time_2000, testing_time_2000, accuracy_2000, prediction_2000, np.array(test_label_2000), confusion_2000)

40.06325888633728 0.17907261848449707 0.5866666666666667 [6 8 7 1 0 8 0 3 2 9 0 8 0 0 8 1 1 1 6 1 3 1 1 1 1 1 1 1 0 1 2 2 2 3 8 2 4
 4 8 2 2 6 1 8 6 3 3 3 0 3 3 3 3 4 8 3 3 8 5 8 4 4 4 4 4 6 5 4 7 4 4 4 4 9
 4 4 5 0 5 1 5 5 3 7 7 5 5 0 4 1 6 1 6 6 4 4 5 6 6 1 1 5 6 6 6 7 7 7 7 7 8
 7 7 7 7 7 7 2 7 7 8 8 2 2 8 8 5 8 8 8 5 8 2 8 8 9 9 2 9 1 9 9 6 9 1 9 3 2
 9 8] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9] [[ 5  1  1  1  0  0  1  1  4  1]
 [ 1 12  0  1  0  0  1  0  0  0]
 [ 0  1  6  1  2  0  2  0  3  0]
 [ 1  0  0  9  1  1  0  0  3  0]
 [ 0  0  0  0 11  1  1  1  0  1]
 [ 2  2  0  1  2  6  0  2  0  0]
 [ 0  3  0  0  2  2  8  0  0  0]
 [ 0  0  1  0  0  0  0 13  1  0]
 [ 0  0  3  0  0  2  0  0 10  0]
 [ 0  2  2  1  0  0  1  0  1  8]]


In [None]:
# Random forest on the 2000-word encoding: num_trees=500, max_depth=200, num_feature=2000.
# Prints training time, testing time, accuracy, predictions, true test labels and the confusion matrix.
training_time_2000, testing_time_2000, accuracy_2000, confusion_2000, prediction_2000 = RF_axis_aligned(train_encoded_2000, train_label_2000, test_encoded_2000, test_label_2000, 500, 200, 2000)
print(training_time_2000, testing_time_2000, accuracy_2000, prediction_2000, np.array(test_label_2000), confusion_2000)

40.25531196594238 0.183729887008667 0.62 [8 8 7 1 1 8 0 3 2 9 0 8 0 0 8 1 1 1 0 1 3 1 1 1 1 1 1 1 1 1 2 2 2 7 8 2 4
 9 2 2 2 6 2 9 6 3 3 8 4 3 3 7 3 2 7 3 3 3 0 3 4 4 4 4 4 6 4 4 4 5 4 4 4 9
 4 3 5 0 5 1 5 5 8 7 7 5 5 0 1 5 6 1 6 6 6 4 5 6 6 1 1 3 6 6 0 7 7 7 7 7 8
 7 7 7 7 7 7 8 7 7 8 8 2 2 8 8 4 8 8 8 5 8 9 8 2 9 9 9 9 1 9 9 1 9 3 9 3 9
 9 8] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9] [[ 4  2  1  1  0  0  0  1  5  1]
 [ 1 13  0  1  0  0  0  0  0  0]
 [ 0  0  8  0  1  0  2  1  1  2]
 [ 1  0  1  9  1  0  0  2  1  0]
 [ 0  0  0  0 12  1  1  0  0  1]
 [ 2  2  0  1  0  7  0  2  1  0]
 [ 1  3  0  1  1  1  8  0  0  0]
 [ 0  0  0  0  0  0  0 13  2  0]
 [ 0  0  3  0  1  1  0  0  9  1]
 [ 0  2  0  2  0  0  0  0  1 10]]


In [None]:
# Random forest on the 2000-word encoding: num_trees=1000, max_depth=100, num_feature=2000.
# Prints training time, testing time, accuracy, predictions, true test labels and the confusion matrix.
training_time_2000, testing_time_2000, accuracy_2000, confusion_2000, prediction_2000 = RF_axis_aligned(train_encoded_2000, train_label_2000, test_encoded_2000, test_label_2000, 1000, 100, 2000)
print(training_time_2000, testing_time_2000, accuracy_2000, prediction_2000, np.array(test_label_2000), confusion_2000)

77.4442253112793 0.3253962993621826 0.5866666666666667 [8 8 7 1 1 8 0 3 2 9 0 8 0 0 8 1 1 1 6 1 1 1 1 2 1 1 1 1 0 1 2 2 2 9 8 2 4
 4 8 2 2 6 4 9 6 3 3 3 4 3 3 7 3 4 8 3 3 8 3 8 4 4 4 4 4 6 5 4 7 1 4 4 4 9
 4 4 5 0 5 1 5 5 3 7 7 5 5 0 5 6 6 1 6 6 4 4 6 6 6 1 1 3 6 6 6 7 7 7 7 7 8
 7 7 7 7 7 7 2 7 7 8 8 2 2 8 8 4 8 8 8 5 8 9 8 8 9 9 2 9 1 9 9 6 9 3 9 3 2
 9 8] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9] [[ 4  2  1  1  0  0  0  1  5  1]
 [ 1 12  1  0  0  0  1  0  0  0]
 [ 0  0  6  0  3  0  2  0  2  2]
 [ 0  0  0  9  2  0  0  1  3  0]
 [ 0  1  0  0 10  1  1  1  0  1]
 [ 2  1  0  1  1  7  1  2  0  0]
 [ 0  3  0  1  2  0  9  0  0  0]
 [ 0  0  1  0  0  0  0 13  1  0]
 [ 0  0  2  0  1  1  0  0 10  1]
 [ 0  1  2  2  0  0  1  0  1  8]]


In [None]:
# Random forest on the 2000-word encoding: num_trees=1000, max_depth=200, num_feature=2000.
# NOTE(review): an earlier cell already runs these exact hyperparameters; the
# forest is not seeded here, so the two runs can report different accuracies.
training_time_2000, testing_time_2000, accuracy_2000, confusion_2000, prediction_2000 = RF_axis_aligned(train_encoded_2000, train_label_2000, test_encoded_2000, test_label_2000, 1000, 200, 2000)
print(training_time_2000, testing_time_2000, accuracy_2000, prediction_2000, np.array(test_label_2000), confusion_2000)

80.0911157131195 0.29509949684143066 0.5933333333333334 [8 8 7 1 1 8 0 3 8 9 0 8 0 0 8 1 1 1 1 1 3 1 1 6 1 1 1 1 0 1 2 2 2 3 8 2 4
 4 2 2 2 6 0 8 6 3 3 3 4 3 3 7 3 4 7 3 3 8 3 8 4 4 4 4 4 6 5 6 4 5 5 1 4 9
 4 2 5 3 5 1 5 5 3 7 7 5 5 0 5 3 6 1 6 6 0 4 6 6 6 1 1 3 6 6 6 7 7 7 7 7 8
 7 7 7 7 7 7 2 7 7 8 8 2 2 8 8 0 8 8 8 4 8 9 8 8 9 9 9 9 1 9 9 1 9 1 9 3 9
 9 8] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9] [[ 4  2  0  1  0  0  0  1  6  1]
 [ 1 12  0  1  0  0  1  0  0  0]
 [ 1  0  7  1  2  0  2  0  2  0]
 [ 0  0  0  9  2  0  0  2  2  0]
 [ 0  1  0  0  8  3  2  0  0  1]
 [ 1  1  1  3  0  7  0  2  0  0]
 [ 1  3  0  1  1  0  9  0  0  0]
 [ 0  0  1  0  0  0  0 13  1  0]
 [ 1  0  2  0  1  0  0  0 10  1]
 [ 0  3  0  1  0  0  0  0  1 10]]


In [None]:
# Random forest on the 2000-word encoding: num_trees=500, max_depth=100, num_feature=1000.
# Prints training time, testing time, accuracy, predictions, true test labels and the confusion matrix.
training_time_2000, testing_time_2000, accuracy_2000, confusion_2000, prediction_2000 = RF_axis_aligned(train_encoded_2000, train_label_2000, test_encoded_2000, test_label_2000, 500, 100, 1000)
print(training_time_2000, testing_time_2000, accuracy_2000, prediction_2000, np.array(test_label_2000), confusion_2000)

20.194461584091187 0.18326401710510254 0.6133333333333333 [8 8 7 1 1 8 0 3 2 8 6 8 0 0 8 1 2 1 1 1 1 1 1 2 1 1 1 1 1 1 2 2 2 3 8 2 4
 4 2 2 2 6 2 8 6 3 3 3 4 3 3 7 3 4 8 3 3 8 3 3 4 4 4 4 4 6 4 6 4 6 4 4 4 9
 4 4 5 3 5 5 5 5 3 7 7 5 5 0 1 6 6 1 6 6 4 4 5 6 5 1 6 3 6 6 6 7 7 7 7 7 8
 7 7 7 7 7 8 2 7 7 8 8 2 2 8 8 5 8 8 8 5 8 2 8 8 9 9 4 9 1 9 9 1 9 1 9 9 9
 9 8] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9] [[ 3  2  1  1  0  0  1  1  6  0]
 [ 0 13  2  0  0  0  0  0  0  0]
 [ 0  0  8  1  2  0  2  0  2  0]
 [ 0  0  0 10  2  0  0  1  2  0]
 [ 0  0  0  0 11  0  3  0  0  1]
 [ 1  1  0  2  1  7  1  2  0  0]
 [ 0  2  0  1  2  2  8  0  0  0]
 [ 0  0  1  0  0  0  0 12  2  0]
 [ 0  0  3  0  0  2  0  0 10  0]
 [ 0  3  0  0  1  0  0  0  1 10]]

In [None]:
# Random forest on the 2000-word encoding: num_trees=500, max_depth=200, num_feature=1000.
# Prints training time, testing time, accuracy, predictions, true test labels and the confusion matrix.
training_time_2000, testing_time_2000, accuracy_2000, confusion_2000, prediction_2000 = RF_axis_aligned(train_encoded_2000, train_label_2000, test_encoded_2000, test_label_2000, 500, 200, 1000)
print(training_time_2000, testing_time_2000, accuracy_2000, prediction_2000, np.array(test_label_2000), confusion_2000)

20.237104654312134 0.1716930866241455 0.5866666666666667 [8 8 7 1 0 8 0 3 2 2 6 8 0 0 8 1 2 1 0 1 1 1 1 1 1 1 1 1 1 1 2 2 2 9 8 2 4
 4 8 2 2 6 4 8 6 3 3 3 0 3 3 7 3 4 7 3 3 8 3 8 4 4 4 4 4 6 5 6 6 5 5 6 4 9
 4 3 5 3 5 5 5 5 3 7 7 5 5 5 4 3 6 1 6 6 4 4 6 6 6 1 1 1 6 6 6 7 7 7 7 7 8
 7 7 7 7 7 7 2 7 7 8 8 2 2 8 8 5 8 8 8 5 8 9 8 8 9 9 9 9 1 9 9 3 9 3 9 3 2
 9 8] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9] [[ 4  1  2  1  0  0  1  1  5  0]
 [ 1 13  1  0  0  0  0  0  0  0]
 [ 0  0  6  0  3  0  2  0  3  1]
 [ 1  0  0  9  1  0  0  2  2  0]
 [ 0  0  0  0  7  3  4  0  0  1]
 [ 0  0  0  4  1  8  0  2  0  0]
 [ 0  4  0  0  2  0  9  0  0  0]
 [ 0  0  1  0  0  0  0 13  1  0]
 [ 0  0  2  0  0  2  0  0 10  1]
 [ 0  1  1  3  0  0  0  0  1  9]]


In [None]:
# Random forest on the 2000-word encoding: num_trees=1000, max_depth=100, num_feature=1000.
# Prints training time, testing time, accuracy, predictions, true test labels and the confusion matrix.
training_time_2000, testing_time_2000, accuracy_2000, confusion_2000, prediction_2000 = RF_axis_aligned(train_encoded_2000, train_label_2000, test_encoded_2000, test_label_2000, 1000, 100, 1000)
print(training_time_2000, testing_time_2000, accuracy_2000, prediction_2000, np.array(test_label_2000), confusion_2000)

44.84177255630493 0.30342912673950195 0.6 [8 8 7 1 0 8 0 8 2 9 6 8 0 0 8 1 2 1 6 1 1 1 1 1 1 1 1 1 1 1 2 2 9 9 8 2 4
 9 2 9 2 6 0 8 6 3 3 3 4 3 8 7 3 4 8 3 3 3 0 8 4 4 4 4 4 6 5 4 4 5 5 4 4 9
 4 0 5 5 5 1 5 5 3 7 7 5 5 0 5 3 6 1 6 6 4 4 5 6 6 1 6 3 6 6 6 7 7 7 7 7 8
 7 7 7 7 7 7 8 7 7 8 8 4 2 8 8 5 8 8 8 5 8 9 8 8 9 9 9 9 1 9 9 6 9 1 9 9 2
 9 8] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9] [[ 4  1  1  0  0  0  1  1  6  1]
 [ 0 13  1  0  0  0  1  0  0  0]
 [ 1  0  5  0  1  0  2  0  2  4]
 [ 1  0  0  8  2  0  0  1  3  0]
 [ 0  0  0  0 10  3  1  0  0  1]
 [ 2  1  0  2  0  8  0  2  0  0]
 [ 0  2  0  1  2  1  9  0  0  0]
 [ 0  0  0  0  0  0  0 13  2  0]
 [ 0  0  1  0  1  2  0  0 10  1]
 [ 0  2  1  0  0  0  1  0  1 10]]


In [None]:
# Random forest on the 2000-word encoding: num_trees=1000, max_depth=200, num_feature=1000.
# Prints training time, testing time, accuracy, predictions, true test labels and the confusion matrix.
training_time_2000, testing_time_2000, accuracy_2000, confusion_2000, prediction_2000 = RF_axis_aligned(train_encoded_2000, train_label_2000, test_encoded_2000, test_label_2000, 1000, 200, 1000)
print(training_time_2000, testing_time_2000, accuracy_2000, prediction_2000, np.array(test_label_2000), confusion_2000)

41.963783502578735 0.3569319248199463 0.6 [8 8 7 1 1 8 0 8 2 9 6 8 0 0 8 1 2 1 0 1 3 1 1 1 1 1 1 1 0 1 2 2 2 3 8 2 4
 4 8 2 2 6 0 8 6 3 3 3 4 3 3 7 3 3 8 3 3 8 5 3 4 4 4 4 4 6 4 4 7 1 5 4 4 9
 4 4 5 5 5 1 5 5 3 7 7 5 5 0 5 3 6 1 6 6 4 4 5 6 6 1 1 3 6 6 6 7 7 7 7 7 8
 7 7 7 7 7 7 2 7 7 8 8 4 2 8 8 2 8 8 8 4 8 9 8 8 9 9 9 9 1 9 9 6 9 1 9 9 9
 9 8] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9] [[ 3  2  1  0  0  0  1  1  6  1]
 [ 2 11  1  1  0  0  0  0  0  0]
 [ 1  0  6  1  2  0  2  0  3  0]
 [ 0  0  0 10  1  1  0  1  2  0]
 [ 0  1  0  0 10  1  1  1  0  1]
 [ 1  1  0  2  1  8  0  2  0  0]
 [ 0  3  0  1  2  1  8  0  0  0]
 [ 0  0  1  0  0  0  0 13  1  0]
 [ 0  0  2  0  2  0  0  0 10  1]
 [ 0  2  0  0  0  0  1  0  1 11]]


In [None]:
############################################################################################################################################
############################################################################################################################################
# Re-runs the 2000-word encoding, rebinding the *_2000 variables assigned by the
# first bow_encoder(2000) cell. load_data reshuffles on every call, so the
# train/test split used from here on differs from the one used above.
train_encoded_2000, train_label_2000, test_encoded_2000, test_label_2000, codewords_2000, encoding_time_2000 = bow_encoder(2000)
print(encoding_time_2000)

SIFT feature extraction
Train/Test split: 150/150
Codebook Size: 2000
Building the codebook, it will take some time
size of descriptors : 137256


  features = np.vstack((descriptor for descriptor in X))


Bag of words encoding


  word_hist, bin_edges = np.histogram(code, bins=range(codebook.shape[0] + 1), normed=True)




3555.934947013855


In [None]:
# Build the bag-of-words encoding with a 3000-word codebook and print the elapsed
# encoding time (seconds). Each bow_encoder call reloads and reshuffles the images.
train_encoded_3000, train_label_3000, test_encoded_3000, test_label_3000, codewords_3000, encoding_time_3000 = bow_encoder(3000)
print(encoding_time_3000)

SIFT feature extraction
Train/Test split: 150/150
Codebook Size: 3000
Building the codebook, it will take some time
size of descriptors : 139418


  features = np.vstack((descriptor for descriptor in X))


Bag of words encoding


  word_hist, bin_edges = np.histogram(code, bins=range(codebook.shape[0] + 1), normed=True)




5071.218578577042


In [None]:
# Build the bag-of-words encoding with a 4000-word codebook and print the elapsed
# encoding time (seconds). Each bow_encoder call reloads and reshuffles the images.
train_encoded_4000, train_label_4000, test_encoded_4000, test_label_4000, codewords_4000, encoding_time_4000 = bow_encoder(4000)
print(encoding_time_4000)

SIFT feature extraction
Train/Test split: 150/150
Codebook Size: 4000
Building the codebook, it will take some time
size of descriptors : 145401


  features = np.vstack((descriptor for descriptor in X))


Bag of words encoding


  word_hist, bin_edges = np.histogram(code, bins=range(codebook.shape[0] + 1), normed=True)




6852.588196992874


In [None]:
# Vocabulary-size comparison: num_trees=500, max_depth=100, and num_feature set
# to half of each vocabulary size (1000 / 1500 / 2000).
training_time_2000, testing_time_2000, accuracy_2000, confusion_2000, prediction_2000 = RF_axis_aligned(train_encoded_2000, train_label_2000, test_encoded_2000, test_label_2000, 500, 100, 1000)
training_time_3000, testing_time_3000, accuracy_3000, confusion_3000, prediction_3000 = RF_axis_aligned(train_encoded_3000, train_label_3000, test_encoded_3000, test_label_3000, 500, 100, 1500)
training_time_4000, testing_time_4000, accuracy_4000, confusion_4000, prediction_4000 = RF_axis_aligned(train_encoded_4000, train_label_4000, test_encoded_4000, test_label_4000, 500, 100, 2000)

# One line per vocabulary size: training time, testing time, accuracy,
# predictions, true test labels and the confusion matrix.
print(training_time_2000, testing_time_2000, accuracy_2000, prediction_2000, np.array(test_label_2000), confusion_2000)
print(training_time_3000, testing_time_3000, accuracy_3000, prediction_3000, np.array(test_label_3000), confusion_3000)
print(training_time_4000, testing_time_4000, accuracy_4000, prediction_4000, np.array(test_label_4000), confusion_4000)

22.02465510368347 0.14214205741882324 0.5933333333333334 [8 4 0 6 0 0 4 8 0 0 2 6 0 6 8 6 1 0 1 1 1 1 1 6 1 1 1 1 1 3 2 3 2 1 9 2 0
 2 9 2 4 2 2 2 6 3 7 4 6 8 8 3 8 0 8 8 9 7 3 3 1 4 6 3 4 4 4 4 9 4 4 8 4 4
 4 3 3 7 8 5 7 3 5 4 7 2 5 4 5 6 0 6 6 8 6 6 6 0 3 6 5 6 6 8 8 7 7 7 7 7 7
 7 7 4 5 7 7 7 7 7 8 8 8 8 8 8 8 8 9 8 2 8 8 8 8 2 9 6 9 9 9 9 9 7 9 9 9 9
 9 9] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9] [[ 6  0  1  0  2  0  3  0  3  0]
 [ 1 11  0  1  0  0  2  0  0  0]
 [ 1  1  8  1  1  0  1  0  0  2]
 [ 1  0  0  4  1  0  1  2  5  1]
 [ 0  1  0  1 10  0  1  0  1  1]
 [ 0  0  1  3  2  4  1  3  1  0]
 [ 2  0  0  1  0  1  8  0  3  0]
 [ 0  0  0  0  1  1  0 13  0  0]
 [ 0  0  1  0  0  0  0  0 13  1]
 [ 0  0  1  0  0  0  1  1  0 12]]


In [None]:
# Vocabulary-size comparison: num_trees=500, max_depth=200, and num_feature set
# to half of each vocabulary size (1000 / 1500 / 2000).
training_time_2000, testing_time_2000, accuracy_2000, confusion_2000, prediction_2000 = RF_axis_aligned(train_encoded_2000, train_label_2000, test_encoded_2000, test_label_2000, 500, 200, 1000)
training_time_3000, testing_time_3000, accuracy_3000, confusion_3000, prediction_3000 = RF_axis_aligned(train_encoded_3000, train_label_3000, test_encoded_3000, test_label_3000, 500, 200, 1500)
training_time_4000, testing_time_4000, accuracy_4000, confusion_4000, prediction_4000 = RF_axis_aligned(train_encoded_4000, train_label_4000, test_encoded_4000, test_label_4000, 500, 200, 2000)

# One line per vocabulary size: training time, testing time, accuracy,
# predictions, true test labels and the confusion matrix.
print(training_time_2000, testing_time_2000, accuracy_2000, prediction_2000, np.array(test_label_2000), confusion_2000)
print(training_time_3000, testing_time_3000, accuracy_3000, prediction_3000, np.array(test_label_3000), confusion_3000)
print(training_time_4000, testing_time_4000, accuracy_4000, prediction_4000, np.array(test_label_4000), confusion_4000)

24.818297147750854 0.17193150520324707 0.58 [8 0 0 6 3 0 0 8 0 0 2 6 0 6 8 6 1 0 1 1 1 1 6 6 1 0 1 1 1 3 2 3 2 1 9 2 3
 2 9 2 4 2 2 9 6 3 7 2 0 8 8 7 3 0 7 8 9 7 7 3 4 4 4 3 4 4 4 4 9 5 4 8 4 4
 4 3 5 5 7 5 7 3 5 4 7 1 5 4 3 6 0 6 6 8 6 6 4 7 3 6 7 6 6 8 8 7 7 7 7 7 7
 7 7 4 0 7 7 7 7 7 8 8 8 8 8 8 8 8 9 8 2 8 8 8 8 2 9 3 9 9 9 9 9 7 9 9 9 9
 9 9] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9] [[ 7  0  1  1  0  0  3  0  3  0]
 [ 2  9  0  1  0  0  3  0  0  0]
 [ 0  1  7  2  1  0  1  0  0  3]
 [ 2  0  1  3  0  0  0  5  3  1]
 [ 0  0  0  1 11  1  0  0  1  1]
 [ 0  1  0  3  2  5  1  3  0  0]
 [ 1  0  0  1  1  0  7  2  3  0]
 [ 1  0  0  0  1  0  0 13  0  0]
 [ 0  0  1  0  0  0  0  0 13  1]
 [ 0  0  1  1  0  0  0  1  0 12]]
23.5797188282

In [None]:
# Vocabulary-size comparison: num_trees=1000, max_depth=100, and num_feature set
# to half of each vocabulary size (1000 / 1500 / 2000).
training_time_2000, testing_time_2000, accuracy_2000, confusion_2000, prediction_2000 = RF_axis_aligned(train_encoded_2000, train_label_2000, test_encoded_2000, test_label_2000, 1000, 100, 1000)
training_time_3000, testing_time_3000, accuracy_3000, confusion_3000, prediction_3000 = RF_axis_aligned(train_encoded_3000, train_label_3000, test_encoded_3000, test_label_3000, 1000, 100, 1500)
training_time_4000, testing_time_4000, accuracy_4000, confusion_4000, prediction_4000 = RF_axis_aligned(train_encoded_4000, train_label_4000, test_encoded_4000, test_label_4000, 1000, 100, 2000)

# One line per vocabulary size: training time, testing time, accuracy,
# predictions, true test labels and the confusion matrix.
print(training_time_2000, testing_time_2000, accuracy_2000, prediction_2000, np.array(test_label_2000), confusion_2000)
print(training_time_3000, testing_time_3000, accuracy_3000, prediction_3000, np.array(test_label_3000), confusion_3000)
print(training_time_4000, testing_time_4000, accuracy_4000, prediction_4000, np.array(test_label_4000), confusion_4000)

40.55638885498047 0.24196100234985352 0.5466666666666666 [8 4 0 6 3 0 4 8 0 7 9 6 0 6 8 6 1 0 1 1 1 1 1 1 1 1 1 1 1 3 8 3 2 1 9 2 0
 2 2 2 4 8 2 9 6 3 7 4 6 8 8 7 3 8 8 8 9 7 7 8 1 4 6 3 4 4 4 4 9 5 4 8 4 4
 4 3 5 7 8 0 7 3 5 4 7 2 5 4 3 6 0 6 6 8 6 6 6 0 3 6 7 6 6 8 8 7 7 7 7 7 7
 7 7 4 0 7 7 7 7 7 8 8 8 8 8 8 8 8 9 8 2 8 8 8 8 2 9 8 9 9 9 9 9 7 9 9 9 9
 9 9] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9] [[ 4  0  0  1  2  0  3  1  3  1]
 [ 1 12  0  1  0  0  1  0  0  0]
 [ 1  1  6  1  1  0  1  0  2  2]
 [ 0  0  0  2  1  0  1  4  6  1]
 [ 0  1  0  1  9  1  1  0  1  1]
 [ 1  0  1  3  2  3  1  3  1  0]
 [ 2  0  0  1  0  0  8  1  3  0]
 [ 1  0  0  0  1  0  0 13  0  0]
 [ 0  0  1  0  0  0  0  0 13  1]
 [ 0  0  1  0  0  0  0  1  1 12]]


In [None]:
# Vocabulary-size comparison: num_trees=1000, max_depth=200, and num_feature set
# to half of each vocabulary size (1000 / 1500 / 2000).
training_time_2000, testing_time_2000, accuracy_2000, confusion_2000, prediction_2000 = RF_axis_aligned(train_encoded_2000, train_label_2000, test_encoded_2000, test_label_2000, 1000, 200, 1000)
training_time_3000, testing_time_3000, accuracy_3000, confusion_3000, prediction_3000 = RF_axis_aligned(train_encoded_3000, train_label_3000, test_encoded_3000, test_label_3000, 1000, 200, 1500)
training_time_4000, testing_time_4000, accuracy_4000, confusion_4000, prediction_4000 = RF_axis_aligned(train_encoded_4000, train_label_4000, test_encoded_4000, test_label_4000, 1000, 200, 2000)

# One line per vocabulary size: training time, testing time, accuracy,
# predictions, true test labels and the confusion matrix.
print(training_time_2000, testing_time_2000, accuracy_2000, prediction_2000, np.array(test_label_2000), confusion_2000)
print(training_time_3000, testing_time_3000, accuracy_3000, prediction_3000, np.array(test_label_3000), confusion_3000)
print(training_time_4000, testing_time_4000, accuracy_4000, prediction_4000, np.array(test_label_4000), confusion_4000)

41.369564056396484 0.26750993728637695 0.5866666666666667 [8 0 0 6 3 0 4 8 0 7 2 6 0 6 8 6 1 0 1 1 1 1 1 1 1 1 1 1 1 3 2 3 2 1 9 2 3
 2 2 2 4 2 2 9 6 3 7 4 6 8 9 3 8 0 8 8 9 7 7 8 1 4 6 3 4 4 4 4 9 5 4 8 4 4
 4 3 6 7 8 5 7 5 5 4 7 2 5 5 3 6 8 6 6 8 6 6 6 6 3 6 7 6 6 8 8 7 7 7 7 7 7
 7 7 4 0 7 7 7 7 7 8 8 8 8 8 8 8 8 9 8 2 8 8 8 8 2 9 2 9 9 9 9 9 7 9 9 9 9
 9 9] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9] [[ 5  0  1  1  1  0  3  1  3  0]
 [ 1 12  0  1  0  0  1  0  0  0]
 [ 0  1  8  2  1  0  1  0  0  2]
 [ 1  0  0  2  1  0  1  3  5  2]
 [ 0  1  0  1  9  1  1  0  1  1]
 [ 0  0  1  2  1  5  2  3  1  0]
 [ 0  0  0  1  0  0  9  1  4  0]
 [ 1  0  0  0  1  0  0 13  0  0]
 [ 0  0  1  0  0  0  0  0 13  1]
 [ 0  0  2  0  0  0  0  1  0 12]]

In [None]:
###########################################################################################
# Vocabulary-size comparison: num_trees=500, max_depth=100, and num_feature equal
# to the full vocabulary size (2000 / 3000 / 4000).
training_time_2000, testing_time_2000, accuracy_2000, confusion_2000, prediction_2000 = RF_axis_aligned(train_encoded_2000, train_label_2000, test_encoded_2000, test_label_2000, 500, 100, 2000)
training_time_3000, testing_time_3000, accuracy_3000, confusion_3000, prediction_3000 = RF_axis_aligned(train_encoded_3000, train_label_3000, test_encoded_3000, test_label_3000, 500, 100, 3000)
training_time_4000, testing_time_4000, accuracy_4000, confusion_4000, prediction_4000 = RF_axis_aligned(train_encoded_4000, train_label_4000, test_encoded_4000, test_label_4000, 500, 100, 4000)

# One line per vocabulary size: training time, testing time, accuracy,
# predictions, true test labels and the confusion matrix.
print(training_time_2000, testing_time_2000, accuracy_2000, prediction_2000, np.array(test_label_2000), confusion_2000)
print(training_time_3000, testing_time_3000, accuracy_3000, prediction_3000, np.array(test_label_3000), confusion_3000)
print(training_time_4000, testing_time_4000, accuracy_4000, prediction_4000, np.array(test_label_4000), confusion_4000)

39.4659206867218 0.13155508041381836 0.54 [8 4 6 6 0 0 4 8 0 7 9 6 0 6 8 6 1 0 1 1 1 1 6 6 1 0 1 1 1 3 2 3 0 1 9 9 8
 2 2 2 4 2 2 9 0 3 7 4 0 8 9 7 3 8 3 8 8 7 1 8 4 4 4 3 4 4 4 4 9 8 4 8 4 4
 4 8 0 7 8 7 7 3 5 4 7 2 5 4 5 6 8 6 6 8 6 6 6 0 3 6 7 6 6 8 8 3 7 7 7 7 7
 7 7 4 0 7 7 7 7 7 8 8 8 8 8 8 8 8 9 8 2 8 8 8 8 2 9 3 9 9 9 9 9 7 9 9 9 9
 9 9] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9] [[ 4  0  0  0  2  0  4  1  3  1]
 [ 2  9  0  1  0  0  3  0  0  0]
 [ 2  1  6  1  1  0  0  0  1  3]
 [ 1  1  0  3  1  0  0  3  5  1]
 [ 0  0  0  1 11  0  0  0  2  1]
 [ 1  0  1  1  2  3  1  4  2  0]
 [ 1  0  0  1  0  0  8  1  4  0]
 [ 1  0  0  1  1  0  0 12  0  0]
 [ 0  0  1  0  0  0  0  0 13  1]
 [ 0  0  1  1  0  0  0  1  0 12]]
47.606566667556

In [None]:
# Axis-aligned random forest runs on the 2000 / 3000 / 4000 encodings.
# The fifth and sixth positional arguments are fixed at 500 and 200 in this cell.
# NOTE(review): presumably forest hyper-parameters -- confirm against RF_axis_aligned's signature.
# The final argument repeats the suffix of the encoded data it is paired with.
(training_time_2000, testing_time_2000, accuracy_2000,
 confusion_2000, prediction_2000) = RF_axis_aligned(
    train_encoded_2000, train_label_2000,
    test_encoded_2000, test_label_2000,
    500, 200, 2000,
)
(training_time_3000, testing_time_3000, accuracy_3000,
 confusion_3000, prediction_3000) = RF_axis_aligned(
    train_encoded_3000, train_label_3000,
    test_encoded_3000, test_label_3000,
    500, 200, 3000,
)
(training_time_4000, testing_time_4000, accuracy_4000,
 confusion_4000, prediction_4000) = RF_axis_aligned(
    train_encoded_4000, train_label_4000,
    test_encoded_4000, test_label_4000,
    500, 200, 4000,
)

# All three runs finish before anything is reported. Each row is printed space-separated:
# training time, testing time, accuracy, predictions, test labels (as an array), confusion.
for _run_summary in (
    (training_time_2000, testing_time_2000, accuracy_2000,
     prediction_2000, np.array(test_label_2000), confusion_2000),
    (training_time_3000, testing_time_3000, accuracy_3000,
     prediction_3000, np.array(test_label_3000), confusion_3000),
    (training_time_4000, testing_time_4000, accuracy_4000,
     prediction_4000, np.array(test_label_4000), confusion_4000),
):
    print(*_run_summary)

38.89171028137207 0.11774682998657227 0.58 [8 0 6 6 3 0 0 8 0 7 3 6 9 6 8 6 1 0 1 1 1 1 1 6 1 0 1 1 1 9 2 3 2 1 9 2 0
 2 2 2 4 2 2 9 6 3 7 4 6 8 9 3 3 0 8 8 3 7 7 8 4 4 4 3 4 4 4 4 9 8 4 8 4 4
 4 3 3 7 8 5 7 3 5 4 7 1 5 5 3 6 5 6 6 8 6 6 6 5 3 6 7 6 6 8 8 7 7 7 7 7 7
 7 7 4 0 7 7 7 7 7 8 8 8 8 8 8 8 8 9 8 2 8 8 8 8 2 9 6 9 9 9 9 9 7 9 9 9 9
 9 9] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9] [[ 4  0  0  2  0  0  4  1  3  1]
 [ 2 10  0  0  0  0  2  0  0  1]
 [ 1  1  8  1  1  0  1  0  0  2]
 [ 1  0  0  4  1  0  1  3  4  1]
 [ 0  0  0  1 11  0  0  0  2  1]
 [ 0  1  0  4  1  4  1  3  1  0]
 [ 0  0  0  1  0  2  8  1  3  0]
 [ 1  0  0  0  1  0  0 13  0  0]
 [ 0  0  1  0  0  0  0  0 13  1]
 [ 0  0  1  0  0  0  1  1  0 12]]
49.06374716758

In [None]:
# Axis-aligned random forest runs on the 2000 / 3000 / 4000 encodings.
# The fifth and sixth positional arguments are fixed at 1000 and 100 in this cell.
# NOTE(review): presumably forest hyper-parameters -- confirm against RF_axis_aligned's signature.
# The final argument repeats the suffix of the encoded data it is paired with.
(training_time_2000, testing_time_2000, accuracy_2000,
 confusion_2000, prediction_2000) = RF_axis_aligned(
    train_encoded_2000, train_label_2000,
    test_encoded_2000, test_label_2000,
    1000, 100, 2000,
)
(training_time_3000, testing_time_3000, accuracy_3000,
 confusion_3000, prediction_3000) = RF_axis_aligned(
    train_encoded_3000, train_label_3000,
    test_encoded_3000, test_label_3000,
    1000, 100, 3000,
)
(training_time_4000, testing_time_4000, accuracy_4000,
 confusion_4000, prediction_4000) = RF_axis_aligned(
    train_encoded_4000, train_label_4000,
    test_encoded_4000, test_label_4000,
    1000, 100, 4000,
)

# All three runs finish before anything is reported. Each row is printed space-separated:
# training time, testing time, accuracy, predictions, test labels (as an array), confusion.
for _run_summary in (
    (training_time_2000, testing_time_2000, accuracy_2000,
     prediction_2000, np.array(test_label_2000), confusion_2000),
    (training_time_3000, testing_time_3000, accuracy_3000,
     prediction_3000, np.array(test_label_3000), confusion_3000),
    (training_time_4000, testing_time_4000, accuracy_4000,
     prediction_4000, np.array(test_label_4000), confusion_4000),
):
    print(*_run_summary)

76.44954991340637 0.23289251327514648 0.54 [8 4 6 6 0 0 4 8 0 7 9 6 0 6 8 6 1 0 1 1 1 1 6 6 1 0 1 1 1 3 2 3 2 1 9 2 0
 2 2 4 4 9 2 9 6 3 7 4 0 8 9 7 3 8 8 8 9 7 4 8 4 4 4 3 4 4 4 4 9 5 4 8 4 4
 4 8 5 7 8 5 7 5 5 4 7 1 5 4 0 6 8 6 6 8 6 6 4 7 3 6 7 5 6 8 8 7 7 7 7 7 7
 7 7 4 0 7 7 7 7 7 8 8 8 8 8 8 8 8 9 8 2 8 8 8 8 2 9 8 9 9 9 9 9 7 9 9 9 9
 9 9] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9] [[ 4  0  0  0  2  0  4  1  3  1]
 [ 2  9  0  1  0  0  3  0  0  0]
 [ 1  1  6  1  2  0  1  0  0  3]
 [ 1  0  0  2  2  0  0  3  5  2]
 [ 0  0  0  1 11  1  0  0  1  1]
 [ 1  1  0  0  2  5  1  3  2  0]
 [ 0  0  0  1  1  1  6  2  4  0]
 [ 1  0  0  0  1  0  0 13  0  0]
 [ 0  0  1  0  0  0  0  0 13  1]
 [ 0  0  1  0  0  0  0  1  1 12]]
93.74168610572

In [None]:
# Axis-aligned random forest runs on the 2000 / 3000 / 4000 encodings.
# The fifth and sixth positional arguments are fixed at 1000 and 200 in this cell.
# NOTE(review): presumably forest hyper-parameters -- confirm against RF_axis_aligned's signature.
# The final argument repeats the suffix of the encoded data it is paired with.
(training_time_2000, testing_time_2000, accuracy_2000,
 confusion_2000, prediction_2000) = RF_axis_aligned(
    train_encoded_2000, train_label_2000,
    test_encoded_2000, test_label_2000,
    1000, 200, 2000,
)
(training_time_3000, testing_time_3000, accuracy_3000,
 confusion_3000, prediction_3000) = RF_axis_aligned(
    train_encoded_3000, train_label_3000,
    test_encoded_3000, test_label_3000,
    1000, 200, 3000,
)
(training_time_4000, testing_time_4000, accuracy_4000,
 confusion_4000, prediction_4000) = RF_axis_aligned(
    train_encoded_4000, train_label_4000,
    test_encoded_4000, test_label_4000,
    1000, 200, 4000,
)

# All three runs finish before anything is reported. Each row is printed space-separated:
# training time, testing time, accuracy, predictions, test labels (as an array), confusion.
for _run_summary in (
    (training_time_2000, testing_time_2000, accuracy_2000,
     prediction_2000, np.array(test_label_2000), confusion_2000),
    (training_time_3000, testing_time_3000, accuracy_3000,
     prediction_3000, np.array(test_label_3000), confusion_3000),
    (training_time_4000, testing_time_4000, accuracy_4000,
     prediction_4000, np.array(test_label_4000), confusion_4000),
):
    print(*_run_summary)

81.06973648071289 0.28453993797302246 0.5733333333333334 [8 4 6 6 0 0 4 8 0 7 9 6 0 6 8 6 1 0 1 1 1 1 6 3 1 0 1 1 1 3 2 3 2 1 2 2 8
 2 2 2 4 2 2 9 6 3 7 4 6 8 9 3 3 0 8 8 9 7 4 8 4 4 4 3 4 4 4 4 9 8 4 8 4 4
 4 8 5 7 8 5 7 3 5 4 7 2 5 4 3 1 8 6 6 8 6 6 6 5 3 6 7 6 6 8 8 7 7 7 7 7 7
 7 7 4 0 7 7 7 7 7 8 8 8 8 8 8 8 8 9 8 2 8 8 8 8 2 9 8 9 9 9 9 9 7 9 9 9 9
 9 9] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9] [[ 4  0  0  0  2  0  4  1  3  1]
 [ 2  9  0  2  0  0  2  0  0  0]
 [ 0  1  9  1  1  0  1  0  1  1]
 [ 1  0  0  3  2  0  1  2  4  2]
 [ 0  0  0  1 11  0  0  0  2  1]
 [ 0  1  1  2  2  4  0  3  2  0]
 [ 0  0  0  1  0  1  8  1  4  0]
 [ 1  0  0  0  1  0  0 13  0  0]
 [ 0  0  1  0  0  0  0  0 13  1]
 [ 0  0  1  0  0  0  0  1  1 12]]
