In [6]:
import os
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import random
import preprocessor as prp

In [7]:
# Location of the image dataset. The original workstation path stays the default;
# set the PAINTING_DATASET_PATH environment variable to point the notebook at
# another copy of the data without editing this cell.
dataset_path = os.path.abspath(
    os.environ.get(
        'PAINTING_DATASET_PATH',
        r'D:\Program Files (x86)\painting-to-artist\medium_dataset',
    )
)

# Arguments forwarded positionally to prp.preprocess_data. The meanings noted
# below are inferred from the names only -- TODO confirm against preprocessor.py.
n = 70                           # presumably a sample/class count limit
ratio = 0.8                      # presumably the train share of the train/test split
size = 128                       # presumably the image side length after resizing
pca_components_threshold = 0.95  # presumably the explained-variance target for PCA

# Returns the train/test features and labels plus `le`
# (presumably a fitted label encoder -- TODO confirm).
x_train, x_test, y_train, y_test, le = prp.preprocess_data(
    dataset_path, n, ratio, size, pca_components_threshold
)

In [8]:
# How many times each hyperparameter combination is refit (with a different
# forest seed) before its accuracies are averaged in the manual RF search.
model_iterations: int = 10

In [9]:
# Manual grid search over Random Forest hyperparameters.
#
# Every combination in `rf_hyperparameters` is fitted `model_iterations` times,
# each time with a different forest seed, and the accuracies on the training
# split ("empirical") and on the x_test/y_test split ("true") are averaged.
#
# NOTE(review): the winning combination is chosen by its accuracy on
# x_test/y_test, so `best_accuracy` is an optimistic estimate of performance on
# unseen data. Prefer a cross-validated search for model selection.
rf_hyperparameters = {
    'n_estimators': [4, 6, 8],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 8],
    'min_samples_leaf': [1, 2, 3]
}

# Draw the forest seeds once from a dedicated, seeded generator:
#  * the cell gives the same numbers on every Restart & Run All, and
#  * every combination is evaluated with the same seeds, so differences between
#    combinations are not blurred by seed-to-seed noise.
seed_generator = random.Random(42)
rf_seeds = [seed_generator.randint(0, 9999) for _ in range(model_iterations)]

best_accuracy = 0
best_params = None

for params in ParameterGrid(rf_hyperparameters):

    empirical_accuracy_sum = 0.0
    true_accuracy_sum = 0.0

    for random_seed in rf_seeds:
        rf_model = RandomForestClassifier(**params, random_state=random_seed)
        rf_model.fit(x_train, y_train)

        # score() runs predict() itself, so no separate predict() call is needed.
        empirical_accuracy_sum += rf_model.score(x_train, y_train)
        true_accuracy_sum += rf_model.score(x_test, y_test)

    empirical_accuracy = empirical_accuracy_sum / model_iterations
    true_accuracy = true_accuracy_sum / model_iterations

    print("RF Model: Parameters - {}\n Average over {} iterations - Empirical Accuracy: {:.6f}, True Accuracy: {:.6f}\n".format(params, model_iterations, empirical_accuracy, true_accuracy))

    # Strict '>' keeps the first combination encountered when averages tie.
    if true_accuracy > best_accuracy:
        best_accuracy = true_accuracy
        best_params = params

print("BEST RF Model: Parameters - {}\n Average over {} iterations - Accuracy: {:.6f}\n".format(best_params, model_iterations, best_accuracy))

RF Model: Parameters - {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 4}
 Average over 10 iterations - Empirical Accuracy: 0.897222, True Accuracy: 0.226471
RF Model: Parameters - {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 6}
 Average over 10 iterations - Empirical Accuracy: 0.955370, True Accuracy: 0.241912
RF Model: Parameters - {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 8}
 Average over 10 iterations - Empirical Accuracy: 0.981296, True Accuracy: 0.247059
RF Model: Parameters - {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 4}
 Average over 10 iterations - Empirical Accuracy: 0.862222, True Accuracy: 0.219118
RF Model: Parameters - {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 6}
 Average over 10 iterations - Empirical Accuracy: 0.934259, True Accuracy: 0.253676
RF Model: Parameters - {'max_depth'

# Random Forest Model

In [10]:
# Cross-validated grid search over the same Random Forest grid.
# The estimator gets a fixed random_state so that the CV scores, and therefore
# the selected parameters, are identical on every run; with forests this small
# (4-8 trees) an unseeded search can pick a different "best" each time.
rf_model = RandomForestClassifier(random_state=42)
rf_grid_search = GridSearchCV(
    estimator=rf_model,
    param_grid=rf_hyperparameters,
    cv=5,                # 5-fold cross-validation on the training split only
    scoring='accuracy',
    n_jobs=3,            # number of parallel workers
)
rf_grid_search.fit(x_train, y_train)

# best_score_ is the mean cross-validated accuracy of the best combination.
print("** Best RF Accuracy: ** {:.6f}".format(rf_grid_search.best_score_))
print("**  Best RF Params:  ** {}".format(rf_grid_search.best_params_))

** Best RF Accuracy: ** 0.28
**  Best RF Params:  ** {'max_depth': 20, 'min_samples_leaf': 3, 'min_samples_split': 8, 'n_estimators': 8}


# SVM Model

In [None]:
# Cross-validated grid search for the SVM classifier.
#
# The grid is given as one sub-grid per kernel: `gamma` is a coefficient of the
# rbf kernel and is ignored by the linear kernel, so crossing it with 'linear'
# would only refit identical models (and could report a meaningless gamma in
# best_params_). The set of distinct models searched is unchanged.
svm_hyperparameters = [
    {
        'kernel': ['linear'],
        'C': [0.1, 1, 10],
    },
    {
        'kernel': ['rbf'],
        'C': [0.1, 1, 10],
        'gamma': ['scale', 'auto'],
    },
]

svm_model = SVC()
svm_grid_search = GridSearchCV(
    estimator=svm_model,
    param_grid=svm_hyperparameters,
    cv=5,                # 5-fold cross-validation on the training split only
    scoring='accuracy',
    n_jobs=3,            # number of parallel workers
)
svm_grid_search.fit(x_train, y_train)

# best_score_ is the mean cross-validated accuracy of the best combination.
print("** Best SVM Accuracy: ** {:.2f}".format(svm_grid_search.best_score_))
print("**  Best SVM Params:  ** {}".format(svm_grid_search.best_params_))