In [2]:
import os
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import preprocessor as prp

In [9]:
# Location of the image dataset. The default is the original author's local
# Windows path; set the PAINTING_DATASET_PATH environment variable to run the
# notebook on another machine without editing this cell. An unset or empty
# variable falls back to the default.
dataset_path = os.path.abspath(
    os.environ.get('PAINTING_DATASET_PATH')
    or r'D:\Program Files (x86)\painting-to-artist\medium_dataset'
)

# Settings passed positionally to prp.preprocess_data.
# NOTE(review): the meanings below are inferred from the names only; confirm
# them against the preprocessor module.
n = 100                          # presumably a sample/class count limit
ratio = 0.8                      # presumably the train fraction of the split
size = 64                        # presumably the image side length in pixels
pca_components_threshold = 0.95  # presumably the explained-variance target for PCA

# The trailing 'minmax' argument presumably selects the feature scaling method,
# and `le` is presumably the fitted label encoder - TODO confirm.
x_train, x_test, y_train, y_test, le = prp.preprocess_data(
    dataset_path, n, ratio, size, pca_components_threshold, 'minmax'
)

In [13]:
# Number of fit/score repetitions that the evaluation cells below average over.
model_iterations = 100

# Random Forest Model

In [10]:
# Fixed seed for the forests built during the search. Without it every run of
# this cell grows different trees, so the cross-validated scores - and with
# them the selected best_params_ - change from one kernel restart to the next.
RF_SEARCH_SEED = 42

# Candidate values; GridSearchCV cross-validates every combination.
rf_hyperparameters = {
    'n_estimators': [4, 6, 8],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 8],
    'min_samples_leaf': [1, 2, 3]
}

rf_model = RandomForestClassifier(random_state=RF_SEARCH_SEED)
rf_grid_search = GridSearchCV(
    estimator=rf_model,
    param_grid=rf_hyperparameters,
    cv=5,                 # 5-fold cross-validation on the training split
    scoring='accuracy',
    n_jobs=3,             # evaluate candidates in 3 parallel workers
)
rf_grid_search.fit(x_train, y_train)

# best_score_ is the mean cross-validated accuracy of the winning combination.
print("** Best RF Accuracy: ** {:.6f}".format(rf_grid_search.best_score_))
print("**  Best RF Params:  ** {}".format(rf_grid_search.best_params_))

** Best RF Accuracy: ** 0.252290
**  Best RF Params:  ** {'max_depth': 10, 'min_samples_leaf': 3, 'min_samples_split': 5, 'n_estimators': 8}


In [15]:
# Evaluate the tuned random forest: train `model_iterations` independent
# forests with the hyperparameters chosen by the grid search and report the
# mean accuracy on the training split ("empirical") and the test split ("true").

# Pull the four tuned settings out of the search result once, up front.
rf_best_kwargs = {
    name: rf_grid_search.best_params_[name]
    for name in ('n_estimators', 'max_depth', 'min_samples_split', 'min_samples_leaf')
}

empirical_accuracy_total = 0
true_accuracy_total = 0

for i in range(model_iterations):
    # A fresh, unseeded forest per run, so each run is a different random model.
    rf_model = RandomForestClassifier(**rf_best_kwargs)
    rf_model.fit(x_train, y_train)
    empirical_accuracy_total += rf_model.score(x_train, y_train)
    true_accuracy_total += rf_model.score(x_test, y_test)

average_empirical_accuracy = empirical_accuracy_total / model_iterations
average_true_accuracy = true_accuracy_total / model_iterations

print("Overall best: Empirical Accuracy: {:.6f}, True Accuracy: {:.6f}".format(average_empirical_accuracy, average_true_accuracy))

Overall best: Empirical Accuracy: 0.912152, True Accuracy: 0.242746


# SVM Model

In [11]:
# Hyperparameter search for the support vector classifier.
# Note: gamma is the coefficient of the 'rbf' kernel and has no effect on the
# 'linear' kernel, so the linear candidates are effectively evaluated once per
# gamma value with the same resulting model.
svm_hyperparameters = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto'],
}

svm_model = SVC()
svm_grid_search = GridSearchCV(
    estimator=svm_model,
    param_grid=svm_hyperparameters,
    cv=5,                 # 5-fold cross-validation on the training split
    scoring='accuracy',
    n_jobs=3,             # evaluate candidates in 3 parallel workers
)
svm_grid_search.fit(x_train, y_train)

# Report the mean cross-validated accuracy of the winner and its settings.
print("** Best SVM Accuracy: ** {:.2f}".format(svm_grid_search.best_score_))
print("**  Best SVM Params:  ** {}".format(svm_grid_search.best_params_))

** Best SVM Accuracy: ** 0.51
**  Best SVM Params:  ** {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}


In [16]:
# Evaluate the tuned SVM on the training split ("empirical") and the test
# split ("true").
#
# SVC with the default probability=False has no random component: fitting it
# again with the same hyperparameters and data yields the same model and the
# same scores. Unlike the random forest there is nothing to average over, so
# the model is fitted once instead of `model_iterations` times. The result
# names are kept so the report line matches the random forest cell.
svm_model = SVC(C=svm_grid_search.best_params_['C'],
                kernel=svm_grid_search.best_params_['kernel'],
                gamma=svm_grid_search.best_params_['gamma'])
svm_model.fit(x_train, y_train)

average_empirical_accuracy = svm_model.score(x_train, y_train)
average_true_accuracy = svm_model.score(x_test, y_test)

print("Overall best: Empirical Accuracy: {:.6f}, True Accuracy: {:.6f}".format(average_empirical_accuracy, average_true_accuracy))

Overall best: Empirical Accuracy: 1.000000, True Accuracy: 0.514085
