In [3]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
# from sklearn.model_selection import train_test_split


# Generate the feature file on first use. Importing `featurize` is relied on
# here to write mask_data.csv -- presumably as an import side effect; the
# module is not shown in this notebook, so verify.
if not os.path.exists("mask_data.csv"):
    import featurize

# The CSV has no header row. Every column except the last is used as a
# feature; the last column is used as the label vector.
data = np.array(
    pd.read_csv('mask_data.csv', sep=',', header=None)
)
X = data[:, :-1]
Y = data[:, -1:].flatten()

# X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=0)

In [28]:
from sklearn import svm

def svm_experiment(kernel: str, C: float, k: int, degree: int = 3):
    """
    Cross-validate an SVM classifier, then fit it on the full data set.

    Based on the SVC sample code at
    https://scikit-learn.org/stable/modules/cross_validation.html

    Uses the notebook-level ``X`` (features) and ``Y`` (labels).

    Parameters
    ----------
    kernel : str
        Kernel name forwarded to ``svm.SVC`` (e.g. "linear", "rbf", "poly").
    C : float
        Regularization parameter forwarded to ``svm.SVC``.
    k : int
        Forwarded to ``cross_val_score`` as ``cv`` (number of folds).
    degree : int, default 3
        Polynomial degree forwarded to ``svm.SVC``.

    Returns
    -------
    sklearn.svm.SVC
        The classifier, fitted on all of ``X`` and ``Y``.

    Side effects
    ------------
    Prints the mean and standard deviation of the per-fold scores.
    """
    clf = svm.SVC(kernel=kernel, C=C, random_state=0, degree=degree)

    # cross_val_score fits *clones* of `clf`, one per fold; `clf` itself is
    # left untouched (unfitted) by this call.
    scores = cross_val_score(clf, X, Y, cv=k)
    print("%0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))

    # Fit explicitly so the returned object really is a trained classifier,
    # as callers expect. This costs one extra fit on the full data set.
    clf.fit(X, Y)
    return clf

In [5]:
# 5-fold cross-validation of a linear-kernel SVM with C=1.
linear_svm_model = svm_experiment(kernel="linear", C=1, k=5)

0.86 accuracy with a standard deviation of 0.01


In [31]:
# 5-fold cross-validation of an RBF (Gaussian) kernel SVM with C=1.
gaussian_svm_model = svm_experiment(kernel="rbf", C=1, k=5)

0.92 accuracy with a standard deviation of 0.01


In [29]:
# 5-fold cross-validation of a polynomial-kernel SVM with C=1.
# NOTE(review): the variable is named "cubic" but the degree passed is 5,
# not 3 -- confirm which one is intended.
cubic_svm_model = svm_experiment(kernel="poly", C=1, k=5, degree=5)

0.89 accuracy with a standard deviation of 0.01


In [42]:
import xgboost as xgb

def xgb_experiment(rounds : int, k : int, param : dict):
    """
    Run k-fold cross-validation of an XGBoost model on the notebook-level
    ``X`` and ``Y``.

    Adapted from the sample code at:
    https://xgboost.readthedocs.io/en/latest/python/examples/cross_validation.html

    Parameters
    ----------
    rounds : int
        Passed to ``xgb.cv`` as its third positional argument (the number of
        boosting rounds).
    k : int
        Passed to ``xgb.cv`` as ``nfold``.
    param : dict
        Booster parameters, passed through to ``xgb.cv`` unchanged.

    Returns
    -------
    Whatever ``xgb.cv`` returns for these arguments.

    Notes
    -----
    The metric is fixed to ``merror``, early stopping to 10 rounds and the
    seed to 0. Per-round results are printed by an ``EvaluationMonitor``
    callback with standard deviations shown.
    """
    full_matrix = xgb.DMatrix(X, label=Y)
    progress_printer = xgb.callback.EvaluationMonitor(show_stdv=True)

    cv_history = xgb.cv(
        param,
        full_matrix,
        rounds,
        nfold=k,
        metrics={'merror'},
        early_stopping_rounds=10,
        seed=0,
        callbacks=[progress_printer],
    )
    return cv_history

In [None]:
# Booster parameters handed to xgb.cv via xgb_experiment.
# NOTE(review): eta=2 is outside the [0, 1] range the XGBoost documentation
# gives for the learning rate -- confirm this value is intended.
# NOTE(review): no 'objective' is set even though 'num_class' is; consider
# stating the multi-class objective explicitly.
param = dict(
    max_depth=5,
    min_child_weight=1,
    gamma=0,
    eta=2,
    num_class=4,
    subsample=0.8,
    colsample_bytree=0.8,
)

# 10 boosting rounds, 5 folds.
xgb_experiment(rounds=10, k=5, param=param)

[0]	train-merror:0.12109+0.00384	test-merror:0.18441+0.00992
[1]	train-merror:0.07197+0.00645	test-merror:0.16659+0.01258
[2]	train-merror:0.04089+0.00466	test-merror:0.14998+0.01087
[3]	train-merror:0.02458+0.00783	test-merror:0.14393+0.01742
[4]	train-merror:0.01310+0.00778	test-merror:0.13261+0.01353
[5]	train-merror:0.01106+0.01501	test-merror:0.13246+0.02385
[6]	train-merror:0.01182+0.02086	test-merror:0.12657+0.02318
[7]	train-merror:0.01816+0.03585	test-merror:0.12763+0.03098
