In [1]:
import classifiers as clfs
from util import get_data, get_train_test, print_metrics
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import svm
from sklearn.neural_network import MLPClassifier
import time

In [2]:
x, y = get_data()

In [3]:
cv_clfs = [LogisticRegression(), GradientBoostingClassifier(random_state=42), svm.SVC(), MLPClassifier(hidden_layer_sizes=int(2* len(x.columns) / 3), random_state=42, activation='logistic')]
clf_names = ["Log Reg", "Gradient Boosting", "SVM", "NN"]
for i, clf in enumerate(cv_clfs):
    print(clf_names[i])
    st = time.time()
    acc_score = cross_val_score(clf, x, y, scoring='accuracy')
    prec_score = cross_val_score(clf, x, y, scoring='precision')
    f1_score = cross_val_score(clf, x, y, scoring='f1')
    et = time.time()
    print("Runtime: ", et - st)
    print("Accuracy: ", acc_score.mean())
    print("Precision: ", prec_score.mean())
    print("F1 Score: ", f1_score.mean())
    print()

Log Reg
Runtime:  0.25724291801452637
Accuracy:  0.5917467421350533
Precision:  0.6138602152899935
F1 Score:  0.7101714557008092

Gradient Boosting
Runtime:  5.418907403945923
Accuracy:  0.47866262998552067
Precision:  0.6070928186583566
F1 Score:  0.4862777598561614

SVM
Runtime:  1.0128228664398193
Accuracy:  0.5842613312272388
Precision:  0.5858978168073864
F1 Score:  0.7375638494779106

NN
Runtime:  3.263188123703003
Accuracy:  0.588008424378044
Precision:  0.5890748476180652
F1 Score:  0.7377999163383008



In [4]:
x_train, y_train, x_test, y_test = get_train_test()

In [5]:
st = time.time() # start time
y_nb = clfs.naiveBayes(x_train, y_train, x_test)
et = time.time() # end time
print("Naive Bayes Statistics")
print("Runtime: ", et - st)
print_metrics(y_nb, y_test)

Naive Bayes Statistics
Runtime:  0.0
Accuracy:  0.35514018691588783
Precision:  0.6
F1 Score:  0.04166666666666667


In [6]:
st = time.time()
y_lr = clfs.logistricRegression(x_train, y_train, x_test)
et = time.time()
print("Logistic Regression Statistics")
print("Runtime: ", et - st)
print_metrics(y_lr, y_test)

Logistic Regression Statistics
Runtime:  0.004505157470703125
Accuracy:  0.6495327102803738
Precision:  0.6495327102803738
F1 Score:  0.7875354107648725


In [7]:
st = time.time()
y_dt = clfs.decisionTree(x_train, y_train, x_test)
et = time.time()
print("Decision Tree Statistics")
print("Runtime: ", et - st)
print_metrics(y_dt, y_test)

Decision Tree Statistics
Runtime:  0.013005971908569336
Accuracy:  0.5700934579439252
Precision:  0.6942148760330579
F1 Score:  0.6461538461538461


In [8]:
st = time.time()
y_rf = clfs.randomForest(x_train, y_train, x_test, 100)
et = time.time()
print("Random Forest (n = {n}) Statistics")
print("Runtime: ", et - st)
print_metrics(y_rf, y_test)

Random Forest (n = {n}) Statistics
Runtime:  0.23760104179382324
Accuracy:  0.6401869158878505
Precision:  0.7012987012987013
F1 Score:  0.7372013651877133


In [9]:
st = time.time()
y_gb = clfs.gradientBoost(x_train, y_train, x_test)
et = time.time()
print("Gradient Boost Statistics")
print("Runtime: ", et - st)
print_metrics(y_gb, y_test)

Gradient Boost Statistics
Runtime:  0.341555118560791
Accuracy:  0.6588785046728972
Precision:  0.726027397260274
F1 Score:  0.7438596491228071


In [10]:
st = time.time()
y_svm = clfs.SVM(x_train, y_train, x_test)
et = time.time()
print("SVM Statistics")
print("Runtime: ", et - st)
print_metrics(y_svm, y_test)

SVM Statistics
Runtime:  0.06299328804016113
Accuracy:  0.6495327102803738
Precision:  0.6495327102803738
F1 Score:  0.7875354107648725


In [11]:
st = time.time()
y_nn = clfs.neuralNetwork(x_train, y_train, x_test)
et = time.time()
print("Neural Network Statistics")
print("Runtime: ", et - st)
print_metrics(y_nn, y_test)

Neural Network Statistics
Runtime:  0.23452329635620117
Accuracy:  0.6308411214953271
Precision:  0.6456310679611651
F1 Score:  0.7710144927536232
