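### Comparison Purpose

This notebook trains SVM (linear and RBF kernel) and random forest classifiers on datasets 1 and 2 and reports accuracy, precision, recall, F1 and AUC on the test split, so that the results can be compared.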

In [1]:
from data_loader import load_txt

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

from metrics import get_performance_measure, print_metric_score

## SVM

In [4]:
# Dataset 1: fit a linear-kernel and an RBF-kernel SVM on the same split and
# report the test-set metrics for each.
print('dataset 1')
X_train, X_test, y_train, y_test = load_txt(1)

# (label passed to print_metric_score, kernel-specific SVC arguments)
svm_configs = [
    ('SVM linear', {'kernel': 'linear'}),
    ('SVM rbf', {'kernel': 'rbf'}),
]

for label, svc_params in svm_configs:
    # probability=True makes predict_proba available. scikit-learn fits those
    # probabilities with an internal shuffled cross-validation, so random_state
    # is pinned (same seed as the random forest cell) to keep the
    # probability-based AUC reproducible across runs.
    clf = SVC(probability=True, random_state=0, **svc_params)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    y_pred_proba = clf.predict_proba(X_test)

    # Column 1 of predict_proba corresponds to clf.classes_[1]; presumably the
    # positive class expected by get_performance_measure -- confirm in metrics.
    # The 'precison' spelling is kept: notebook variables are global and other
    # cells may read this name.
    accuracy, precison, recall, f1, auc = get_performance_measure(y_test, y_pred, y_pred_proba[:, 1])
    print_metric_score(label, accuracy, precison, recall, f1, auc)

dataset 1
SVM linear
accuracy: 0.9736842105263158
precison: 0.9777777777777777
recall: 0.9565217391304348
f1: 0.967032967032967
auc: 0.9980818414322251
SVM rbf
accuracy: 0.9736842105263158
precison: 0.9777777777777777
recall: 0.9565217391304348
f1: 0.967032967032967
auc: 0.9980818414322251


In [5]:
# Dataset 2: fit a linear-kernel SVM and an RBF-kernel SVM (gamma=1) on the
# same split and report the test-set metrics for each.
print('dataset 2')
X_train, X_test, y_train, y_test = load_txt(2)

# (label passed to print_metric_score, kernel-specific SVC arguments)
svm_configs = [
    ('SVM linear', {'kernel': 'linear'}),
    ('SVM rbf', {'kernel': 'rbf', 'gamma': 1}),
]

for label, svc_params in svm_configs:
    # probability=True makes predict_proba available. scikit-learn fits those
    # probabilities with an internal shuffled cross-validation, so random_state
    # is pinned (same seed as the random forest cell) to keep the
    # probability-based AUC reproducible across runs.
    clf = SVC(probability=True, random_state=0, **svc_params)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    y_pred_proba = clf.predict_proba(X_test)

    # Column 1 of predict_proba corresponds to clf.classes_[1]; presumably the
    # positive class expected by get_performance_measure -- confirm in metrics.
    # The 'precison' spelling is kept: notebook variables are global and other
    # cells may read this name.
    accuracy, precison, recall, f1, auc = get_performance_measure(y_test, y_pred, y_pred_proba[:, 1])
    print_metric_score(label, accuracy, precison, recall, f1, auc)

dataset 2
SVM linear
accuracy: 0.7311827956989247
precison: 0.5151515151515151
recall: 0.6538461538461539
f1: 0.576271186440678
auc: 0.7933409873708381
SVM rbf
accuracy: 0.7956989247311828
precison: 0.64
recall: 0.6153846153846154
f1: 0.6274509803921569
auc: 0.8005166475315728


## Random Forest

In [3]:
# Random forest: the same seeded configuration (20 trees, depth 2) is fitted
# and scored on each dataset in turn.
for banner, dataset_id in (('dataset 1', 1), ('dataset 2', 2)):
    print(banner)
    X_train, X_test, y_train, y_test = load_txt(dataset_id)

    clf = RandomForestClassifier(n_estimators=20, max_depth=2, random_state=0)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    y_pred_proba = clf.predict_proba(X_test)

    # Column 1 of predict_proba (the probability of clf.classes_[1]) is the
    # score handed to get_performance_measure alongside the hard predictions.
    accuracy, precison, recall, f1, auc = get_performance_measure(
        y_test, y_pred, y_pred_proba[:, 1]
    )
    print_metric_score('random forest', accuracy, precison, recall, f1, auc)

dataset 1
random forest
accuracy: 0.956140350877193
precison: 0.9183673469387755
recall: 0.9782608695652174
f1: 0.9473684210526316
auc: 0.9945652173913043
dataset 2
random forest
accuracy: 0.6881720430107527
precison: 0.43478260869565216
recall: 0.38461538461538464
f1: 0.40816326530612246
auc: 0.7422502870264065
