In [1]:
from GenoClassifier import *
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import BaggingClassifier

In [2]:
# Base learners under comparison; each gets a fixed seed so reruns are repeatable.
log_clf = LogisticRegression(random_state=42, C=3e-6)
rnd_clf = RandomForestClassifier(random_state=42)
svm_clf = SVC(random_state=42, max_iter=1000)
# The MLP is currently left out of the comparison:
# mlp_clf = MLPClassifier(random_state=42, max_iter=1000, activation='logistic')
# mods = {"Logistic Regression":log_clf,"Random Forests": rnd_clf,"SVM": svm_clf,"MLP": mlp_clf}
mods = {"Logistic Regression": log_clf, "Random Forests": rnd_clf, "SVM": svm_clf}

# Wrap every base learner in a bagging ensemble (100 estimators, 50 bootstrap
# samples each). `bags` maps the display name to its ensemble; `estimators`
# collects the same ensembles as (name, estimator) pairs for the voting classifier.
bags = dict()
estimators = list()
for name, mod in mods.items():
    bag = BaggingClassifier(mod, n_estimators=100,
                            max_samples=50, bootstrap=True, n_jobs=-1, random_state=42)
    bags[name] = bag
    # VotingClassifier expects (name, estimator) tuples; the previous 1-tuple
    # `(name, )` would make the voting classifier unusable when fitted.
    # NOTE(review): the bagged ensemble is used here; swap in `mod` if voting
    # over the bare learners was intended.
    estimators.append((name, bag))

# Hard (majority-label) voting across the bagged ensembles.
voting_clf = VotingClassifier(
    estimators=estimators,
    voting='hard')

# Using diff values (`diff=True`)

In [3]:
# Classifier built from the breast-cancer results/labels files with diff=True,
# feature selection and dimensionality reduction switched on.
gc = GenoClassifier(
    'results/breast_cancer2.results',
    'datasets/breast_cancer2_y',
    diff=True,
    select_features=True,
    dim_reduct=True,
)

### Using bagging with 10 folds

In [4]:
# Evaluate every bagged ensemble on the diff-value classifier with k=10
# (10 folds, per the section heading) and print each metric it returns.
for label, ensemble in bags.items():
    print("Using bag of ", label)
    results = gc.classify(k=10, model=ensemble)
    for metric_name, value in results.items():
        print(metric_name, value)
    print("-" * 10)

Using bag of  Logistic Regression
accuracy 0.613055555556
recall 0.748095238095
f1 0.652702297702
precision 0.61
----------
Using bag of  SVM
accuracy 0.6475
recall 0.6475
f1 0.785769230769
precision 1.0
----------
Using bag of  Random Forests
accuracy 0.740833333333
recall 0.733928571429
f1 0.835018315018
precision 0.983333333333
----------


### Using bagging with 10 random folds (20% test split)

In [5]:
# Same comparison as the 10-fold run, but with random sampling enabled and a
# 20% test split requested for each of the k=10 rounds.
for label, ensemble in bags.items():
    print("Using bag of ", label)
    results = gc.classify(
        k=10,
        model=ensemble,
        random_sampling=True,
        test_size=0.20,
    )
    for metric_name, value in results.items():
        print(metric_name, value)
    print("-" * 10)

Using bag of  Logistic Regression
accuracy 0.627777777778
recall 0.709296259296
f1 0.67806993007
precision 0.67203962704
----------
Using bag of  SVM
accuracy 0.661111111111
recall 0.661111111111
f1 0.793989750516
precision 1.0
----------
Using bag of  Random Forests
accuracy 0.705555555556
recall 0.715776143791
f1 0.813555240685
precision 0.964331501832
----------


### Bagging only, without k-folds (`k=-1`, 20% test split)

In [7]:
# Evaluate each bagged ensemble with k=-1 (no k-folds, per the section heading)
# and a 20% test split.
# NOTE(review): the recorded output of this cell shows every metric negated
# (e.g. accuracy -0.5556). That looks like the k=-1 path inside
# GenoClassifier.classify mishandles the sign -- confirm in GenoClassifier.
for label, ensemble in bags.items():
    print("Using bag of ", label)
    results = gc.classify(k=-1, model=ensemble, test_size=0.20)
    for metric_name, value in results.items():
        print(metric_name, value)
    print("-" * 10)

Using bag of  Logistic Regression
accuracy -0.555555555556
recall -0.692307692308
f1 -0.692307692308
precision -0.692307692308
----------
Using bag of  SVM
accuracy -0.722222222222
recall -0.722222222222
f1 -0.838709677419
precision -1.0
----------
Using bag of  Random Forests
accuracy -0.722222222222
recall -0.722222222222
f1 -0.838709677419
precision -1.0
----------


# Using raw (non-diff) values (`diff=False`)

In [None]:
# Same input files as the diff-based classifier, but with diff=False and
# scaling turned on; feature selection and dimensionality reduction stay enabled.
gc_raw = GenoClassifier(
    'results/breast_cancer2.results',
    'datasets/breast_cancer2_y',
    diff=False,
    select_features=True,
    dim_reduct=True,
    scale=True,
)

In [None]:
# Evaluate every bagged ensemble on the raw-value classifier with k=10 and
# print each metric it returns.
for label, ensemble in bags.items():
    print("Using bag of ", label)
    results = gc_raw.classify(k=10, model=ensemble)
    for metric_name, value in results.items():
        print(metric_name, value)
    print("-" * 10)

Using bag of  Logistic Regression




accuracy 0.688888888889
recall 0.795476190476
f1 0.740388500389
precision 0.71
----------
Using bag of  SVM


In [None]:
# Raw-value counterpart of the random-sampling run with k=10.
# NOTE(review): unlike the diff-value run, no test_size is passed here, so the
# classify() default applies -- confirm it matches 0.20 for a fair comparison.
for label, ensemble in bags.items():
    print("Using bag of ", label)
    results = gc_raw.classify(k=10, model=ensemble, random_sampling=True)
    for metric_name, value in results.items():
        print(metric_name, value)
    print("-" * 10)

In [None]:
# Raw-value counterpart of the k=-1 (no k-folds) run.
# NOTE(review): the diff-value run passes test_size=0.20; here the classify()
# default is used -- confirm both runs evaluate on the same split size.
for label, ensemble in bags.items():
    print("Using bag of ", label)
    results = gc_raw.classify(k=-1, model=ensemble)
    for metric_name, value in results.items():
        print(metric_name, value)
    print("-" * 10)