

This notebook demonstrates the use of `nnetsauce`'s RandomBag (bootstrap aggregating of quasi-randomized networks) classifier.

In [1]:
pip install git+https://github.com/thierrymoudiki/nnetsauce.git

Collecting git+https://github.com/thierrymoudiki/nnetsauce.git
  Cloning https://github.com/thierrymoudiki/nnetsauce.git to /tmp/pip-req-build-5avsisea
  Running command git clone -q https://github.com/thierrymoudiki/nnetsauce.git /tmp/pip-req-build-5avsisea
Building wheels for collected packages: nnetsauce
  Building wheel for nnetsauce (setup.py) ... [?25l[?25hdone
  Created wheel for nnetsauce: filename=nnetsauce-0.2.0-cp36-none-any.whl size=81119 sha256=decd769f4534b7b8075c53b53742394c319388b572e3fef681618764c468c2b1
  Stored in directory: /tmp/pip-ephem-wheel-cache-f2zhei62/wheels/c7/77/cc/a317b05d253859b63000b77a371ac0fd6f8b428a57752a70ef
Successfully built nnetsauce
Installing collected packages: nnetsauce
Successfully installed nnetsauce-0.2.0


In [0]:
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer, load_wine, load_iris, make_classification
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time


In [3]:
# dataset no. 1 ----------

# logistic reg
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

# decision tree
clf = DecisionTreeClassifier(max_depth=2, random_state=123)
fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2,
                                direct_link=True,
                                n_estimators=100, 
                                col_sample=0.9, row_sample=0.9,
                                dropout=0.3, n_clusters=0, verbose=1)

start = time()
fit_obj.fit(X_train, y_train)
print(time() - start)
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))


start = time()
preds = fit_obj.predict(X_test)
print(time() - start)
print(metrics.classification_report(preds, y_test))




100/100 [██████████████████████████████] - 1s 6ms/step
0.630929708480835
100/100 [██████████████████████████████] - 0s 367us/step
0.9298245614035088
100/100 [██████████████████████████████] - 0s 362us/step
0.9077848312729703
100/100 [██████████████████████████████] - 0s 360us/step
0.037676095962524414
              precision    recall  f1-score   support

           0       0.83      0.97      0.89        35
           1       0.99      0.91      0.95        79

    accuracy                           0.93       114
   macro avg       0.91      0.94      0.92       114
weighted avg       0.94      0.93      0.93       114



In [4]:
# dataset no. 2 ----------

wine = load_wine()
Z = wine.data
t = wine.target
np.random.seed(123)
Z_train, Z_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

clf = DecisionTreeClassifier(max_depth=2, random_state=123)
fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=5,
                                direct_link=True,
                                n_estimators=100, 
                                col_sample=0.5, row_sample=0.5,
                                dropout=0.1, n_clusters=3, 
                                type_clust="gmm", verbose=1)

fit_obj.fit(Z_train, y_train)
print(fit_obj.score(Z_test, y_test))

preds = fit_obj.predict(Z_test)
print(metrics.classification_report(preds, y_test))




100/100 [██████████████████████████████] - 1s 13ms/step
100/100 [██████████████████████████████] - 0s 623us/step
1.0
100/100 [██████████████████████████████] - 0s 597us/step
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      1.00      1.00        11
           2       1.00      1.00      1.00        17

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36



In [5]:
# dataset no. 3 ----------

iris = load_iris()
Z = iris.data
t = iris.target
np.random.seed(123)
Z_train, Z_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

clf = LogisticRegression(solver='liblinear', multi_class = 'ovr', 
                         random_state=123)
fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=5,
                                direct_link=False,
                                n_estimators=100, 
                                col_sample=0.5, row_sample=0.5,
                                dropout=0.1, n_clusters=0, verbose=0,
                                n_jobs=1)

fit_obj.fit(Z_train, y_train)
# model accuracy on test set 
print(fit_obj.score(Z_test, y_test))



0.9333333333333333


In [6]:

# dataset no. 4 ----------

X, y = make_classification(n_samples=2500, n_features=20, 
                                               random_state=783451)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, 
                                                    random_state=351452)

clf = DecisionTreeClassifier(max_depth=1, random_state=123)
fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=5,
                                direct_link=True,
                                n_estimators=100, 
                                col_sample=0.5, row_sample=0.5,
                                dropout=0.1, n_clusters=3, 
                                type_clust="gmm", verbose=1)

fit_obj.fit(X_train, y_train)
print(fit_obj.score(X_test, y_test))

preds = fit_obj.predict(X_test)
print(metrics.classification_report(preds, y_test))


100/100 [██████████████████████████████] - 9s 85ms/step
100/100 [██████████████████████████████] - 0s 1ms/step
0.912
100/100 [██████████████████████████████] - 0s 1ms/step
              precision    recall  f1-score   support

           0       0.88      0.96      0.91       246
           1       0.95      0.87      0.91       254

    accuracy                           0.91       500
   macro avg       0.91      0.91      0.91       500
weighted avg       0.92      0.91      0.91       500

