# `nnetsauce`'s Ridge2 Multitask classifier

This notebook demonstrates the use of `nnetsauce`'s Ridge2 Multitask classifier. It's a quasi-randomized network model for classification with 2 shrinkage parameters (one on input data, another one on the hidden layer). In this model, a multi-class classification problem is turned into multiple two-class problems.

In [0]:
pip install git+https://github.com/thierrymoudiki/nnetsauce.git

In [0]:
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer, load_wine, load_iris, load_digits, make_classification
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

In [32]:
# dataset no. 1 ---------- 

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

print(Z.shape)
 
fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=np.int(9.83730469e+01), 
                                   dropout=4.31054687e-01, 
                                   n_clusters=np.int(1.71484375e+00),
                                   lambda1=1.24023438e+01, lambda2=7.30263672e+03)

start = time()
fit_obj.fit(X_train, y_train)
print(time() - start)

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))


start = time()
preds = fit_obj.predict(X_test)
print(time() - start)
print(metrics.classification_report(preds, y_test))


(569, 30)
0.04428410530090332
0.9824561403508771
0.9809555629802873
0.0019502639770507812
              precision    recall  f1-score   support

           0       0.98      0.98      0.98        41
           1       0.99      0.99      0.99        73

    accuracy                           0.98       114
   macro avg       0.98      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114



In [33]:
# dataset no. 2 ----------

wine = load_wine()
Z = wine.data
t = wine.target
np.random.seed(123)
Z_train, Z_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=15,
                                  dropout=0.1, n_clusters=3, 
                                  type_clust="gmm")

start = time()
fit_obj.fit(Z_train, y_train)
print(time() - start)

print(fit_obj.score(Z_test, y_test))

preds = fit_obj.predict(Z_test)
print(metrics.classification_report(preds, y_test))

0.014364957809448242
1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      1.00      1.00        11
           2       1.00      1.00      1.00        17

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36



In [34]:
# dataset no. 3 ----------

iris = load_iris()
Z = iris.data
t = iris.target
np.random.seed(123)
Z_train, Z_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)


fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=10,
                                  dropout=0.1, n_clusters=2)

start = time()
fit_obj.fit(Z_train, y_train)
print(time() - start)

print(fit_obj.score(Z_test, y_test))

preds = fit_obj.predict(Z_test)
print(metrics.classification_report(preds, y_test))

0.030297279357910156
0.9666666666666667
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       1.00      0.86      0.92         7
           2       0.91      1.00      0.95        10

    accuracy                           0.97        30
   macro avg       0.97      0.95      0.96        30
weighted avg       0.97      0.97      0.97        30



In [35]:
# dataset no. 4 ----------

digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=25,
                                  dropout=0.1, n_clusters=3, 
                                  type_clust="gmm")

start = time()
fit_obj.fit(X_train, y_train)
print(time() - start)
print(fit_obj.score(X_test, y_test))

start = time()
preds = fit_obj.predict(X_test)
print(time() - start)
print(metrics.classification_report(preds, y_test))


0.26060009002685547
0.9361111111111111
0.0030503273010253906
              precision    recall  f1-score   support

           0       1.00      0.97      0.99        40
           1       1.00      0.85      0.92        40
           2       0.92      0.97      0.94        34
           3       0.94      0.89      0.91        35
           4       0.95      1.00      0.98        40
           5       0.97      1.00      0.99        36
           6       0.95      0.95      0.95        43
           7       1.00      0.89      0.94        35
           8       0.68      0.96      0.79        26
           9       0.96      0.87      0.92        31

    accuracy                           0.94       360
   macro avg       0.94      0.94      0.93       360
weighted avg       0.95      0.94      0.94       360

