## In this demo, we show how to use uq360 metrics for hyperparameter optimization with Lale and scikit-learn's `GridSearchCV`.

In [1]:
%matplotlib inline

In [2]:
import sys
# Add the directory two levels above this notebook to the import path.
# NOTE(review): presumably this is the repository root, so that the local
# `uq360` checkout is importable without installing it — confirm.
sys.path.append("../../")

In [3]:
!pip install lale
!pip install 'liac-arff>=2.4.0'



In [4]:
from uq360.utils.misc import make_sklearn_compatible_scorer

### The uq360 metrics can be converted to a sklearn scorer using the `make_sklearn_compatible_scorer` utility function.

#### We will now show how to convert the ECE and AURRRC metrics in uq360 into scorers that can be used with the Lale framework for hyperparameter optimization.

In [5]:
# Wrap two uq360 classification metrics as sklearn-style scorers.
# Both are declared with greater_is_better=False, i.e. the search should
# drive them down.
sklearn_aurrrc = make_sklearn_compatible_scorer(
    task_type="classification",
    metric="aurrrc",
    greater_is_better=False,
)
sklearn_ece = make_sklearn_compatible_scorer(
    task_type="classification",
    metric="ece",
    greater_is_better=False,
)

In [6]:
import lale
from lale.lib.lale import Hyperopt
# NOTE(review): per the Lale documentation this swaps operator classes that
# are already imported into this namespace for their Lale wrappers — confirm
# it is still needed here, since the model used below is imported from
# `lale.lib.sklearn` directly.
lale.wrap_imported_operators()

In [7]:
from sklearn import datasets
# Load the breast-cancer dataset; return_X_y=True yields the feature matrix
# and the label vector directly instead of a dataset container.
X, y = datasets.load_breast_cancer(return_X_y=True)

In [8]:
from sklearn.model_selection import train_test_split

In [9]:
# Hold out 33% of the samples for evaluation; the fixed random_state keeps
# the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [10]:
from lale.lib.sklearn import AdaBoostClassifier as Model

In [11]:
# Two otherwise identical Hyperopt searches over the same estimator
# (3-fold CV, 20 evaluations each); they differ only in the uq360 scorer
# that is being optimized.
clf_ece = Hyperopt(
    estimator=Model,
    cv=3,
    max_evals=20,
    scoring=sklearn_ece,
    verbose=True,
)
clf_aurrrc = Hyperopt(
    estimator=Model,
    cv=3,
    max_evals=20,
    scoring=sklearn_aurrrc,
    verbose=True,
)

In [12]:
# Run the ECE-driven search on the training split; the object returned by
# `fit` is kept so it can be used for prediction on the test split later.
trained_with_ece = clf_ece.fit(X_train, y_train)

100%|██████████| 20/20 [00:29<00:00,  1.46s/trial, best loss: 0.259819517887197] 


In [13]:
# Run the AURRRC-driven search on the same training split; the object
# returned by `fit` is kept for prediction on the test split later.
trained_with_aurrrc = clf_aurrrc.fit(X_train, y_train)

100%|██████████| 20/20 [00:30<00:00,  1.51s/trial, best loss: 0.004077380952380953]


In [14]:
# Baseline search for comparison: same estimator and budget, but no
# `scoring` argument, so Hyperopt uses its default objective.
# NOTE(review): the variable name and the reported best loss (about -0.96)
# suggest the default is accuracy — confirm against the Lale docs.
clf_accuracy = Hyperopt(
    estimator=Model,
    cv=3,
    max_evals=20,
    verbose=True,
)
trained_with_accuracy = clf_accuracy.fit(X_train, y_train)

100%|██████████| 20/20 [00:29<00:00,  1.50s/trial, best loss: -0.9606299212598426]


In [15]:
from sklearn.metrics import classification_report

In [16]:
# Test-set precision/recall/F1 for the pipeline found by the baseline
# (default-objective) search.
print(classification_report(y_test, trained_with_accuracy.predict(X_test)))

              precision    recall  f1-score   support

           0       0.97      0.99      0.98        67
           1       0.99      0.98      0.99       121

    accuracy                           0.98       188
   macro avg       0.98      0.98      0.98       188
weighted avg       0.98      0.98      0.98       188



In [17]:
# Test-set precision/recall/F1 for the pipeline found by the ECE-driven search.
print(classification_report(y_test, trained_with_ece.predict(X_test)))

              precision    recall  f1-score   support

           0       0.96      0.99      0.97        67
           1       0.99      0.98      0.98       121

    accuracy                           0.98       188
   macro avg       0.97      0.98      0.98       188
weighted avg       0.98      0.98      0.98       188



In [18]:
# Test-set precision/recall/F1 for the pipeline found by the AURRRC-driven search.
print(classification_report(y_test, trained_with_aurrrc.predict(X_test)))

              precision    recall  f1-score   support

           0       0.96      0.97      0.96        67
           1       0.98      0.98      0.98       121

    accuracy                           0.97       188
   macro avg       0.97      0.97      0.97       188
weighted avg       0.97      0.97      0.97       188



## Let us now see how to use `uq360` models with sklearn's `GridSearchCV`

### We use the boston housing dataset with `QuantileRegression` model while optimizing the `picp` metric.

In [19]:
from sklearn.model_selection import GridSearchCV
import pandas as pd

In [20]:
# Load the Boston housing data.
# `datasets.load_boston` was deprecated in scikit-learn 1.0 and removed in
# 1.2, where accessing it raises ImportError. Fall back to the OpenML copy of
# the same dataset (requires network access on first use) so this cell also
# runs on current scikit-learn releases.
try:
    house_prices_dataset = datasets.load_boston()
except (ImportError, AttributeError):
    house_prices_dataset = datasets.fetch_openml(name="boston", version=1, as_frame=True)

# Both loaders expose 'data', 'feature_names' and 'target'; build a labelled
# frame so features can be selected by column name.
house_prices_df = pd.DataFrame(house_prices_dataset['data'])
house_prices_df.columns = house_prices_dataset['feature_names']

# Only these four columns are used as predictors.
all_features = ['RM','CRIM','PTRATIO', 'DIS']
X = house_prices_df[all_features].values
# Normalize the target to a float ndarray regardless of whether the loader
# returned an ndarray (load_boston) or a pandas Series (fetch_openml).
y = pd.Series(house_prices_dataset['target']).to_numpy(dtype=float)

In [21]:
# Hold out 30% of the housing samples for evaluation; the fixed random_state
# keeps the split identical across runs. This rebinds the train/test names
# used by the classification example above.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [22]:
# Baseline configuration handed to the model; every grid candidate below
# starts from these values.
base_config = dict(
    alpha=0.95,
    n_estimators=20,
    max_depth=3,
    learning_rate=0.1,
    min_samples_leaf=20,
    min_samples_split=20,
)

# Parameter grid with a single key, "config": one full config dict per
# candidate, identical to the baseline except for `n_estimators`.
configs = {"config": []}
for num_estimators in (1, 2, 5, 10, 20, 30, 40, 50):
    # dict(mapping, key=value) copies the baseline and overrides one entry
    # while keeping the original key order.
    config = dict(base_config, n_estimators=num_estimators)
    configs["config"].append(config)

In [23]:
from uq360.algorithms.quantile_regression import QuantileRegression

In [24]:
# Wrap the uq360 regression metric "picp" as a sklearn-style scorer; it is
# declared with greater_is_better=True, so the search maximizes it.
sklearn_picp = make_sklearn_compatible_scorer(
    task_type="regression",
    metric="picp",
    greater_is_better=True,
)

In [25]:
# Grid search over the candidate config dicts, ranking them by the PICP
# scorer. The estimator is seeded with the baseline config; each candidate
# replaces its `config` parameter.
clf = GridSearchCV(
    estimator=QuantileRegression(config=base_config),
    param_grid=configs,
    scoring=sklearn_picp,
)

In [26]:
# Run the grid search on the housing training split: every candidate config
# is cross-validated and scored with the PICP scorer.
clf.fit(X_train, y_train)

GridSearchCV(estimator=<uq360.algorithms.quantile_regression.quantile_regression.QuantileRegression object at 0x7ff1c83a9650>,
             param_grid={'config': [{'alpha': 0.95, 'learning_rate': 0.1,
                                     'max_depth': 3, 'min_samples_leaf': 20,
                                     'min_samples_split': 20,
                                     'n_estimators': 1},
                                    {'alpha': 0.95, 'learning_rate': 0.1,
                                     'max_depth': 3, 'min_samples_leaf': 20,
                                     'min_samples_split': 20,
                                     '...
                                     'min_samples_split': 20,
                                     'n_estimators': 30},
                                    {'alpha': 0.95, 'learning_rate': 0.1,
                                     'max_depth': 3, 'min_samples_leaf': 20,
                                     'min_samples_split': 20,
               

In [28]:
# Collect the per-candidate cross-validation results (timings, per-split
# scores, mean/std and rank) into a DataFrame for inspection.
df = pd.DataFrame(clf.cv_results_)

In [29]:
# Display the results table: one row per candidate config.
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_config,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.054366,0.003077,0.000536,5.7e-05,"{'alpha': 0.95, 'n_estimators': 1, 'max_depth'...","{'config': {'alpha': 0.95, 'n_estimators': 1, ...",0.929577,0.873239,0.915493,0.859155,0.842857,0.884064,0.033151,8
1,0.051386,0.000514,0.000507,6.6e-05,"{'alpha': 0.95, 'n_estimators': 2, 'max_depth'...","{'config': {'alpha': 0.95, 'n_estimators': 2, ...",0.957746,0.887324,0.915493,0.859155,0.857143,0.895372,0.037771,1
2,0.052438,0.001631,0.000601,0.000234,"{'alpha': 0.95, 'n_estimators': 5, 'max_depth'...","{'config': {'alpha': 0.95, 'n_estimators': 5, ...",0.943662,0.901408,0.915493,0.84507,0.857143,0.892555,0.036675,2
3,0.056137,0.005973,0.000513,4.1e-05,"{'alpha': 0.95, 'n_estimators': 10, 'max_depth...","{'config': {'alpha': 0.95, 'n_estimators': 10,...",0.943662,0.887324,0.915493,0.859155,0.828571,0.886841,0.040526,7
4,0.063329,0.007458,0.00062,7.3e-05,"{'alpha': 0.95, 'n_estimators': 20, 'max_depth...","{'config': {'alpha': 0.95, 'n_estimators': 20,...",0.957746,0.887324,0.929577,0.84507,0.842857,0.892515,0.045547,3
5,0.062398,0.006398,0.000588,4.8e-05,"{'alpha': 0.95, 'n_estimators': 30, 'max_depth...","{'config': {'alpha': 0.95, 'n_estimators': 30,...",0.957746,0.873239,0.915493,0.84507,0.842857,0.886881,0.044075,5
6,0.064843,0.006143,0.00087,0.000315,"{'alpha': 0.95, 'n_estimators': 40, 'max_depth...","{'config': {'alpha': 0.95, 'n_estimators': 40,...",0.943662,0.887324,0.915493,0.859155,0.842857,0.889698,0.036634,4
7,0.062831,0.003828,0.000769,0.000323,"{'alpha': 0.95, 'n_estimators': 50, 'max_depth...","{'config': {'alpha': 0.95, 'n_estimators': 50,...",0.943662,0.873239,0.915493,0.859155,0.842857,0.886881,0.037244,5
