In [1]:
# %matplotlib inline


# Classification

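The following example shows how to fit a simple classification model with
*auto-sklearn*. The search is restricted to MLP classifiers, and the built-in
data preprocessing is replaced by a custom pass-through component.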


In [2]:
from pprint import pprint
import sklearn.metrics

import autosklearn.classification

## Custom Preprocessor and Data Loading



In [3]:
from typing import Optional
from pprint import pprint

import autosklearn.classification
import autosklearn.pipeline.components.data_preprocessing
import sklearn.metrics
from ConfigSpace.configuration_space import ConfigurationSpace

from autosklearn.askl_typing import FEAT_TYPE_TYPE
from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
from autosklearn.pipeline.constants import SPARSE, DENSE, UNSIGNED_DATA, INPUT
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split


class NoPreprocessing(AutoSklearnPreprocessingAlgorithm):
    """Pass-through data preprocessor: ``transform`` returns its input unchanged."""

    def __init__(self, **kwargs):
        """Store every keyword argument as an instance attribute of the same name.

        Some internal checks make sure the parameters are set on the instance.
        """
        for name, value in kwargs.items():
            setattr(self, name, value)

    def fit(self, X, Y=None):
        """Nothing is learned from the data; return the instance itself."""
        return self

    def transform(self, X):
        """Return ``X`` exactly as it was passed in."""
        return X

    @staticmethod
    def get_properties(dataset_properties=None):
        """Return the property dictionary describing this component.

        ``dataset_properties`` is accepted but not used.
        """
        properties = {
            "shortname": "NoPreprocessing",
            "name": "NoPreprocessing",
        }
        # Every task type is flagged as supported.
        for task_flag in (
            "handles_regression",
            "handles_classification",
            "handles_multiclass",
            "handles_multilabel",
            "handles_multioutput",
        ):
            properties[task_flag] = True
        properties["is_deterministic"] = True
        properties["input"] = (SPARSE, DENSE, UNSIGNED_DATA)
        properties["output"] = (INPUT,)
        return properties

    @staticmethod
    def get_hyperparameter_search_space(
        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
    ):
        """Return an empty configuration space: this component has no hyperparameters."""
        empty_space = ConfigurationSpace()
        return empty_space


# Register the NoPreprocessing component with auto-sklearn so that it can be
# selected by its class name as a data preprocessor.
data_preprocessing_components = autosklearn.pipeline.components.data_preprocessing
data_preprocessing_components.add_preprocessor(NoPreprocessing)

In [4]:

import pandas as pd
from sklearn.model_selection import train_test_split
import autosklearn
# CSV file holding the features together with the target column "Rating".
file_path = 'clean_data_random_forest.csv'

df = pd.read_csv(file_path)

# DataFrame.dropna returns a new frame instead of modifying `df` in place, so
# the result has to be assigned; otherwise rows with a missing target would
# still end up in the train/test split.
df = df.dropna(subset=['Rating'])

# Separate the target from the features.
y = df['Rating']
X = df.drop('Rating', axis=1)

# Hold out 20% of the rows for the final evaluation; the fixed random_state
# keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Build and fit a classifier



In [6]:
# Restrict the search: MLP classifiers only, no feature preprocessing, and the
# custom "NoPreprocessing" component as the data preprocessor.
search_space_include = {
    "classifier": ["mlp"],
    "feature_preprocessor": ["no_preprocessing"],
    "data_preprocessor": ["NoPreprocessing"],
}

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=120,
    per_run_time_limit=30,
    include=search_space_include,
    tmp_folder="tmp/autosklearn_classification_example_tmp9",
)
automl.fit(X_train, y_train, dataset_name="airbnb")

# Look up the configuration of the first run recorded in the run history.
runhistory = automl.automl_.runhistory_
run_key = list(runhistory.data)[0]
run_value = runhistory.data[run_key]
config = runhistory.ids_config[run_key.config_id]
print(config)



INITT
helloo


Fitting to the training data:   0%|[32m          [0m| 0/120 [00:00<?, ?it/s, The total time budget for this task is 0:02:00]

SKIPPING RESAMPLING
Before data managing, null counts in X test
0      0
1      0
2      0
3      0
4      0
      ..
133    0
134    0
135    0
136    0
137    0
Length: 138, dtype: int64
inside data manager:
{'X_train':             0         1             2         3             4         5  \
264 -0.415794  0.000000 -6.879288e-01 -0.663075 -4.374283e-01 -0.781371   
615 -0.415794  0.673504  2.396025e+00  1.699917 -7.125304e-01  0.840483   
329 -0.415794  0.673504  8.540479e-01  0.282122 -2.998772e-01 -0.411835   
342  0.000000  0.000000 -1.711938e-16  0.000000 -1.085955e-16  0.000000   
394 -0.415794 -0.606516 -6.879288e-01 -0.663075  2.817947e+00 -0.247596   
..        ...       ...           ...       ...           ...       ...   
71  -0.415794 -0.606516  8.305958e-02 -0.190477 -4.221448e-01 -0.237331   
106 -0.415794 -0.606516  8.305958e-02  0.282122 -4.527117e-01  0.707040   
270 -0.415794 -0.606516  8.305958e-02  0.282122  6.476968e-01 -0.401570   
435 -0.415794 -0.606516 -6.8

Fitting to the training data:   1%|[32m          [0m| 1/120 [00:01<01:59,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None
XDATAAA: None


Fitting to the training data:   2%|[32m▎         [0m| 3/120 [00:03<01:57,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None
XDATAAA: None


Fitting to the training data:   3%|[32m▎         [0m| 4/120 [00:04<01:56,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None
XDATAAA: None


Fitting to the training data:   4%|[32m▍         [0m| 5/120 [00:05<01:55,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:   6%|[32m▌         [0m| 7/120 [00:07<01:53,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None
XDATAAA: None


Fitting to the training data:   8%|[32m▊         [0m| 9/120 [00:09<01:51,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:   8%|[32m▊         [0m| 10/120 [00:10<01:50,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  10%|[32m█         [0m| 12/120 [00:12<01:48,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None
XDATAAA: None


Fitting to the training data:  11%|[32m█         [0m| 13/120 [00:13<01:47,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  12%|[32m█▎        [0m| 15/120 [00:15<01:45,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  13%|[32m█▎        [0m| 16/120 [00:16<01:44,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  14%|[32m█▍        [0m| 17/120 [00:17<01:43,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  15%|[32m█▌        [0m| 18/120 [00:18<01:42,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  16%|[32m█▌        [0m| 19/120 [00:19<01:41,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  17%|[32m█▋        [0m| 20/120 [00:20<01:40,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None
XDATAAA: None


Fitting to the training data:  18%|[32m█▊        [0m| 21/120 [00:21<01:39,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  18%|[32m█▊        [0m| 22/120 [00:22<01:38,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  19%|[32m█▉        [0m| 23/120 [00:23<01:37,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  20%|[32m██        [0m| 24/120 [00:24<01:36,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  21%|[32m██        [0m| 25/120 [00:25<01:35,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None
XDATAAA: None


Fitting to the training data:  22%|[32m██▎       [0m| 27/120 [00:27<01:33,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None
XDATAAA: None


Fitting to the training data:  23%|[32m██▎       [0m| 28/120 [00:28<01:32,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  24%|[32m██▍       [0m| 29/120 [00:29<01:31,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  25%|[32m██▌       [0m| 30/120 [00:30<01:30,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  27%|[32m██▋       [0m| 32/120 [00:32<01:28,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None
XDATAAA: None


Fitting to the training data:  28%|[32m██▊       [0m| 33/120 [00:33<01:27,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  28%|[32m██▊       [0m| 34/120 [00:34<01:26,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  30%|[32m███       [0m| 36/120 [00:36<01:24,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  31%|[32m███       [0m| 37/120 [00:37<01:23,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  32%|[32m███▏      [0m| 38/120 [00:38<01:22,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None
XDATAAA: None


Fitting to the training data:  33%|[32m███▎      [0m| 40/120 [00:40<01:20,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  34%|[32m███▍      [0m| 41/120 [00:41<01:19,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  35%|[32m███▌      [0m| 42/120 [00:42<01:18,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  36%|[32m███▌      [0m| 43/120 [00:43<01:17,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  37%|[32m███▋      [0m| 44/120 [00:44<01:16,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  38%|[32m███▊      [0m| 45/120 [00:45<01:15,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  39%|[32m███▉      [0m| 47/120 [00:47<01:13,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  40%|[32m████      [0m| 48/120 [00:48<01:12,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  41%|[32m████      [0m| 49/120 [00:49<01:11,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  42%|[32m████▏     [0m| 50/120 [00:50<01:10,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  42%|[32m████▎     [0m| 51/120 [00:51<01:09,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  44%|[32m████▍     [0m| 53/120 [00:53<01:07,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  45%|[32m████▌     [0m| 54/120 [00:54<01:06,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  48%|[32m████▊     [0m| 57/120 [00:57<01:03,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  49%|[32m████▉     [0m| 59/120 [00:59<01:01,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  51%|[32m█████     [0m| 61/120 [01:01<00:59,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  52%|[32m█████▎    [0m| 63/120 [01:03<00:57,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  53%|[32m█████▎    [0m| 64/120 [01:04<00:56,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  57%|[32m█████▋    [0m| 68/120 [01:08<00:52,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  60%|[32m██████    [0m| 72/120 [01:12<00:48,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  62%|[32m██████▏   [0m| 74/120 [01:14<00:46,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  63%|[32m██████▎   [0m| 76/120 [01:16<00:44,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  65%|[32m██████▌   [0m| 78/120 [01:18<00:42,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  67%|[32m██████▋   [0m| 80/120 [01:20<00:40,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  68%|[32m██████▊   [0m| 81/120 [01:21<00:39,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  68%|[32m██████▊   [0m| 82/120 [01:22<00:38,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  71%|[32m███████   [0m| 85/120 [01:25<00:35,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  72%|[32m███████▎  [0m| 87/120 [01:27<00:33,  1.01s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  76%|[32m███████▌  [0m| 91/120 [01:31<00:29,  1.01s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  77%|[32m███████▋  [0m| 92/120 [01:32<00:28,  1.01s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  78%|[32m███████▊  [0m| 94/120 [01:34<00:26,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  81%|[32m████████  [0m| 97/120 [01:37<00:23,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  82%|[32m████████▏ [0m| 98/120 [01:38<00:22,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  82%|[32m████████▎ [0m| 99/120 [01:39<00:21,  1.00s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  87%|[32m████████▋ [0m| 104/120 [01:44<00:16,  1.03s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  88%|[32m████████▊ [0m| 105/120 [01:45<00:15,  1.02s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  89%|[32m████████▉ [0m| 107/120 [01:47<00:13,  1.01s/it, The total time budget for this task is 0:02:00]

XDATAAA: None


Fitting to the training data:  90%|[32m█████████ [0m| 108/120 [01:48<00:12,  1.01s/it, The total time budget for this task is 0:02:00]

{'X_train':             0         1             2         3             4         5  \
264 -0.415794  0.000000 -6.879288e-01 -0.663075 -4.374283e-01 -0.781371   
615 -0.415794  0.673504  2.396025e+00  1.699917 -7.125304e-01  0.840483   
329 -0.415794  0.673504  8.540479e-01  0.282122 -2.998772e-01 -0.411835   
342  0.000000  0.000000 -1.711938e-16  0.000000 -1.085955e-16  0.000000   
394 -0.415794 -0.606516 -6.879288e-01 -0.663075  2.817947e+00 -0.247596   
..        ...       ...           ...       ...           ...       ...   
71  -0.415794 -0.606516  8.305958e-02 -0.190477 -4.221448e-01 -0.237331   
106 -0.415794 -0.606516  8.305958e-02  0.282122 -4.527117e-01  0.707040   
270 -0.415794 -0.606516  8.305958e-02  0.282122  6.476968e-01 -0.401570   
435 -0.415794 -0.606516 -6.879288e-01 -1.135674 -5.138455e-01 -0.832696   
102 -0.415794 -0.606516 -6.879288e-01 -0.663075 -5.902628e-01  0.070616   

            6         7         8         9  ...           128           129  \
264  0.

Fitting to the training data: 100%|[32m██████████[0m| 120/120 [01:49<00:00,  1.10it/s, The total time budget for this task is 0:02:00]


Configuration(values={
  'balancing:strategy': 'none',
  'classifier:__choice__': 'mlp',
  'classifier:mlp:activation': 'relu',
  'classifier:mlp:alpha': 0.0001,
  'classifier:mlp:batch_size': 'auto',
  'classifier:mlp:beta_1': 0.9,
  'classifier:mlp:beta_2': 0.999,
  'classifier:mlp:early_stopping': 'valid',
  'classifier:mlp:epsilon': 1e-08,
  'classifier:mlp:hidden_layer_depth': 1,
  'classifier:mlp:learning_rate_init': 0.001,
  'classifier:mlp:n_iter_no_change': 32,
  'classifier:mlp:num_nodes_per_layer': 32,
  'classifier:mlp:shuffle': 'True',
  'classifier:mlp:solver': 'adam',
  'classifier:mlp:tol': 0.0001,
  'classifier:mlp:validation_fraction': 0.1,
  'data_preprocessor:__choice__': 'NoPreprocessing',
  'feature_preprocessor:__choice__': 'no_preprocessing',
})



## View the models found by auto-sklearn



In [7]:
# Leaderboard table: rank, ensemble weight, model type, cost and duration.
leaderboard = automl.leaderboard()
print(leaderboard)

          rank  ensemble_weight type      cost  duration
model_id                                                
70           1             0.04  mlp  0.297170  0.528144
42           3             0.02  mlp  0.301887  0.349071
49           2             0.12  mlp  0.301887  0.371793
2            4             0.02  mlp  0.306604  0.411904
22           6             0.02  mlp  0.306604  0.385399
27           8             0.16  mlp  0.306604  0.479656
47           7             0.02  mlp  0.306604  0.451138
48           5             0.04  mlp  0.306604  0.352690
29           9             0.10  mlp  0.311321  0.425970
44          11             0.08  mlp  0.316038  0.341057
57          10             0.04  mlp  0.316038  0.478840
80          12             0.02  mlp  0.320755  0.558581
34          14             0.02  mlp  0.325472  0.454061
37          15             0.04  mlp  0.325472  0.428214
87          13             0.02  mlp  0.325472  0.545648
14          16             0.02

## Print the final ensemble constructed by auto-sklearn



In [8]:
# Mapping from model id to a description of that ensemble member.
ensemble_models = automl.show_models()
pprint(ensemble_models, indent=4)

{   2: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7fe740d8d250>,
           'cost': 0.30660377358490565,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7fe744ce9910>,
           'ensemble_weight': 0.02,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7fe744984d00>,
           'model_id': 2,
           'rank': 12,
           'sklearn_classifier': MLPClassifier(beta_1=0.999, beta_2=0.9, early_stopping=True,
              hidden_layer_sizes=(32,), max_iter=32, n_iter_no_change=32,
              random_state=1, verbose=0, warm_start=True)},
    5: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7fe73ec5c400>,
           'cost': 0.3537735849056

## Get the Score of the final ensemble



In [9]:
# Score the fitted ensemble on the held-out test split.
predictions = automl.predict(X_test)
test_accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
print("Accuracy score:", test_accuracy)

Accuracy score: 0.6875
