In [9]:
# Select matplotlib's inline backend so any figures are embedded in the cell output.
# NOTE(review): no plotting call is visible in this part of the notebook — confirm it is still needed.
%matplotlib inline


# Classification

The following example shows how to fit a simple classification model with
*auto-sklearn*.


In [10]:
from pprint import pprint

import sklearn.datasets
import sklearn.metrics

import autosklearn.classification

## Data Loading



In [12]:

import pandas as pd
from sklearn.model_selection import train_test_split

# Path to the Airbnb CSV (relative path, so it depends on the working directory).
file_path = '../data/airbnb.csv'
df = pd.read_csv(file_path)

# 'Rating' is the prediction target; every remaining column is used as a feature.
X = df.drop(columns=['Rating'])
y = df['Rating']

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Build and fit a classifier



In [13]:
# Configure the search: a total budget of 120 (reported as 0:02:00 by the
# progress bar) with each individual model run capped at 30, and a fixed
# scratch directory for auto-sklearn's intermediate files.
automl = autosklearn.classification.AutoSklearnClassifier(
    tmp_folder="/tmp/autosklearn_classification_example_tmp",
    per_run_time_limit=30,
    time_left_for_this_task=120,
)

# Run the search on the training split. fit() is left as the last expression
# so the fitted estimator's repr is shown as the cell output.
automl.fit(X=X_train, y=y_train, dataset_name="airbnb")

Fitting to the training data: 100%|██████████| 120/120 [01:51<00:00,  1.08it/s, The total time budget for this task is 0:02:00]


AutoSklearnClassifier(ensemble_class=<class 'autosklearn.ensembles.ensemble_selection.EnsembleSelection'>,
                      per_run_time_limit=30, time_left_for_this_task=120,
                      tmp_folder='/tmp/autosklearn_classification_example_tmp')

## View the models found by auto-sklearn



In [14]:
# leaderboard() returns a pandas DataFrame with one row per model in the
# ensemble. Leaving it as the cell's last expression lets Jupyter render it
# as a formatted table rather than the plain-text dump that print() produces.
automl.leaderboard()

          rank  ensemble_weight               type      cost   duration
model_id                                                               
2            1             0.52      random_forest  0.276363  16.915586
9            2             0.14  gradient_boosting  0.298727  12.411541
3            3             0.10                mlp  0.305093  29.759696
8            4             0.24      liblinear_svc  0.312602  11.302748


## Print the final ensemble constructed by auto-sklearn



In [15]:
# show_models() returns a dict keyed by model id (see the output below);
# pretty-print it with a 4-space indent so the nested entries stay readable.
ensemble_models = automl.show_models()
pprint(ensemble_models, indent=4)

{   2: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f4353cf1c70>,
           'cost': 0.27636304276852763,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f4353de26a0>,
           'ensemble_weight': 0.52,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f4353cf6880>,
           'model_id': 2,
           'rank': 1,
           'sklearn_classifier': RandomForestClassifier(max_features=11, n_estimators=512, n_jobs=1,
                       random_state=1, warm_start=True)},
    3: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f4353cdd6a0>,
           'cost': 0.3050930460333007,
           'data_preprocessor': <autosklear

## Get the score of the final ensemble



In [16]:
# Predict labels for the held-out split with the fitted ensemble, then report
# the fraction of test rows whose predicted label matches the true one.
predictions = automl.predict(X_test)
accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
print("Accuracy score:", accuracy)

Accuracy score: 0.7185951303598362
