# ***Installation***

In [1]:
# Install an exact SciPy release (1.7.0) before the other packages are installed.
# NOTE(review): the reason for this specific pin is not recorded in the notebook —
# presumably compatibility with the auto-sklearn stack installed below; confirm.
!pip install scipy==1.7.0



In [2]:
# System-level SWIG plus the Cython and NumPy Python packages.
# NOTE(review): presumably build prerequisites for the auto-sklearn install in the
# next cell (it is fetched as a source tarball) — confirm against its install guide.
!apt-get install swig -y
!pip install Cython numpy

Reading package lists... Done
Building dependency tree       
Reading state information... Done
swig is already the newest version (3.0.12-1).
0 upgraded, 0 newly installed, 0 to remove and 37 not upgraded.


In [3]:
!pip install auto-sklearn

Collecting auto-sklearn
  Downloading auto-sklearn-0.14.0.tar.gz (6.3 MB)
[K     |████████████████████████████████| 6.3 MB 4.7 MB/s 
Collecting scikit-learn<0.25.0,>=0.24.0
  Downloading scikit_learn-0.24.2-cp37-cp37m-manylinux2010_x86_64.whl (22.3 MB)
[K     |████████████████████████████████| 22.3 MB 30 kB/s 
Collecting distributed<2021.07,>=2.2.0
  Downloading distributed-2021.6.2-py3-none-any.whl (722 kB)
[K     |████████████████████████████████| 722 kB 56.7 MB/s 
Collecting liac-arff
  Downloading liac-arff-2.5.0.tar.gz (13 kB)
Collecting threadpoolctl
  Downloading threadpoolctl-3.0.0-py3-none-any.whl (14 kB)
Collecting ConfigSpace<0.5,>=0.4.14
  Downloading ConfigSpace-0.4.20-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.2 MB)
[K     |████████████████████████████████| 4.2 MB 40.3 MB/s 
[?25hCollecting pynisher>=0.6.3
  Downloading pynisher-0.6.4.tar.gz (11 kB)
Collecting pyrfr<0.9,>=0.8.1
  Downloading pyrfr-0.8.2-cp37-cp37m-manylinux2014_x86_64.whl (4.0 MB)


In [5]:
# Replace the scikit-learn 0.24.2 pulled in by auto-sklearn with exactly 0.24.0;
# this is still inside auto-sklearn's declared range (>=0.24.0,<0.25.0).
# NOTE(review): the execution counter restarts at 1 in the import cell that follows,
# i.e. the kernel was restarted after these installs — restart the runtime before
# continuing so the freshly installed versions are the ones imported.
!pip install scikit-learn==0.24.0

Collecting scikit-learn==0.24.0
  Downloading scikit_learn-0.24.0-cp37-cp37m-manylinux2010_x86_64.whl (22.3 MB)
[K     |████████████████████████████████| 22.3 MB 1.6 MB/s 
Installing collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 0.24.2
    Uninstalling scikit-learn-0.24.2:
      Successfully uninstalled scikit-learn-0.24.2
Successfully installed scikit-learn-0.24.0


# ***Importing Libraries and Dependencies***

In [1]:
import sklearn.datasets
import sklearn.metrics
import pandas as pd
import autosklearn.classification

from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# ***Data Loading***

In [2]:
# Load the breast-cancer dataset directly as a (features, labels) pair.
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)

# ***Train Test Split***

In [3]:
# Hold out a test set using scikit-learn's default split size; the fixed
# random_state makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# ***Building a Classifier***

In [4]:
# Search budget (seconds): at most 30 s for any single candidate model fit,
# and 120 s for the whole automated search.
automl = autosklearn.classification.AutoSklearnClassifier(
    per_run_time_limit=30,
    time_left_for_this_task=120,
)

# ***Fitting a Classifier***

In [5]:
# Run the auto-sklearn search on the training split only; the test split stays unseen.
automl.fit(X=X_train, y=y_train)

AutoSklearnClassifier(per_run_time_limit=30, time_left_for_this_task=120)

# ***Leaderboard***

In [6]:
# Ranked summary of the fitted models: ensemble weight, model type, cost and duration.
leaderboard = automl.leaderboard()
print(leaderboard)

          rank  ensemble_weight                type      cost  duration
model_id                                                               
34           1             0.16         extra_trees  0.014184  2.236281
7            2             0.10         extra_trees  0.014184  1.994756
29           3             0.06         extra_trees  0.021277  2.112083
16           4             0.04   gradient_boosting  0.021277  1.356203
26           5             0.02         extra_trees  0.028369  2.848951
22           6             0.04   gradient_boosting  0.028369  1.466864
2            7             0.04       random_forest  0.028369  2.211880
3            8             0.10                 mlp  0.028369  1.380865
14           9             0.02                 mlp  0.028369  2.542044
19          10             0.02         extra_trees  0.028369  3.585028
17          11             0.02   gradient_boosting  0.035461  2.105491
8           12             0.02       random_forest  0.035461  2

# ***Viewing Models***

In [7]:
# Dump the weighted pipelines that make up the final ensemble, including
# each pipeline's chosen hyperparameters.
ensemble_description = automl.show_models()
print(ensemble_description)

[(0.160000, SimpleClassificationPipeline({'balancing:strategy': 'none', 'classifier:__choice__': 'extra_trees', 'data_preprocessor:__choice__': 'feature_type', 'feature_preprocessor:__choice__': 'polynomial', 'classifier:extra_trees:bootstrap': 'False', 'classifier:extra_trees:criterion': 'gini', 'classifier:extra_trees:max_depth': 'None', 'classifier:extra_trees:max_features': 0.4791448484072812, 'classifier:extra_trees:max_leaf_nodes': 'None', 'classifier:extra_trees:min_impurity_decrease': 0.0, 'classifier:extra_trees:min_samples_leaf': 1, 'classifier:extra_trees:min_samples_split': 6, 'classifier:extra_trees:min_weight_fraction_leaf': 0.0, 'data_preprocessor:feature_type:categorical_transformer:categorical_encoding:__choice__': 'one_hot_encoding', 'data_preprocessor:feature_type:categorical_transformer:category_coalescence:__choice__': 'minority_coalescer', 'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'mean', 'data_preprocessor:feature_type:numerical_

In [8]:
# Summarise the search itself: the metric and best validation score, how many
# candidate runs were launched, and how many succeeded, crashed, or exceeded
# the time or memory limit.
run_statistics = automl.sprint_statistics()
print(run_statistics)

auto-sklearn results:
  Dataset name: 46571662-25be-11ec-81ca-0242ac1c0002
  Metric: accuracy
  Best validation score: 0.985816
  Number of target algorithm runs: 33
  Number of successful target algorithm runs: 33
  Number of crashed target algorithm runs: 0
  Number of target algorithms that exceeded the time limit: 0
  Number of target algorithms that exceeded the memory limit: 0



# ***Score of Final Ensemble***

In [9]:
# Evaluate the fitted ensemble on the held-out test split.
predictions = automl.predict(X_test)
test_accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
print("Accuracy score:", test_accuracy)

Accuracy score: 0.951048951048951
