In [10]:
from sklearn.datasets import load_digits
from sklearn.svm import SVC 
from sklearn.model_selection import train_test_split 
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score  

In [11]:
# Load the digits dataset shipped with scikit-learn.
data = load_digits()

# Feature matrix and target labels.
x, y = data.data, data.target

# Hold out 20% of the samples for testing; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [12]:
# Parameter grids explored by the cross-validated search.
# `gamma` only affects the RBF kernel, so the linear kernel gets its own grid
# without it; crossing gamma with 'linear' would just refit identical models
# once per gamma value.
tuned_parameters = [
    {'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [1, 10, 100, 1000]},
    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]},
]

In [13]:
def grid_search(x_train, y_train, x_test, y_test, tunes_params, metric="precision"):
    """Grid-search an SVC and print the best parameters and a test-set report.

    Parameters
    ----------
    x_train, y_train
        Training features and labels passed to ``GridSearchCV.fit``.
    x_test, y_test
        Held-out features and labels used for the classification report.
    tunes_params
        Parameter grid (a dict or a list of dicts) handed to ``GridSearchCV``.
    metric
        Base name of the scorer; ``"_macro"`` is appended to it, so
        ``"precision"`` selects the ``"precision_macro"`` scorer.

    Returns
    -------
    None
        Everything is reported through ``print``.
    """
    print("# Tuning hyper-parameters for %s" % metric)

    # Search the grid supplied by the caller. Previously the module-level
    # `tuned_parameters` was read here, so the argument was silently ignored.
    clf = GridSearchCV(SVC(), tunes_params, scoring='%s_macro' % metric)
    clf.fit(x_train, y_train)

    print("Best params found")
    print(clf.best_params_)

    print("classification report")
    # GridSearchCV.predict delegates to the best estimator found by the search.
    y_true, y_pred = y_test, clf.predict(x_test)
    print(classification_report(y_true, y_pred))

In [14]:
import warnings

# Suppress every warning from this point on.
warnings.filterwarnings("ignore")

# Run the SVC grid search with precision as the tuning metric.
grid_search(
    x_train, y_train, x_test, y_test,
    tunes_params=tuned_parameters,
    metric='precision',
)

# Tuning hyper-parameters for precision
Best params found
{'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
classification report
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       1.00      1.00      1.00        28
           2       1.00      1.00      1.00        33
           3       1.00      0.97      0.99        34
           4       1.00      1.00      1.00        46
           5       0.98      0.98      0.98        47
           6       0.97      1.00      0.99        35
           7       0.97      0.97      0.97        34
           8       1.00      1.00      1.00        30
           9       0.97      0.97      0.97        40

    accuracy                           0.99       360
   macro avg       0.99      0.99      0.99       360
weighted avg       0.99      0.99      0.99       360



In [11]:
# The best SVC configuration found within the searched grid: {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}

In [1]:
!pip install auto-sklearn



In [2]:
import autosklearn.classification as automl

# Overall search budget of 60, a limit of 30 per individual run, one worker.
cls = automl.AutoSklearnClassifier(
    time_left_for_this_task=60,
    per_run_time_limit=30,
    n_jobs=1,
)

In [9]:
# Run the auto-sklearn search on the training split.
cls.fit(x_train, y_train)



AutoSklearnClassifier(n_jobs=1, per_run_time_limit=30,
                      time_left_for_this_task=60)

In [15]:
# Text summary of the auto-sklearn search (metric, best validation score, run counts).
search_summary = cls.sprint_statistics()
print(search_summary)

auto-sklearn results:
  Dataset name: e74c92e8-9e5d-11ec-8104-0242ac1c0002
  Metric: accuracy
  Best validation score: 0.985263
  Number of target algorithm runs: 9
  Number of successful target algorithm runs: 7
  Number of crashed target algorithm runs: 0
  Number of target algorithms that exceeded the time limit: 1
  Number of target algorithms that exceeded the memory limit: 1



In [16]:
# Score the auto-sklearn model on the held-out test split.
predictions = cls.predict(x_test)
accuracy_score(y_true=y_test, y_pred=predictions)

0.9777777777777777

In [17]:
# Compare the results
# execution time : auto-sklearn takes more time
# model complexity : not evaluated in this notebook
# model performance : there is only a small difference between the two; on the test set the tuned SVC is slightly more accurate (0.99) than auto-sklearn (0.978)