In [26]:
from sklearn.datasets import load_iris, load_digits
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
import warnings
from sklearn.neural_network import MLPClassifier
import pandas as pd

In [27]:
# load datasets
iris = load_iris()
digits = load_digits()
boston = pd.read_csv('boston.csv');

In [28]:
print(boston)
print(iris)
print(digits)

     Unnamed: 0     crim    zn  indus  chas    nox     rm   age     dis  rad  \
0             1  0.00632  18.0   2.31     0  0.538  6.575  65.2  4.0900    1   
1             2  0.02731   0.0   7.07     0  0.469  6.421  78.9  4.9671    2   
2             3  0.02729   0.0   7.07     0  0.469  7.185  61.1  4.9671    2   
3             4  0.03237   0.0   2.18     0  0.458  6.998  45.8  6.0622    3   
4             5  0.06905   0.0   2.18     0  0.458  7.147  54.2  6.0622    3   
..          ...      ...   ...    ...   ...    ...    ...   ...     ...  ...   
501         502  0.06263   0.0  11.93     0  0.573  6.593  69.1  2.4786    1   
502         503  0.04527   0.0  11.93     0  0.573  6.120  76.7  2.2875    1   
503         504  0.06076   0.0  11.93     0  0.573  6.976  91.0  2.1675    1   
504         505  0.10959   0.0  11.93     0  0.573  6.794  89.3  2.3889    1   
505         506  0.04741   0.0  11.93     0  0.573  6.030  80.8  2.5050    1   

     tax  ptratio   black  lstat  medv 

In [29]:
# data description
print(iris.DESCR)
print(digits.DESCR)
# print(boston.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [30]:
# create a list of classifiers
classifiers1 = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()
]


In [32]:
# iterate over datasets
for name, dataset in [("iris", iris), ("digits", digits)]:
    X, y = dataset.data, dataset.target
    print(f"Accuracy scores for {name} dataset :")
    # iterate over classifiers
    for clf in classifiers1:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            # calculate cross-validation scores
            score = cross_val_score(clf, X, y, cv=5, n_jobs=-1).mean()
            print(f"{clf}: ",score)
print("\n\n")

Accuracy scores for iris dataset :
KNeighborsClassifier(n_neighbors=3):  0.9666666666666668
SVC(C=0.025, kernel='linear'):  0.9466666666666667
SVC(C=1, gamma=2):  0.9666666666666668
GaussianProcessClassifier(kernel=1**2 * RBF(length_scale=1)):  0.9666666666666668
DecisionTreeClassifier(max_depth=5):  0.9533333333333334
RandomForestClassifier(max_depth=5, max_features=1, n_estimators=10):  0.9533333333333334
MLPClassifier(alpha=1):  0.9733333333333334
AdaBoostClassifier():  0.9466666666666667
GaussianNB():  0.9533333333333334
QuadraticDiscriminantAnalysis():  0.9800000000000001
Accuracy scores for digits dataset :
KNeighborsClassifier(n_neighbors=3):  0.966621788919839
SVC(C=0.025, kernel='linear'):  0.9476973073351903
SVC(C=1, gamma=2):  0.12245125348189416
GaussianProcessClassifier(kernel=1**2 * RBF(length_scale=1)):  0.10016713091922005
DecisionTreeClassifier(max_depth=5):  0.6305694831321571
RandomForestClassifier(max_depth=5, max_features=1, n_estimators=10):  0.7857551841535129
ML