In [1]:
# %pip install scikit-learn==1.6.0

In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

from sklearn.metrics import classification_report, accuracy_score

In [3]:
# Load the Iris dataset bundled with scikit-learn and list the fields it exposes.
iris_data = datasets.load_iris()
iris_data.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [4]:
# Show the dataset's own description text (attributes, classes, summary statistics).
print(iris_data.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

:Number of Instances: 150 (50 in each of three classes)
:Number of Attributes: 4 numeric, predictive attributes and the class
:Attribute Information:
    - sepal length in cm
    - sepal width in cm
    - petal length in cm
    - petal width in cm
    - class:
            - Iris-Setosa
            - Iris-Versicolour
            - Iris-Virginica

:Summary Statistics:

                Min  Max   Mean    SD   Class Correlation
sepal length:   4.3  7.9   5.84   0.83    0.7826
sepal width:    2.0  4.4   3.05   0.43   -0.4194
petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

:Missing Attribute Values: None
:Class Distribution: 33.3% for each of 3 classes.
:Creator: R.A. Fisher
:Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
:Date: July, 1988

The famous Iris database, first used by Sir R.A. Fisher. The dataset is taken
from Fis

In [5]:
# Class names; the position of each name is the integer label used in `target`.
iris_data.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [6]:
# Integer class labels and their shape (one label per sample).
iris_data.target, iris_data.target.shape

(array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]),
 (150,))

In [7]:
# Feature matrix and its shape (rows are samples, columns are the four measurements).
iris_data.data, iris_data.data.shape

(array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
        [5

In [8]:
# Bind the feature matrix and the label vector to the conventional X / y names.
X = iris_data['data']
y = iris_data['target']
X.shape, y.shape

((150, 4), (150,))

In [9]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# identical across reruns.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
tuple(split.shape for split in (X_train, X_test, y_train, y_test))

((120, 4), (30, 4), (120,), (30,))

In [10]:
# k-nearest-neighbours classifier that votes over the 5 closest training samples.
knn_model_5 = KNeighborsClassifier(n_neighbors=5)
knn_model_5

In [11]:
# Second k-nearest-neighbours classifier, this time voting over 10 neighbours,
# for comparison with the 5-neighbour model.
knn_model_10 = KNeighborsClassifier(n_neighbors=10)
knn_model_10

In [12]:
# Fit the 5-neighbour model on the training split only.
knn_model_5.fit(X_train, y_train)

In [13]:
# Fit the 10-neighbour model on the same training split.
knn_model_10.fit(X_train, y_train)

In [14]:
# Score the 5-neighbour model on the held-out split.
y_pred = knn_model_5.predict(X_test)
# classification_report expects (y_true, y_pred). Passing the ground truth first
# keeps precision/recall the right way round and makes the support column count
# the real labels rather than the predictions.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [15]:
# Score the 10-neighbour model on the held-out split.
y_pred = knn_model_10.predict(X_test)
# classification_report expects (y_true, y_pred). Passing the ground truth first
# keeps precision/recall the right way round and makes the support column count
# the real labels rather than the predictions.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [16]:
# Sweep n_neighbors from 2 to 20 and print the held-out accuracy for each value.
# NOTE(review): accuracy is measured on the test split, so choosing k from this
# sweep would leak test information; use cross-validation on the training split
# for actual model selection.
num_neighbors_list = list(range(2, 21))
for num_neighbors in num_neighbors_list:
    knn_model = KNeighborsClassifier(n_neighbors=num_neighbors)
    knn_model.fit(X_train, y_train)
    y_pred = knn_model.predict(X_test)
    # Ground truth first, matching the accuracy_score(y_true, y_pred) signature.
    print('num_neighbors =', num_neighbors, ' -  accuracy_score =', accuracy_score(y_test, y_pred))

num_neighbors = 2  -  accuracy_score = 1.0
num_neighbors = 3  -  accuracy_score = 1.0
num_neighbors = 4  -  accuracy_score = 1.0
num_neighbors = 5  -  accuracy_score = 1.0
num_neighbors = 6  -  accuracy_score = 1.0
num_neighbors = 7  -  accuracy_score = 0.9666666666666667
num_neighbors = 8  -  accuracy_score = 1.0
num_neighbors = 9  -  accuracy_score = 1.0
num_neighbors = 10  -  accuracy_score = 1.0
num_neighbors = 11  -  accuracy_score = 1.0
num_neighbors = 12  -  accuracy_score = 1.0
num_neighbors = 13  -  accuracy_score = 1.0
num_neighbors = 14  -  accuracy_score = 1.0
num_neighbors = 15  -  accuracy_score = 1.0
num_neighbors = 16  -  accuracy_score = 1.0
num_neighbors = 17  -  accuracy_score = 1.0
num_neighbors = 18  -  accuracy_score = 1.0
num_neighbors = 19  -  accuracy_score = 1.0
num_neighbors = 20  -  accuracy_score = 1.0


In [17]:
# Linear-kernel support vector classifier with C=1, shown for inspection only:
# this instance is never fit, and the sweep cell that follows rebinds `svm_model`.
svm_model = SVC(kernel='linear', C=1)
svm_model

In [18]:
# Sweep the regularisation strength C over 0.1, 0.2, ..., 1.0 for a linear SVC
# and print the held-out accuracy for each value.
c_list = [c / 10 for c in range(1, 11)]  # range is already iterable; no list() needed
print(c_list)
for c in c_list:
    svm_model = SVC(kernel='linear', C=c)
    svm_model.fit(X_train, y_train)
    y_pred = svm_model.predict(X_test)
    # Ground truth first, matching the accuracy_score(y_true, y_pred) signature.
    print('c =', c, ' -  accuracy_score =', accuracy_score(y_test, y_pred))

[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
c = 0.1  -  accuracy_score = 1.0
c = 0.2  -  accuracy_score = 1.0
c = 0.3  -  accuracy_score = 1.0
c = 0.4  -  accuracy_score = 1.0
c = 0.5  -  accuracy_score = 1.0
c = 0.6  -  accuracy_score = 1.0
c = 0.7  -  accuracy_score = 1.0
c = 0.8  -  accuracy_score = 1.0
c = 0.9  -  accuracy_score = 1.0
c = 1.0  -  accuracy_score = 1.0


In [19]:
# Decision tree capped at depth 3, shown for inspection only: this instance is
# never fit, and the sweep cell that follows rebinds `tree_model`.
tree_model = DecisionTreeClassifier(max_depth=3)
tree_model

In [20]:
# Sweep the maximum tree depth from 1 to 4 and print the held-out accuracy for
# each value.
max_depth_list = list(range(1, 5))
print(max_depth_list)
for max_depth in max_depth_list:
    # The tree permutes features at every split, so equally good splits can be
    # resolved differently between runs; a fixed seed makes the fit reproducible.
    tree_model = DecisionTreeClassifier(max_depth=max_depth, random_state=42)
    tree_model.fit(X_train, y_train)
    y_pred = tree_model.predict(X_test)
    # Ground truth first, matching the accuracy_score(y_true, y_pred) signature.
    print('max_depth =', max_depth, ' -  accuracy_score =', accuracy_score(y_test, y_pred))

[1, 2, 3, 4]
max_depth = 1  -  accuracy_score = 0.6333333333333333
max_depth = 2  -  accuracy_score = 0.9666666666666667
max_depth = 3  -  accuracy_score = 1.0
max_depth = 4  -  accuracy_score = 1.0


In [21]:
# Gaussian naive Bayes classifier with default settings.
naive_bayes_model = GaussianNB()
naive_bayes_model

In [22]:
# Fit the naive Bayes model on the training split only.
naive_bayes_model.fit(X_train, y_train)

In [23]:
# Predict labels for the held-out split; `y_pred` is rebound here, replacing the
# predictions left over from the earlier model cells.
y_pred = naive_bayes_model.predict(X_test)

In [24]:
# classification_report expects (y_true, y_pred). Passing the ground truth first
# keeps precision/recall the right way round and makes the support column count
# the real labels rather than the predictions.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

