# Iris dataset

## Importing the libraries

In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

## Importing the dataset

In [5]:
from sklearn.datasets import load_iris
iris = load_iris()
print(iris['DESCR'])

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [6]:
dataset = pd.DataFrame(iris['data'], columns=iris['feature_names'])
dataset['target'] = iris['target']
dataset

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [8]:
X = dataset.iloc[:, :-1].values
y = dataset['target'].values

## Splitting the dataset into train and test set

In [15]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [36]:
X_test

array([[5.8, 2.8, 5.1, 2.4],
       [6. , 2.2, 4. , 1. ],
       [5.5, 4.2, 1.4, 0.2],
       [7.3, 2.9, 6.3, 1.8],
       [5. , 3.4, 1.5, 0.2],
       [6.3, 3.3, 6. , 2.5],
       [5. , 3.5, 1.3, 0.3],
       [6.7, 3.1, 4.7, 1.5],
       [6.8, 2.8, 4.8, 1.4],
       [6.1, 2.8, 4. , 1.3],
       [6.1, 2.6, 5.6, 1.4],
       [6.4, 3.2, 4.5, 1.5],
       [6.1, 2.8, 4.7, 1.2],
       [6.5, 2.8, 4.6, 1.5],
       [6.1, 2.9, 4.7, 1.4],
       [4.9, 3.6, 1.4, 0.1],
       [6. , 2.9, 4.5, 1.5],
       [5.5, 2.6, 4.4, 1.2],
       [4.8, 3. , 1.4, 0.3],
       [5.4, 3.9, 1.3, 0.4],
       [5.6, 2.8, 4.9, 2. ],
       [5.6, 3. , 4.5, 1.5],
       [4.8, 3.4, 1.9, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [6.2, 2.8, 4.8, 1.8],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.8, 1.9, 0.4],
       [6.2, 2.9, 4.3, 1.3],
       [5. , 2.3, 3.3, 1. ],
       [5. , 3.4, 1.6, 0.4],
       [6.4, 3.1, 5.5, 1.8],
       [5.4, 3. , 4.5, 1.5],
       [5.2, 3.5, 1.5, 0.2],
       [6.1, 3. , 4.9, 1.8],
       [6.4, 2

## Fitting the LogisticRegression Model

In [16]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [17]:
y_pred = model.predict(X_test)

In [18]:
from sklearn.metrics import confusion_matrix, accuracy_score
confusion_matrix(y_test, y_pred)

array([[16,  0,  0],
       [ 0, 13,  5],
       [ 0,  0, 11]])

In [19]:
accuracy_score(y_test, y_pred)

0.8888888888888888

In [20]:
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator=model, X=X_train, y=y_train, cv=5)
print(accuracies.mean())
print(accuracies.std())

0.9427204374572795
0.034547570009217564


## Fitting the NaiveBayes model 

In [21]:
from sklearn.naive_bayes import GaussianNB
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [22]:
y_pred = nb_model.predict(X_test)

In [23]:
confusion_matrix(y_test, y_pred)

array([[16,  0,  0],
       [ 0, 18,  0],
       [ 0,  0, 11]])

In [24]:
accuracy_score(y_test, y_pred)

1.0

In [26]:
accuracies = cross_val_score(estimator=nb_model, X=X_train, y=y_train, cv=5)
print(accuracies.mean())
print(accuracies.std())

0.9435862383230805
0.04424082117395695


## Fitting the catboost Model

In [27]:
from catboost import CatBoostClassifier
cat_model = CatBoostClassifier()
cat_model.fit(X_train, y_train)

otal: 2.46s	remaining: 1.54s
615:	learn: 0.0082677	total: 2.46s	remaining: 1.54s
616:	learn: 0.0082503	total: 2.47s	remaining: 1.53s
617:	learn: 0.0082410	total: 2.47s	remaining: 1.53s
618:	learn: 0.0082258	total: 2.47s	remaining: 1.52s
619:	learn: 0.0082143	total: 2.47s	remaining: 1.52s
620:	learn: 0.0082000	total: 2.48s	remaining: 1.51s
621:	learn: 0.0081874	total: 2.48s	remaining: 1.51s
622:	learn: 0.0081660	total: 2.48s	remaining: 1.5s
623:	learn: 0.0081521	total: 2.49s	remaining: 1.5s
624:	learn: 0.0081369	total: 2.49s	remaining: 1.5s
625:	learn: 0.0081230	total: 2.5s	remaining: 1.49s
626:	learn: 0.0081085	total: 2.5s	remaining: 1.49s
627:	learn: 0.0080893	total: 2.5s	remaining: 1.48s
628:	learn: 0.0080765	total: 2.5s	remaining: 1.48s
629:	learn: 0.0080607	total: 2.5s	remaining: 1.47s
630:	learn: 0.0080472	total: 2.51s	remaining: 1.47s
631:	learn: 0.0080333	total: 2.52s	remaining: 1.47s
632:	learn: 0.0080140	total: 2.52s	remaining: 1.46s
633:	learn: 0.0080040	total: 2.52s	remainin

<catboost.core.CatBoostClassifier at 0x7f921c9d8cd0>

In [28]:
y_pred = cat_model.predict(X_test)

In [29]:
confusion_matrix(y_test, y_pred)

array([[16,  0,  0],
       [ 0, 17,  1],
       [ 0,  0, 11]])

In [30]:
accuracy_score(y_test, y_pred)

0.9777777777777777

In [34]:
accuracies = cross_val_score(estimator=cat_model, X=X_train, y=y_train, cv=5)
print(accuracies.mean())
print(accuracies.std())

learn: 0.0085127	total: 1.38s	remaining: 856ms
617:	learn: 0.0085016	total: 1.38s	remaining: 856ms
618:	learn: 0.0084889	total: 1.39s	remaining: 854ms
619:	learn: 0.0084725	total: 1.39s	remaining: 854ms
620:	learn: 0.0084617	total: 1.4s	remaining: 852ms
621:	learn: 0.0084454	total: 1.4s	remaining: 851ms
622:	learn: 0.0084251	total: 1.4s	remaining: 850ms
623:	learn: 0.0084046	total: 1.41s	remaining: 848ms
624:	learn: 0.0083855	total: 1.41s	remaining: 848ms
625:	learn: 0.0083620	total: 1.42s	remaining: 846ms
626:	learn: 0.0083495	total: 1.42s	remaining: 843ms
627:	learn: 0.0083365	total: 1.42s	remaining: 841ms
628:	learn: 0.0083192	total: 1.42s	remaining: 838ms
629:	learn: 0.0083061	total: 1.42s	remaining: 836ms
630:	learn: 0.0082885	total: 1.42s	remaining: 833ms
631:	learn: 0.0082749	total: 1.43s	remaining: 830ms
632:	learn: 0.0082636	total: 1.43s	remaining: 828ms
633:	learn: 0.0082510	total: 1.43s	remaining: 825ms
634:	learn: 0.0082367	total: 1.43s	remaining: 823ms
635:	learn: 0.008223