## sklearn.datasets.load_iris
* sklearn.datasets.load_iris(*, return_X_y=False, as_frame=False)
* Returns:
    data: Bunch
    Dictionary-like object, with the following attributes.

    data: {ndarray, dataframe} of shape (150, 4)
    The data matrix. If as_frame=True, data will be a pandas DataFrame.

    target: {ndarray, Series} of shape (150,)
    The classification target. If as_frame=True, target will be a pandas Series.

    feature_names: list
    The names of the dataset columns.

    target_names: list
    The names of target classes.

    frame: DataFrame of shape (150, 5)
    Only present when as_frame=True. DataFrame with data and target.

    New in version 0.23.

    DESCR: str
    The full description of the dataset.

    filename: str
    The path to the location of the data.

    New in version 0.20.

    (data, target): tuple if return_X_y is True
    A tuple of two ndarrays. The first contains a 2D array of shape (n_samples, n_features), with each row representing one sample and each column representing one feature. The second is an ndarray of shape (n_samples,) containing the target values.

    New in version 0.18.

In [18]:
from sklearn.datasets import load_iris
import pandas as pd

# With the default arguments (return_X_y=False, as_frame=False) load_iris()
# returns a dictionary-like Bunch holding data, target, feature_names, etc.
iris = load_iris()
# Bare last expression: display the whole Bunch to inspect its contents.
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [10]:
# Wrap the raw feature matrix in a DataFrame, labelling each column with
# the corresponding feature name from the dataset.
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

In [12]:
# Preview the first three rows to confirm the feature names became the column headers.
iris_df.head(3)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2


In [16]:
# Append the classification target as a "label" column next to the four feature columns.
# NOTE(review): this mutates iris_df in place, so the earlier head(3) preview no longer
# reflects the frame's current columns.
iris_df["label"] = iris.target
# head() with no argument shows the first five rows.
iris_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),label
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


## sklearn.model_selection.train_test_split
* sklearn.model_selection.train_test_split(*arrays, test_size=None, train_size=None, random_state=None, shuffle=True, stratify=None)

In [20]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the samples for testing; random_state pins the shuffle so
# the split is identical on every run.
data = train_test_split(iris.data, iris.target, test_size=0.33, random_state=42)

# For two input arrays train_test_split returns a 4-element list ordered as
# [X_train, X_test, y_train, y_test]. Name the pieces here: the model cells
# below use X_train / y_train / X_test / y_test, and without this unpacking
# they raise NameError on a fresh kernel.
X_train, X_test, y_train, y_test = data

# Keep `data` as the displayed value; the following cells inspect it with len().
data

[array([[5.7, 2.9, 4.2, 1.3],
        [7.6, 3. , 6.6, 2.1],
        [5.6, 3. , 4.5, 1.5],
        [5.1, 3.5, 1.4, 0.2],
        [7.7, 2.8, 6.7, 2. ],
        [5.8, 2.7, 4.1, 1. ],
        [5.2, 3.4, 1.4, 0.2],
        [5. , 3.5, 1.3, 0.3],
        [5.1, 3.8, 1.9, 0.4],
        [5. , 2. , 3.5, 1. ],
        [6.3, 2.7, 4.9, 1.8],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [5.6, 2.7, 4.2, 1.3],
        [5.1, 3.4, 1.5, 0.2],
        [5.7, 3. , 4.2, 1.2],
        [7.7, 3.8, 6.7, 2.2],
        [4.6, 3.2, 1.4, 0.2],
        [6.2, 2.9, 4.3, 1.3],
        [5.7, 2.5, 5. , 2. ],
        [5.5, 4.2, 1.4, 0.2],
        [6. , 3. , 4.8, 1.8],
        [5.8, 2.7, 5.1, 1.9],
        [6. , 2.2, 4. , 1. ],
        [5.4, 3. , 4.5, 1.5],
        [6.2, 3.4, 5.4, 2.3],
        [5.5, 2.3, 4. , 1.3],
        [5.4, 3.9, 1.7, 0.4],
        [5. , 2.3, 3.3, 1. ],
        [6.4, 2.7, 5.3, 1.9],
        [5. , 3.3, 1.4, 0.2],
        [5. , 3.2, 1.2, 0.2],
        [5

In [21]:
# Number of pieces returned by train_test_split (train/test halves of each input array).
len(data)

4

In [22]:
# Number of rows in the first returned piece (the training split of iris.data).
len(data[0])

100

## sklearn.tree.DecisionTreeClassifier
* class sklearn.tree.DecisionTreeClassifier(*, criterion='gini', splitter='best', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features=None, random_state=None, max_leaf_nodes=None, min_impurity_decrease=0.0, class_weight=None, ccp_alpha=0.0)

In [23]:
from sklearn.tree import DecisionTreeClassifier
# Create the decision tree with a fixed random_state and train it on the
# training split in one step; fit() hands back the estimator itself, so
# dt_clf is the fitted classifier.
dt_clf = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)
# Display the fitted estimator as the cell's result.
dt_clf

In [24]:
# Class labels the fitted tree learned from the training targets.
dt_clf.classes_

array([0, 1, 2])

In [25]:
# One importance value per feature, in the same order as iris.feature_names.
dt_clf.feature_importances_

array([0.01256535, 0.02915555, 0.05981177, 0.89846733])

In [26]:
# Predict a class label for every sample in the held-out test split.
pred = dt_clf.predict(X_test)

In [27]:
# Show the predicted labels.
pred

array([1, 0, 2, 1, 1, 0, 1, 2, 2, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0,
       0, 1, 1, 2, 1, 2])

In [28]:
# Show the true test labels for a side-by-side comparison with pred.
y_test

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0,
       0, 1, 2, 2, 1, 2])

In [29]:
from sklearn.metrics import accuracy_score

# accuracy_score expects (y_true, y_pred): ground truth first, predictions second.
# Accuracy is symmetric, so the value is the same either way, but keeping the
# documented order avoids a silent error if this call is later swapped for an
# asymmetric metric (precision, recall, ...).
accuracy_score(y_test, pred)

0.96