In [41]:
import sklearn
import numpy as np
from sklearn import datasets


In [42]:
# Seed NumPy's global random number generator so that later steps drawing
# from it behave the same way on every top-to-bottom run.
np.random.seed(10)
# Load scikit-learn's bundled Iris dataset.
raw_data = datasets.load_iris()
# Bare last expression: the notebook displays the whole loaded object.
# NOTE(review): this dumps every array in full; showing a shape or a short
# slice would keep the output readable.
raw_data

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [43]:
# List the fields available on the loaded dataset object.
raw_data.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [44]:
# Print the description text that ships with the dataset.
print(raw_data.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

:Number of Instances: 150 (50 in each of three classes)
:Number of Attributes: 4 numeric, predictive attributes and the class
:Attribute Information:
    - sepal length in cm
    - sepal width in cm
    - petal length in cm
    - petal width in cm
    - class:
            - Iris-Setosa
            - Iris-Versicolour
            - Iris-Virginica

:Summary Statistics:

                Min  Max   Mean    SD   Class Correlation
sepal length:   4.3  7.9   5.84   0.83    0.7826
sepal width:    2.0  4.4   3.05   0.43   -0.4194
petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

:Missing Attribute Values: None
:Class Distribution: 33.3% for each of 3 classes.
:Creator: R.A. Fisher
:Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
:Date: July, 1988

The famous Iris database, first used by Sir R.A. Fisher. The dataset is taken
from Fis

In [45]:
# Pull the feature matrix and the label vector out of the loaded dataset.
data, target = raw_data.data, raw_data.target

In [46]:
# Report the dimensions of the features and of the labels, one per line.
print(data.shape, target.shape, sep="\n")

(150, 4)
(150,)


In [47]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for evaluation.
# random_state pins the shuffle to this call, so the cell produces the same
# split when it is re-run on its own instead of depending on the global NumPy
# seed having been set immediately beforehand. The value 10 matches that
# global seed.
data_train, data_test, target_train, target_test = train_test_split(
    data, target, test_size=0.3, random_state=10
)
print(data_train.shape)
print(target_train.shape)

(105, 4)
(105,)


In [48]:
# Report the dimensions of the held-out features and labels, one per line.
print(data_test.shape, target_test.shape, sep="\n")

(45, 4)
(45,)


In [49]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the training features, using the class labels as
# the regression target.
# NOTE(review): the labels are categories rather than quantities; a classifier
# (e.g. LogisticRegression) may be the better tool — confirm this is intended.
model = LinearRegression()
model.fit(data_train, target_train)

In [50]:
# The regressor returns continuous scores for the held-out samples.
target_pred_raw = model.predict(data_test)
# Convert each score to the nearest class label. Rounding alone can yield a
# label that does not exist (for example -1 or 3) when a score falls outside
# the label range, so clamp to the smallest and largest labels seen in
# training before casting to int.
target_pred = np.clip(
    np.round(target_pred_raw), target_train.min(), target_train.max()
).astype(int)
print(target_pred_raw)
print(target_pred)

[ 1.18223488  1.74047551 -0.09337692  1.33314444 -0.05274766  0.99131049
  1.43739146  1.03507686 -0.03939211  1.32570288  1.44247903  1.78800664
  1.20380905 -0.03550688  0.0109412   1.82846963  1.41932426  0.02500272
  0.05938911 -0.11582572  1.89875554  1.98137902  1.57201919  0.14112592
  1.2933115  -0.02935918  1.18358058  1.28151728  1.26648714  2.091169
  1.19208229  1.17151889  1.60490811  2.03250864  1.66404189  0.02036333
  1.91067387  1.97120273  2.19817     1.7827227   0.10798037  0.20600742
  1.27790792  0.05281612  1.42653211]
[1 2 0 1 0 1 1 1 0 1 1 2 1 0 0 2 1 0 0 0 2 2 2 0 1 0 1 1 1 2 1 1 2 2 2 0 2
 2 2 2 0 0 1 0 1]


In [51]:
from sklearn.metrics import confusion_matrix


# Cross-tabulate the true test labels against the predicted labels.
conf_matrix = confusion_matrix(y_true=target_test, y_pred=target_pred)
print(conf_matrix)


[[14  0  0]
 [ 0 17  0]
 [ 0  0 14]]


In [52]:
from sklearn.metrics import classification_report

In [53]:
# Print the text report comparing the true test labels with the predictions.
print(classification_report(y_true=target_test, y_pred=target_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        17
           2       1.00      1.00      1.00        14

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

