In [47]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import pyplot

In [6]:
from sklearn.datasets import load_iris

In [7]:
iris = load_iris()

## Dataset Anaylsis

In [8]:
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [9]:
print(iris.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [10]:
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)

In [11]:
iris_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [12]:
iris.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [13]:
iris.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [14]:
iris_target = pd.DataFrame(iris.target, columns=['target'])

In [15]:
iris_target.head()

Unnamed: 0,target
0,0
1,0
2,0
3,0
4,0


## Splitting dataset

In [16]:
from sklearn.model_selection import train_test_split

In [17]:
X_train, X_test, y_train, y_test = train_test_split(iris_df, iris_target, test_size=0.33, random_state=42)

## Linear Regression

In [18]:
from sklearn.linear_model import LinearRegression

In [19]:
lr = LinearRegression()

In [20]:
lr.fit(X_train, y_train)

LinearRegression()

In [21]:
lr_pred = lr.predict(X_test)

In [46]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [45]:
print('Mean Absolute Error:', mean_absolute_error(y_test, lr_pred))
print('Mean Squared Error:', mean_squared_error(y_test, lr_pred))
print('Mean Root Squared Error:', np.sqrt(mean_squared_error(y_test, lr_pred)))

Mean Absolute Error: 0.1598427861123997
Mean Squared Error: 0.04260034113761791
Mean Root Squared Error: 0.20639850081242817


## K-Means

In [26]:
from sklearn.cluster import KMeans

In [27]:
kmeans = KMeans(n_clusters=3)

In [28]:
kmeans.fit(X_train)
kmeans_pred = kmeans.predict(X_test)

In [29]:
centers = kmeans.cluster_centers_
print(centers)

[[5.77142857 2.69285714 4.34047619 1.42619048]
 [4.96451613 3.37741935 1.46451613 0.2483871 ]
 [6.85925926 3.08518519 5.7        2.01111111]]


In [30]:
print('Mean Absolute Error:', mean_absolute_error(y_test, kmeans_pred))
print('Mean Squared Error:', mean_squared_error(y_test, kmeans_pred))
print('Mean Root Squared Error:', np.sqrt(mean_squared_error(y_test, kmeans_pred)))

Mean Absolute Error: 0.84
Mean Squared Error: 1.0
Mean Root Squared Error: 1.0


## KNN

In [31]:
from sklearn.neighbors import KNeighborsClassifier

In [32]:
knn = KNeighborsClassifier(n_neighbors=3)

In [33]:
knn.fit(X_train, y_train)

  knn.fit(X_train, y_train)


KNeighborsClassifier(n_neighbors=3)

In [34]:
knn_pred = knn.predict(X_test)

In [35]:
print('Mean Absolute Error:', mean_absolute_error(y_test, knn_pred))
print('Mean Squared Error:', mean_squared_error(y_test, knn_pred))
print('Mean Root Squared Error:', np.sqrt(mean_squared_error(y_test, knn_pred)))

Mean Absolute Error: 0.02
Mean Squared Error: 0.02
Mean Root Squared Error: 0.1414213562373095


## ID3

In [36]:
from sklearn.tree import DecisionTreeClassifier

In [37]:
dtc = DecisionTreeClassifier()

In [38]:
dtc.fit(X_train, y_train)

DecisionTreeClassifier()

In [39]:
dtc_pred = dtc.predict(X_test)

In [40]:
print('Mean Absolute Error:', mean_absolute_error(y_test, dtc_pred))
print('Mean Squared Error:', mean_squared_error(y_test, dtc_pred))
print('Mean Root Squared Error:', np.sqrt(mean_squared_error(y_test, dtc_pred)))

Mean Absolute Error: 0.02
Mean Squared Error: 0.02
Mean Root Squared Error: 0.1414213562373095
