# Iris Predict

In [1]:
# Load libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import BayesianRidge
from sklearn.linear_model import Ridge
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Read the Iris table from the CSV file in the current working directory
iris = pd.read_csv(filepath_or_buffer="Iris.csv")

In [3]:
# Peek at the first five rows to confirm the file parsed and see the column layout
iris.head(n=5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
# 'Id' appears to be a running row identifier rather than a measurement, so it
# is removed before modelling. errors="ignore" keeps this cell re-runnable:
# executing it a second time (when 'Id' is already gone) would otherwise
# raise a KeyError.
iris = iris.drop(columns="Id", errors="ignore")

In [5]:
# Re-display the first five rows to verify which columns remain after the drop
iris.head(n=5)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [6]:
# Split settings; `scoring` is reused by the model-comparison loop further down
validation_size, seed, scoring = 0.20, 7, 'accuracy'

# Positional layout of the frame at this point: columns 0-3 are the four
# measurements, column 4 is the Species label.
array = iris.values
X, Y = array[:, 0:4], array[:, 4]

# Hold out `validation_size` of the rows; the fixed seed makes the split repeatable
X_train, X_validation, Y_train, Y_validation = train_test_split(
    X,
    Y,
    test_size=validation_size,
    random_state=seed,
)

In [7]:
# Candidate classifiers, each built with its scikit-learn default settings.
models = [
    ('LOR', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('NB', GaussianNB()),
    ('SVM', SVC()),
]

# One splitter shared by every model so all of them are scored on the same
# folds. KFold rejects a random_state when shuffle is False (ValueError in
# scikit-learn >= 0.24), so the seed is only passed when shuffling is enabled.
shuffle = False
kfold = KFold(n_splits=2, shuffle=shuffle, random_state=seed if shuffle else None)

# Per-model arrays of fold scores, and the matching model labels.
results = []
names = []
for name, model in models:
    cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    # Report "<label>: <mean score> (<std of fold scores>)"
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

LOR: 0.933333 (0.050000)
LDA: 0.975000 (0.008333)
KNN: 0.975000 (0.008333)
CART: 0.983333 (0.000000)
NB: 0.966667 (0.000000)
SVM: 0.991667 (0.008333)


SVM (Support Vector Machine) has the highest mean cross-validation accuracy, CART (Decision Tree Classifier) has the next highest, and LOR (Logistic Regression) has the lowest.

In [8]:
from sklearn.preprocessing import LabelEncoder

# Learn the set of distinct species names; fit() hands back the encoder
# itself, so construction and fitting can be chained.
le = LabelEncoder().fit(iris['Species'])
le

LabelEncoder()

In [9]:
# Classes learned by the encoder; a label's position in this array is the
# integer code that transform() assigns to it.
le.classes_

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [10]:
# Swap the species names for the integer codes learned by the encoder.
# NOTE: this overwrites the column, so the cell cannot be run twice in a row
# without reloading the data first.
encoded_species = le.transform(iris['Species'])
iris['Species'] = encoded_species

Transform the 'Species' label into integer codes because it is a categorical variable.

In [11]:
# Rebuild the arrays from the frame now that Species holds integer codes:
# the first four columns are the inputs, the fifth is the target.
features = iris.iloc[:, :4]
labels = iris.iloc[:, 4]
X, Y = features.values, labels.values

In [12]:
# Sanity check: feature matrix and target vector should have matching row counts
for arr in (X, Y):
    print(arr.shape)

(150, 4)
(150,)


In [13]:
# Fresh 80/20 hold-out split of the integer-encoded data. This rebinds
# X_train / Y_train, replacing the arrays produced by the earlier split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.20,
    random_state=42,
)

In [14]:
# Least-squares fit with the integer species codes as the regression target.
# fit() returns the estimator itself, so `model` and `lr` name the same
# fitted object.
lr = LinearRegression()
lr.fit(X_train, Y_train)
model = lr

In [15]:
# Predict on the held-out rows. Because the model is a regressor, the
# predictions are continuous values rather than discrete class codes.
y_pred = model.predict(X_test)
y_pred

array([ 1.2305166 , -0.04088817,  2.22162345,  1.34972015,  1.28607917,
        0.02346565,  1.05796175,  1.82557039,  1.37219259,  1.06898774,
        1.6997677 , -0.0725704 , -0.15476151, -0.06513544, -0.02320247,
        1.39524413,  1.99939235,  1.04970178,  1.28040501,  1.97315432,
        0.03134273,  1.59775615,  0.09419949,  1.91821542,  1.83026281,
        1.88002641,  1.78919243,  2.03257165,  0.0373074 ,  0.02643301])

In [16]:
# Confirm the split sizes and that there is one prediction per test row
for arr in (X_train, Y_train, X_test, Y_test, y_pred):
    print(arr.shape)

(120, 4)
(120,)
(30, 4)
(30,)
(30,)


In [17]:
# For a regressor, score() reports the coefficient of determination (R^2) on
# the given data - it is not a classification accuracy.
model.score(X_test, Y_test)

0.94672451493517107

In [18]:
# True integer-encoded species codes for the test rows, for comparison with y_pred
Y_test

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2, 0,
       2, 2, 2, 2, 2, 0, 0])

In [19]:
# Turn the predictions into a single column. Letting NumPy infer the row
# count (-1) avoids hard-coding the test-set size, so the cell keeps working
# if the split fraction or the dataset changes.
y_pred = y_pred.reshape(-1, 1)

In [20]:
from sklearn import metrics

# Explained-variance score of the regression predictions against the true codes
explained_var = metrics.explained_variance_score(y_true=Y_test, y_pred=y_pred)
explained_var

0.94753621031943813