# Working on iris data

#### Loading Data Set

In [33]:
from sklearn.datasets import load_iris
# Load the iris dataset; the returned object is indexed by key in the cells
# below (e.g. iris["data"], iris["target"]).
iris = load_iris()

In [7]:
# List the fields available on the loaded dataset object
iris.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [8]:
# Names of the feature variables
print(iris["feature_names"])

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [9]:
# Names of the target classes
print(iris["target_names"])

['setosa' 'versicolor' 'virginica']


In [10]:
# Shape of the feature matrix: (number of samples, number of features)
iris["data"].shape

(150, 4)

In [11]:
# First 10 rows of the feature matrix
iris["data"][:10]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1]])

In [12]:
# First 10 entries of the target vector
iris["target"][:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

#### Train and Test Split

In [34]:
# importing Library
from sklearn.model_selection import train_test_split

# Hold out 25% of the samples as a test set; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    iris["data"],
    iris["target"],
    test_size=0.25,
    random_state=804,
)

In [35]:
# Report the dimensions of the training split, then of the test split
print(
    "Shape of train x:", X_train.shape,
    "\nShape of train y:", y_train.shape,
)
print(
    "Shape of test x:", X_test.shape,
    "\nShape of test y:", y_test.shape,
)

Shape of train x: (112, 4) 
Shape of train y: (112,)
Shape of test x: (38, 4) 
Shape of test y: (38,)


## 1 Nearest Neighbour method

In [1]:
# Building a 1-Nearest-Neighbour function
import numpy as np
import math
def NN(X_train,y_train, X_test):
    """Predict a label for each test sample with the 1-nearest-neighbour rule.

    Each row of ``X_test`` receives the label of the training row that is
    closest to it in Euclidean distance. When several training rows are
    equally close, the one that appears first in ``X_train`` wins.

    Parameters
    ----------
    X_train : array-like of shape (n_train, n_features)
        Training feature matrix.
    y_train : sequence of length n_train
        Label of each training row, indexable by integer position.
    X_test : array-like of shape (n_test, n_features)
        Samples to classify.

    Returns
    -------
    list
        One predicted label (an element of ``y_train``) per row of ``X_test``.

    Raises
    ------
    ValueError
        If ``X_train`` contains no samples, since no neighbour exists then.
    """
    X_train = np.asarray(X_train, dtype=float)
    X_test = np.asarray(X_test, dtype=float)
    if len(X_train) == 0:
        raise ValueError("X_train must contain at least one sample")

    y_predicted = []
    for sample in X_test:
        # Distance from this test sample to every training row in one
        # vectorised step (broadcasting over the rows of X_train).
        distances = np.sqrt(((X_train - sample) ** 2).sum(axis=1))
        # argmin returns the first index of the minimum, so ties resolve to
        # the earliest training row.
        nearest = int(np.argmin(distances))
        y_predicted.append(y_train[nearest])
    return y_predicted

#### Applying Nearest Neighbour function on iris data

In [36]:
# Predict a label for every test sample with the 1-nearest-neighbour function
y_predicted_iris = NN(X_train,y_train, X_test)

#### NN Model Evaluation on iris data

In [46]:
# Accuracy and error of the 1-NN predictions on the test split
error_iris = np.mean((y_test != y_predicted_iris))
accuracy_iris = np.mean((y_test == y_predicted_iris))
# NN() uses only the single nearest neighbour, so the report is labelled k = 1;
# both figures are rounded to two decimals, matching the KNN report.
print("Error of NN Model where k = 1:",round(error_iris,2))
print("Accuracy of NN Model where k = 1:", round(accuracy_iris,2))

Error of NN Model where k = 3: 0.03
Accuracy of NN Model where k = 3: 0.9736842105263158


## 3 Nearest Neighbour method

In [2]:
# Building a k-Nearest-Neighbours function
import numpy as np
import math
def KNN(X_train,y_train, X_test, n):
    """Predict a label for each test sample by majority vote of its n nearest neighbours.

    For every row of ``X_test`` the Euclidean distance to every training row
    is computed, the ``n`` closest training rows are selected, and the most
    frequent label among them is returned.

    Tie handling:
      * Training rows at exactly the same distance are ordered by label
        (neighbours are ranked as ``(distance, label)`` pairs).
      * If several labels share the highest vote count, the label whose first
        occurrence is nearest to the test sample wins.

    Parameters
    ----------
    X_train : array-like of shape (n_train, n_features)
        Training feature matrix.
    y_train : sequence of length n_train
        Label of each training row.
    X_test : array-like of shape (n_test, n_features)
        Samples to classify.
    n : int
        Number of neighbours that vote. Values larger than ``n_train`` use
        every training row.

    Returns
    -------
    list
        One predicted label (an element of ``y_train``) per row of ``X_test``.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1, if ``X_train`` is empty, or if ``X_train``
        and ``y_train`` differ in length.
    """
    if n < 1:
        raise ValueError("n must be at least 1")
    X_train = np.asarray(X_train, dtype=float)
    X_test = np.asarray(X_test, dtype=float)
    labels = list(y_train)
    if len(X_train) == 0:
        raise ValueError("X_train must contain at least one sample")
    if len(labels) != len(X_train):
        raise ValueError("X_train and y_train must have the same length")

    y_predicted = []
    for sample in X_test:
        # Distances to all training rows at once (broadcast over rows).
        distances = np.sqrt(((X_train - sample) ** 2).sum(axis=1))
        # Rank once per test sample, after all distances are known.
        ranked = sorted(zip(distances.tolist(), labels))
        votes = [label for _, label in ranked[:n]]
        # max() keeps the first label reaching the top count, i.e. the one
        # seen nearest to the test sample.
        y_predicted.append(max(votes, key=votes.count))
    return y_predicted

In [3]:
# Making own sorting function
def sort(dis):
    """Sort the list ``dis`` in ascending order, in place, and return it.

    Elements are compared with their natural ordering, so a list of
    ``[distance, label]`` pairs is ordered by distance first and by label
    when distances are equal.

    Parameters
    ----------
    dis : list
        List to sort; it is modified in place.

    Returns
    -------
    list
        The same list object that was passed in, now sorted.
    """
    # The built-in sort replaces the hand-written O(n^2) exchange loop.
    dis.sort()
    return dis

#### Applying k Nearest Neighbour function on iris data with k = 3

In [40]:
# Applying KNN Function with k = 3
# (the neighbour count passed to KNN must match the k = 3 stated in the section
# heading and in the printed evaluation report)
y_predicted_iris_knn = KNN(X_train,y_train, X_test, 3)

#### KNN Model Evaluation on iris data

In [41]:
# Share of test labels the KNN model predicted incorrectly / correctly
error_iris_knn = (y_test != y_predicted_iris_knn).mean()
accuracy_iris_knn = (y_test == y_predicted_iris_knn).mean()
print("Error of KNN Model where k = 3:", round(error_iris_knn, 2))
print("Accuracy of KNN Model where k = 3:", round(accuracy_iris_knn, 2))

Error of KNN Model where k = 3: 0.03
Accuracy of KNN Model where k = 3: 0.97
