# **Implement a KNN model to classify the animals into categories**


In [None]:
# KNN Classification
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns 
import warnings
warnings.filterwarnings('ignore')

# **Importing dataset**

In [None]:
# Upload the dataset through Colab's file-upload helper
# (presumably Zoo.csv, which the next cell reads — confirm).
from google.colab import files
uploaded=files.upload()

In [None]:
# Read the CSV into the DataFrame used by the cells below.
zoo = pd.read_csv('Zoo.csv')

In [None]:
# Display the loaded DataFrame.
zoo

# **Data Pre-processing**

In [None]:
# Print a concise summary of the DataFrame (columns, dtypes, non-null counts).
zoo.info()

In [None]:
# Descriptive statistics of the DataFrame's columns.
zoo.describe()

In [None]:
# Look for animal names that occur more than once.
duplicates = zoo.loc[:, 'animal name'].value_counts()
duplicates.loc[duplicates.gt(1)]

In [None]:
# Pull out every row whose name is exactly 'frog' for inspection.
is_frog = zoo['animal name'].eq('frog')
frog = zoo.loc[is_frog]
frog

In [None]:
# Observation: one 'frog' row is venomous and the other is not.
# Rename the venomous one to 'frog2' so the two kinds of frog are distinguishable.
#
# The assignment goes through a single .loc call: chained indexing
# (zoo['animal name'][mask] = ...) may write to a temporary copy and is a
# no-op when pandas copy-on-write is enabled.
venomous_frog = (zoo['venomous'] == 1) & (zoo['animal name'] == 'frog')
zoo.loc[venomous_frog, 'animal name'] = "frog2"

In [None]:
# Show the first 30 rows.
zoo.head(30)

In [None]:
# Bar chart of how many animals fall into each value of the 'domestic' column.
plt.figure(figsize=(10, 8))
zoo['domestic'].value_counts().plot(kind="bar")
plt.xlabel('Is Domestic')
plt.ylabel("Count")
# plt.plot() with no data draws nothing; plt.show() is the call that renders
# the figure (and matches the final plotting cell of this notebook).
plt.show()

In [None]:
# The bar chart shows that most animals are not domestic.
# Tabulate the domestic / non-domestic counts for each animal type.
pd.crosstab(index=zoo['type'], columns=zoo['domestic'])

In [None]:
# Grouped bar chart of domestic and non-domestic counts for each animal type.
type_by_domestic = pd.crosstab(zoo['type'], zoo['domestic'])
type_by_domestic.plot(kind="bar", figsize=(10, 8), title="Class wise Domestic & Non-Domestic Count")
# plt.plot() with no data draws nothing; plt.show() is the call that renders
# the figure (and matches the final plotting cell of this notebook).
plt.show()

In [None]:
# Count the rows for each value of the 'milk' column.
zoo['milk'].value_counts()

In [None]:
# Display the DataFrame again after the rename and exploration above.
zoo

In [None]:
# Split the data 70/30 into training and test sets.
from sklearn.model_selection import train_test_split

# Select columns by name rather than by position. The positional slices
# zoo.iloc[:, 1:16] / zoo.iloc[:, 16] are only correct for a 17-column file;
# with an 18-column layout (name + 16 attributes + type) they would drop one
# feature and use the last attribute, not 'type', as the label.
# NOTE(review): assumes every column other than 'animal name' and 'type' is a
# feature — confirm against the CSV.
X = zoo.drop(columns=['animal name', 'type'])
Y = zoo['type']

# stratify=Y keeps the class proportions the same in both splits;
# random_state fixes the shuffle so the split is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1, stratify=Y)


In [None]:
# Training features.
X_train

In [None]:
# Test features.
X_test

In [None]:
# Training labels.
Y_train

In [None]:
# Test labels.
Y_test

In [None]:
# K-fold splitter with ten folds; the fold count is defined once and reused.
num_folds = 10
kfold = KFold(n_splits=num_folds)

In [None]:
# Baseline classifier: 10 nearest neighbours, fitted on the training split.
model = KNeighborsClassifier(n_neighbors=10)
model.fit(X=X_train, y=Y_train)

In [None]:
# Predict a label for every row of the test split.
preds = model.predict(X_test)
# Tally how many test rows were assigned to each predicted category.
predicted_counts = pd.Series(preds).value_counts()
predicted_counts

In [None]:
# Test accuracy: the fraction of predictions that equal the true label.
(preds == Y_test).mean()

In [None]:
# Accuracy of the fitted model on the training split.
model.score(X_train,Y_train)

In [None]:
# Test accuracy expressed as a percentage.
print("Accuracy", 100 * accuracy_score(y_true=Y_test, y_pred=preds))

In [None]:
# Use a cross-validation score, since this is a small dataset.
# Get the cross-validation score of the K-Nearest Neighbors model.


# **Grid Search for Algorithm Tuning**

In [None]:
# Candidate neighbour counts k = 1 .. 39 for the grid search.
n_neighbors = np.arange(1, 40)
param_grid = {'n_neighbors': n_neighbors}

In [None]:
# Try every candidate k with GridSearchCV's default cross-validation,
# evaluated on the whole dataset (X, Y).
model = KNeighborsClassifier()
grid = GridSearchCV(model, param_grid)
grid.fit(X=X, y=Y)

In [None]:
# Report the best cross-validated score and the parameters that produced it.
print(grid.best_score_, grid.best_params_, sep="\n")

In [None]:
# Compare training and test accuracy for every k from 1 to 24.
k_values = np.arange(1,25)
train_accuracy = []
test_accuracy = []

for k in k_values:
    # Fit a fresh classifier for this k on the training split.
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, Y_train)
    # Record accuracy on the data it was fitted on and on the test split.
    train_accuracy.append(knn.score(X_train, Y_train))
    test_accuracy.append(knn.score(X_test, Y_test))

# Plot both accuracy curves against k.
plt.figure(figsize=[13,8])
plt.plot(k_values, test_accuracy, label = 'Testing Accuracy')
plt.plot(k_values, train_accuracy, label = 'Training Accuracy')
plt.legend()
plt.title('k-value VS Accuracy')
plt.xlabel('Number of Neighbors')
plt.ylabel('Accuracy')
plt.xticks(k_values)
# Save before show(): show() may clear the current figure.
plt.savefig('graph.png')
plt.show()

# Look the best k up in k_values instead of assuming "index + 1", so the
# report stays correct if the range of k is ever changed. argmax returns the
# first maximum, i.e. the smallest k among ties.
# NOTE(review): this picks k using the test split, so the reported accuracy
# is an optimistic estimate.
best_idx = int(np.argmax(test_accuracy))
print("Best accuracy is {} with K = {}".format(test_accuracy[best_idx], k_values[best_idx]))
