### Day Objective
- Classification Models
    - K Nearest Neighbours Classifier
- Evaluation Metrics

In [1]:
import numpy as np
import pandas as pd

### K-NN Classifier
- Easy to implement
- KNN model will assumes that similar things exist nearer to each other

#### How does KNN works
- Step1: Select K no.of neighbours. K = 3,4,5,...
- Step2: Calculate the distance between selected data point and Nearest neighbours
    - *Euclidean distance Formula: Sqrt( (x2 - x1) ** 2 + (y2 - y1) ** 2)*
- Step3: Assign new data point to the majority of nearest neighbour classifier.

#### Advantages and Disadvantage
- KNN is mostly useful for multinomial classification.
- If data set contains large amount of data, then the prediction stage will be slow.

<img src="sec.jpg">datasets</img>

<img src="third.jpg">data set</img>

<img src="first.jpg">dataset</img>

#### Calcuting shirt size of the person based upon his height and weight

In [1]:
# read the dataset
import pandas as pd

In [2]:
df = pd.read_csv("size.csv")
df # dataset # actual output # output

Unnamed: 0,Height,Weight,Shirt_size
0,158,58,m
1,158,59,m
2,158,63,m
3,160,59,m
4,160,60,m
5,163,60,m
6,163,61,m
7,160,64,l
8,163,64,l
9,165,61,l


In [3]:
df.shape # attributes

(18, 3)

In [4]:
df.sample() # generate 5 random samples

Unnamed: 0,Height,Weight,Shirt_size
8,163,64,l


In [5]:
X = df[['Height','Weight']] # i/p da,features of our ds
X

Unnamed: 0,Height,Weight
0,158,58
1,158,59
2,158,63
3,160,59
4,160,60
5,163,60
6,163,61
7,160,64
8,163,64
9,165,61


In [6]:
Y = df[['Shirt_size']] # predict
Y

Unnamed: 0,Shirt_size
0,m
1,m
2,m
3,m
4,m
5,m
6,m
7,l
8,l
9,l


In [8]:
### Apply KNN model
from sklearn.neighbors import KNeighborsClassifier 

In [9]:
model = KNeighborsClassifier() # model
model

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

In [10]:
help(model)

Help on KNeighborsClassifier in module sklearn.neighbors.classification object:

class KNeighborsClassifier(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.KNeighborsMixin, sklearn.neighbors.base.SupervisedIntegerMixin, sklearn.base.ClassifierMixin)
 |  KNeighborsClassifier(n_neighbors=5, weights='uniform', algorithm='auto', leaf_size=30, p=2, metric='minkowski', metric_params=None, n_jobs=None, **kwargs)
 |  
 |  Classifier implementing the k-nearest neighbors vote.
 |  
 |  Read more in the :ref:`User Guide <classification>`.
 |  
 |  Parameters
 |  ----------
 |  n_neighbors : int, optional (default = 5)
 |      Number of neighbors to use by default for :meth:`kneighbors` queries.
 |  
 |  weights : str or callable, optional (default = 'uniform')
 |      weight function used in prediction.  Possible values:
 |  
 |      - 'uniform' : uniform weights.  All points in each neighborhood
 |        are weighted equally.
 |      - 'distance' : weight points by the inverse of t

In [12]:
model = KNeighborsClassifier(n_neighbors = 4)
model # k=4

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=4, p=2,
                     weights='uniform')

In [13]:
model.fit(X,Y) # training the data 

  """Entry point for launching an IPython kernel.


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=4, p=2,
                     weights='uniform')

In [14]:
Y_predict = model.predict(X) # x =height,weight
Y_predict # size

array(['m', 'm', 'm', 'm', 'm', 'l', 'l', 'l', 'l', 'l', 'l', 'l', 'l',
       'l', 'l', 'l', 'l', 'l'], dtype=object)

In [15]:
model.predict([[162,63]]) # correct prediction

array(['l'], dtype=object)

In [18]:
df

Unnamed: 0,Height,Weight,Shirt_size
0,158,58,m
1,158,59,m
2,158,63,m
3,160,59,m
4,160,60,m
5,163,60,m
6,163,61,m
7,160,64,l
8,163,64,l
9,165,61,l


In [17]:
model.predict([[163,61]]) # wrong prediction

array(['l'], dtype=object)

### Evaluation metrics
- Confusion Matrix
- Accuracy Score

- **Confusion matrix**
![image.png](attachment:image.png)

In [15]:
## Confusion matrix
#---------------------

## Cats = 50, Birds = 50, Fish = 50 # birds and animals
#(animal)
###        Cats           Birds            Fish
### Cats     50            0                0
### Birds    0             48               2 # birds
### Fish     0              5               45



 
#                          TP + TN
## Accuracy Score = ---------------------------
#                       TP + TN + FP + FN

In [19]:
from sklearn.metrics import confusion_matrix # function
confusion_matrix(Y, Y_predict) # y means actual data

array([[11,  0],
       [ 2,  5]], dtype=int64)

In [20]:
from sklearn.metrics import accuracy_score
accuracy_score(Y, Y_predict) # no.of correct pred/total pred

0.8888888888888888

In [None]:
 # working on dataset

In [21]:
df2 = pd.read_csv('https://raw.githubusercontent.com/nagamounika5/Datasets/master/IRIS.csv')

In [22]:
df2.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [23]:
df2.isnull().sum().sum()

0

In [24]:
df2['species'].value_counts()

Iris-virginica     50
Iris-versicolor    50
Iris-setosa        50
Name: species, dtype: int64

In [22]:
### apply KNN model to Iris dataset

In [26]:
X2 = df2.iloc[:,[0,1,2,3]]
Y2 = df2['species']

In [27]:
X2.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [28]:
from sklearn.neighbors import KNeighborsClassifier

In [29]:
knn1 = KNeighborsClassifier(n_neighbors = 3)
knn1

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')

In [30]:
knn1.fit(X2,Y2)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')

In [31]:
Y_predict2 = knn1.predict(X2)

In [32]:
accuracy_score(Y2, Y_predict2)

0.96

In [33]:
### Increase K no.of neighbours 
knn2 = KNeighborsClassifier(n_neighbors = 5)
knn2

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

In [34]:
knn2.fit(X2,Y2)
Y_predict3 = knn2.predict(X2)

In [35]:
accuracy_score(Y2, Y_predict3)

0.9666666666666667