# K-Nearest Neighbors (K-NN)

Predicting air quality with a K-NN model.

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Load the pollution data; every column but the last is used as a feature,
# and the last column is used as the target.
dataset = pd.read_csv('csv_files/pollution_dataset.csv')
feature_frame, target_series = dataset.iloc[:, :-1], dataset.iloc[:, -1]
X, y = feature_frame.values, target_series.values

In [3]:
# Inspect the target labels as loaded from the CSV (string class names).
print(y)

['Moderate' 'Moderate' 'Moderate' ... 'Moderate' 'Good' 'Moderate']


## Encoding categorical data

In [4]:
from sklearn.preprocessing import LabelEncoder
# Encode the string class labels as integers.
# The encoder is always fitted on the original label column of `dataset`
# rather than on `y` itself: `y` is overwritten below, so fitting on `y`
# would, on a second run of this cell, re-fit the encoder on integer codes
# and make le.inverse_transform return numbers instead of class names.
le = LabelEncoder()
y = le.fit_transform(dataset.iloc[:, -1].values)

In [5]:
# Inspect the integer codes produced by the LabelEncoder.
print(y)

[2 2 2 ... 2 0 2]


## Splitting the dataset into the Training set and Test set

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,   # hold out a quarter of the rows for evaluation
    random_state=0,   # fixed seed so the split is reproducible
)

## Feature Scaling

In [7]:
from sklearn.preprocessing import StandardScaler
# Learn the scaling statistics from the training rows only, then apply the
# same fitted scaler to both splits so the test set does not influence them.
# NOTE: X_train / X_test are overwritten here, so re-running this cell on its
# own would fit the scaler on already-scaled data; re-run the split cell first.
sc = StandardScaler()
sc.fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [8]:
# Inspect the scaled training features (NumPy abbreviates large arrays with '...').
print(X_train)

[[ 0.19622095 -0.71533771 -0.63961989 ...  0.55762899 -0.50356467
   0.87220281]
 [ 2.23001197  0.42295797 -0.15579749 ...  2.56820501  0.72158124
   2.01396084]
 [ 1.34770557  0.93236102 -0.62293636 ...  1.10596791 -0.92122805
   0.79435567]
 ...
 [-1.22444188  0.07706702 -0.60208194 ... -1.01427589  0.55451589
  -1.87839611]
 [-1.19453319 -0.86627195 -0.53117694 ... -1.05083182  1.13924462
  -0.70420176]
 [-0.34213549  0.8568939   0.17370225 ... -0.52077087 -0.28081087
  -0.50309665]]


In [9]:
# Inspect the scaled test features (NumPy abbreviates large arrays with '...').
print(X_test)

[[ 0.34576441  0.65564825  0.70340436 ...  0.46623917  0.16469674
   0.94356268]
 [-0.19259203  0.45440261  0.34470845 ...  0.17379175  0.66589279
   0.26240022]
 [ 0.15135792 -0.61471489 -0.22670249 ...  0.33829343 -0.94907227
   0.85274102]
 ...
 [-0.3122268   0.13995628 -0.04318365 ... -0.33799123  1.3341542
  -0.11386096]
 [-1.79270702 -0.44491387  0.15284784 ... -1.06910978  1.05571194
  -0.13981   ]
 [-1.47866576 -0.93545014 -0.24755691 ... -0.94116404  1.00002349
   0.56730151]]


## Training the K-NN model on the Training set

In [10]:
from sklearn.neighbors import KNeighborsClassifier
# Minkowski distance with p = 2 is the ordinary Euclidean distance.
knn_params = {'n_neighbors': 5, 'metric': 'minkowski', 'p': 2}
classifier = KNeighborsClassifier(**knn_params)
# Kept as the final expression so the notebook still displays the fitted estimator.
classifier.fit(X_train, y_train)

## Predicting a new result

In [11]:
# One unseen observation with nine feature values.
# NOTE(review): assumed to be in the same column order as X — confirm against the CSV.
new_sample = [[26.5, 70.7, 6.9, 16, 21.9, 5.6, 1.01, 12.7, 303]]
# Apply the scaler fitted on the training data before predicting.
new_sample_scaled = sc.transform(new_sample)
prediction = classifier.predict(new_sample_scaled)
# Map the predicted integer code back to its original class name.
air_quality = le.inverse_transform(prediction)
print(air_quality)

['Good']


## Predicting the Test set results

In [12]:
y_pred = classifier.predict(X_test)
# Show predictions (left column) next to the true labels (right column).
predicted_column = y_pred.reshape(-1, 1)
actual_column = y_test.reshape(-1, 1)
print(np.concatenate((predicted_column, actual_column), axis=1))

[[3 3]
 [2 2]
 [2 2]
 ...
 [0 0]
 [0 0]
 [0 0]]


## Making the Confusion Matrix

In [13]:
from sklearn.metrics import confusion_matrix, accuracy_score
# In scikit-learn's confusion matrix, rows are the true classes and columns
# are the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
# Left as the final expression so the notebook displays the accuracy value.
accuracy_score(y_true=y_test, y_pred=y_pred)

[[513   0   0   0]
 [  0  91   0  34]
 [  5   0 361   8]
 [  0  11  31 196]]


0.9288