<a href="https://colab.research.google.com/github/Apoorv1401/Convolution_Neural_Networks/blob/main/Image_processing_with_Open_CV_and_KNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import cv2
import numpy as np

In [24]:
#We define a preprocessing function because models such as CNNs (and the KNN classifier used below) require every image in the dataset to have the same fixed size, and therefore the same number of features
#so every image is resized to a common width and height
def preprocess(image, width, height, inter = cv2.INTER_AREA): #interpolation is used to control the algorithm used for resizing
  """Resize an image to a fixed size, ignoring its aspect ratio.

  Args:
    image: image array as returned by cv2.imread.
    width: target width in pixels (number of columns).
    height: target height in pixels (number of rows).
    inter: OpenCV interpolation flag passed to cv2.resize.

  Returns:
    The resized image.
  """
  # cv2.resize takes the target size as (width, height), not (height, width);
  # passing them swapped silently transposes any non-square target size.
  return cv2.resize(image, (width, height), interpolation=inter)

In [4]:
import os

In [56]:
#Function for loading our dataset
def loading_dataset(imagePaths, width, height, verbose = -1): #verbose is used here to give us a update as to how many images processed
  """Load, resize and label every image listed in `imagePaths`.

  The label of an image is the name of the directory that contains it,
  i.e. paths are expected to look like `.../<label>/<file>`.

  Args:
    imagePaths: sequence of image file paths.
    width: target width passed to `preprocess`.
    height: target height passed to `preprocess`.
    verbose: if > 0, print a progress line every `verbose` paths.

  Returns:
    A tuple `(data, labels)` of NumPy arrays holding the resized images and
    their labels. Files that cannot be read are skipped, so both arrays may
    be shorter than `imagePaths`.
  """
  data = []
  labels = []
  total = len(imagePaths)

  for i, imagePath in enumerate(imagePaths):
    image = cv2.imread(imagePath)

    if image is None:
      # cv2.imread returns None (it does not raise) for a missing, corrupt or
      # unsupported file; resizing None would fail with an opaque OpenCV error.
      print("[WARNING] could not read image, skipping: {}".format(imagePath))
    else:
      # The parent directory name is the class label.
      label = imagePath.split(os.path.sep)[-2]
      data.append(preprocess(image, width, height))
      labels.append(label)

    # Progress counts every path visited, including skipped ones. The former
    # `i > 0` guard was dropped: it only hid the first line when verbose == 1.
    if verbose > 0 and (i + 1) % verbose == 0:
      print("[INFO] processed {}/{}".format(i + 1, total))
  return (np.array(data), np.array(labels))

In [27]:
'''So we have basically five steps to perform:
load dataset
preprocess the image
split the dataset
train the classifier
determine the accuracy
'''

'So we have basically five steps to perform:\nload dataset\npreprocess the image\nsplit the dataset\ntrain the classifier\ndetermine the accuracy\n'

In [11]:
'''WE need the following packages
KNeighborsClassifier for the implementation of the KNN algorithm
LabelEncoder for converting our labels which are strings to integers
train_test_split function for dividing the dataset into training and testing sets
'''

'WE need the following packages\nKNeighborsClassifier for the implementation of the KNN algorithm\nLabelEncoder for converting our labels which are strings to integers\ntrain_test_split function for dividing the dataset into training and testing sets\n'

In [12]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from imutils import paths

In [42]:
#Collect the path of every image found under the dataset directory
print('Loading Images')
imagePaths = [image_path for image_path in paths.list_images('/content/sample_data/Dataset')]

Loading Images


In [44]:
#Sanity check: how many image paths were found
len(imagePaths)

3000

In [57]:
#Load every image resized to 32x32, reporting progress every 150 images
data, labels = loading_dataset(imagePaths, width=32, height=32, verbose=150)

[INFO] processed 150/3000
[INFO] processed 300/3000
[INFO] processed 450/3000
[INFO] processed 600/3000
[INFO] processed 750/3000
[INFO] processed 900/3000
[INFO] processed 1050/3000
[INFO] processed 1200/3000
[INFO] processed 1350/3000
[INFO] processed 1500/3000
[INFO] processed 1650/3000
[INFO] processed 1800/3000
[INFO] processed 1950/3000
[INFO] processed 2100/3000
[INFO] processed 2250/3000
[INFO] processed 2400/3000
[INFO] processed 2550/3000
[INFO] processed 2700/3000
[INFO] processed 2850/3000
[INFO] processed 3000/3000


In [58]:
#Shape of the stacked image array returned by loading_dataset
data.shape

(3000, 32, 32, 3)

In [59]:
#Peek at the first five labels (still strings at this point)
labels[:5]


array(['Cats', 'Cats', 'Cats', 'Cats', 'Cats'], dtype='<U6')

In [60]:
#For applying the KNN algorithm we need our data to be a single list of pixel intensities
#for that purpose we will use the reshape function in numpy

In [61]:
#Flatten each image into a single row of pixel intensities.
#-1 lets NumPy infer the row length (32*32*3 = 3072 for 32x32 colour images),
#so this cell keeps working if the resize dimensions used when loading change.
data = data.reshape((data.shape[0], -1))

In [62]:
data.shape
#The pixel values are unchanged; each of the 3000 images is now represented as one flat vector of 3072 values (32*32*3)

(3000, 3072)

In [63]:
#Encoder used below to turn the string class labels into integers
le = LabelEncoder()


In [64]:
labels = le.fit_transform(labels)
#This step replaces the string labels with integers, for example: Cats to 0, Dogs to 1, Pandas to 2
#NOTE(review): `labels` is overwritten in place, so re-running this cell would re-fit the encoder on the integer labels;
#presumably le.classes_ would then no longer hold the class names - confirm before re-running out of order

In [65]:
#Peek at the first five labels after integer encoding
labels[:5]


array([0, 0, 0, 0, 0])

In [66]:
#Hold out 25% of the samples for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.25,
    random_state=123,
)

In [67]:
#Fit a k-nearest-neighbours classifier (k = 3) on the training split; it is evaluated in a later cell
knn_model = KNeighborsClassifier(n_neighbors=3)
knn_model.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')

In [69]:
from sklearn.metrics import classification_report
#to Evaluate the performance of our model and have a formatted report in tabular format

In [71]:
#Predict on the held-out split and print per-class precision, recall and F1
print(classification_report(
    y_true=y_test,
    y_pred=knn_model.predict(X_test),
    target_names=le.classes_,
))


              precision    recall  f1-score   support

        Cats       0.38      0.54      0.45       252
        Dogs       0.39      0.47      0.43       240
      Pandas       0.84      0.35      0.50       258

    accuracy                           0.45       750
   macro avg       0.54      0.45      0.46       750
weighted avg       0.54      0.45      0.46       750



In [72]:
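#The overall accuracy is 45% (the 54% in the report is the macro-averaged precision, not the accuracy). That is still above the roughly 33% expected from random guessing among three balanced classes, which is not bad for a classifier that does not learn at all
#Pandas have the highest precision: 84% of the images predicted as Pandas really were Pandas, probably because they look quite different from dogs and cats. However, their recall is only 35%, so most panda images were still assigned to another class
#Dogs and cats have precisions of only 39% and 38% (with recalls of 47% and 54%). This may be because their fur is quite similar in shade and color.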
