In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import pandas as pd
import os
import pathlib
import sys
import random
import shutil
import cv2 as cv
import matplotlib.pyplot as plt
from operator import itemgetter
from sklearn import metrics
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

from scipy.spatial import distance 

In [None]:
# Colab-only step: mount Google Drive at /content/drive. The dataset paths
# configured in this notebook all live under '/content/drive/My Drive'.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Dataset locations on the mounted drive; the train and test splits are
# sub-folders of the dataset root.
data = pathlib.Path('/content/drive/My Drive/datasetproject/')
train_data = data / 'train'
test_data = data / 'test'

# Number of neighbours.
K = 3

# Number of classes, image side length in pixels and number of folds.
num_class, IMAGE_SIZE, num_folds = 8, 256, 10

# One independent, initially empty list per metric.
acc_folds, f1_folds, prec_folds, recall_folds = [], [], [], []

Histogram equalization of the image

In [None]:
def HECOLOR(image):
    """Equalize the histogram of each colour plane and return an RGB image.

    The image is split into planes with ``cv.split`` and at most the first
    three planes are equalized independently with ``cv.equalizeHist``. The
    equalized planes are merged, the channel order is converted with
    ``cv.COLOR_BGR2RGB`` and the result is divided by 255.

    INPUT :
    - image : image array accepted by cv.split (presumably an 8-bit BGR image
      as produced by cv.imread -- confirm against callers)

    OUTPUT :
    - the equalized image in RGB channel order, divided by 255
    """
    # Only the first three planes (B, G, R) are equalized.
    equalized_planes = [cv.equalizeHist(plane) for plane in cv.split(image)[:3]]
    rgb_image = cv.cvtColor(cv.merge(equalized_planes), cv.COLOR_BGR2RGB)
    return rgb_image / 255

Load the dataset and calculate the histogram

In [None]:
def load_data(data_directory):
    """Load every image below ``data_directory`` together with its class label.

    Each sub-directory of ``data_directory`` is one class and its name must be
    an integer, which becomes the label of every image inside it. Each image
    is read with OpenCV, resized to IMAGE_SIZE x IMAGE_SIZE and passed through
    HECOLOR.

    Sub-directories and files are visited in sorted name order, because
    ``os.listdir`` returns entries in arbitrary order and the sample order
    should be reproducible between runs. The sort is lexicographic, so '10'
    comes before '2'.

    INPUT :
    - data_directory : str or path-like, root folder holding one folder per class

    OUTPUT :
    - images : numpy array with one pre-processed image per readable file
    - labels : numpy array with the integer label of each image, same order

    RAISES :
    - ValueError : if a non-empty sub-directory name is not an integer
    """
    class_names = sorted(
        d for d in os.listdir(data_directory)
        if os.path.isdir(os.path.join(data_directory, d))
    )
    labels = []
    images = []
    for d in class_names:
        label_directory = os.path.join(data_directory, d)
        for file_name in sorted(os.listdir(label_directory)):
            path = os.path.join(label_directory, file_name)
            img = cv.imread(path)
            if img is None:
                # cv.imread returns None for anything it cannot decode
                # (non-image files, nested folders); report and skip it
                # instead of crashing inside cv.resize.
                print('[WARN]: skipping unreadable image file: {}'.format(path))
                continue
            img = cv.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
            # HECOLOR already divides by 255, so no further scaling is applied
            # here; dividing again would shrink every value by another 255.
            images.append(HECOLOR(img))
            labels.append(int(d))

    return np.asarray(images), np.asarray(labels)


In [None]:

class KNN:
    '''
    k nearest neighbours classifier using the Euclidean distance
    __init__() stores the number of neighbours
    fit() memorizes the training points and their labels
    predict() predicts the class of new points by majority vote
    '''

    def __init__(self, K):
        '''
        INPUT :
        - K : number of neighbours taking part in the vote, a natural number
              bigger than 0

        RAISES :
        - ValueError : if K is smaller than 1
        '''
        if K < 1:
            raise ValueError(
                'K must be a natural number bigger than 0, got {}'.format(K))
        # empty until fit() is called
        self.X = []
        self.y = []
        # K is the parameter of the algorithm: the number of neighbours
        self.k = K

    def fit(self, X, y):
        '''
        INPUT :
        - X : is a 2D NxD array with one training point (feature vector) per row
        - y : is a 1D array of N labels, y[i] being the label of row i of X
        '''
        # keep private copies so later changes by the caller do not affect the model
        self.X = X.copy()
        self.y = y.copy()

    def predict(self, X_new):
        '''
        INPUT :
        - X_new : is a 2D MxD array with one point per row whose label has to
                  be predicted

        OUTPUT :
        - y_hat : is a numpy array with the M predicted labels, in the order
                  of the rows of X_new

        RAISES :
        - ValueError : if there are points to classify but no training points

        Each point gets the most frequent label among its k nearest training
        points. When several labels are equally frequent, the one whose first
        occurrence is closest to the point wins. If k is larger than the number
        of training points, all of them vote.
        '''
        queries = np.asarray(X_new, dtype=float)
        if len(queries) == 0:
            return np.array([])

        train = np.asarray(self.X, dtype=float)
        if len(train) == 0:
            raise ValueError('predict() needs training points: call fit() first')

        # One call computes the full MxN distance matrix in compiled code
        # instead of M*N Python-level calls to distance.euclidean.
        dists = distance.cdist(queries, train, 'euclidean')

        # A stable sort keeps training order among equally distant points,
        # which is what sorting a list of (label, distance) pairs does.
        nearest = np.argsort(dists, axis=1, kind='stable')[:, :self.k]

        y_hat = []
        for neighbours in nearest:
            votes = [self.y[j] for j in neighbours]
            # max() returns the first label reaching the highest count, and
            # votes are ordered from nearest to farthest.
            y_hat.append(max(votes, key=votes.count))

        return np.array(y_hat)

In [None]:
K = 7

# Load the training split and flatten every image into one feature row.
imagens, labels = load_data(str(train_data))
imagens = imagens.reshape((imagens.shape[0], IMAGE_SIZE*IMAGE_SIZE*3))

# nbytes is in bytes; 1024 * 1024 bytes make one MB.
print('[INFO]: Training Features Matrix: {:.1f}MB'.format(imagens.nbytes / (1024 * 1024.0)))

# Same treatment for the test split.
imgs_test, labs_test = load_data(str(test_data))
imgs_test = imgs_test.reshape((imgs_test.shape[0], IMAGE_SIZE*IMAGE_SIZE*3))

print('[INFO]: Testing Features Matrix: {:.1f}MB'.format(imgs_test.nbytes / (1024 * 1024.0)))

print('[INFO]: Classification starting....')

# Candidate numbers of neighbours to compare on the test split.
list_k = [1, 2, 3, 4, 5, 7, 9, 11, 12, 15, 17, 20, 25, 30]

for k in list_k:
    model = KNN(K=k)
    model.fit(imagens, labels)

    print("Training...... knn with k = {}".format(k))

    labs_predict = model.predict(imgs_test)

    accuracy = metrics.accuracy_score(labs_test, labs_predict)
    print("The k = {} accuracy= {}".format(k, accuracy))
