In [73]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
import numpy as np
import os  

In [74]:
# Root folders of the train / test splits. The trailing slash is required:
# SimpleDatasetLoader.load builds the class folder with plain `path + type`.
(train_path, test_path) = (
    "../orange_dataset/train/",
    "../orange_dataset/test/",
)

In [75]:
# Simple preprocessor: resizes an image to a fixed width and height.
import cv2
class SimplePreprocessor:
    """Resize images to a fixed (width, height) using OpenCV."""

    def __init__(self, width, height, inter=cv2.INTER_AREA):
        # Target size in pixels and the interpolation flag forwarded to cv2.resize.
        self.width, self.height, self.inter = width, height, inter

    def preprocess(self, image):
        """Return `image` resized to the configured size (aspect ratio is not preserved)."""
        # cv2.resize takes the destination size as (width, height).
        target_size = (self.width, self.height)
        resized = cv2.resize(image, target_size, interpolation=self.inter)
        return resized

In [76]:
# Load the images found under a path and return arrays of preprocessed images and their labels.
class SimpleDatasetLoader:
    """Load every readable image of one class folder and tag it with a single label."""

    def __init__(self, preprocessor=None):
        # Optional object exposing `preprocess(image) -> image`.
        # When None, images are kept exactly as cv2.imread returned them.
        self.preprocessor = preprocessor

    def load(self, path, type, label, verbose=-1):
        """Read all images in the folder `path + type`.

        Parameters
        ----------
        path : str
            Split root. It is concatenated directly with `type`, so it must
            end with a path separator.
        type : str
            Name of the class sub-folder inside `path`.
        label :
            Label attached to every image loaded from this folder.
        verbose : int, default -1
            When > 0, print a progress line every `verbose` files.

        Returns
        -------
        (numpy.ndarray, numpy.ndarray)
            Stacked images and their labels, in sorted file-name order.
            Files that cv2.imread cannot decode are skipped. Without a
            preprocessor the images must already share one shape for the
            stacking to succeed.
        """
        data = []
        labels = []
        image_dir = path + type

        # os.listdir yields entries in arbitrary order; sort so the row order
        # of the returned arrays is reproducible between runs and machines.
        imagePaths = [image_dir + "/" + name for name in sorted(os.listdir(image_dir))]

        for (i, imagePath) in enumerate(imagePaths):
            image = cv2.imread(imagePath)

            # cv2.imread returns None for files it cannot decode; skip those.
            if image is None:
                continue

            if self.preprocessor is not None:
                image = self.preprocessor.preprocess(image)

            # Collect the sample whether or not a preprocessor is configured.
            # (Previously these appends sat inside the preprocessor branch, so a
            # loader without a preprocessor silently returned empty arrays.)
            data.append(image)
            labels.append(label)

            if verbose > 0 and i > 0 and (i + 1) % verbose == 0:
                print("[INFO] processed {}/{}".format(i + 1, len(imagePaths)))

        return (np.array(data), np.array(labels))

In [77]:
def process_img(path, type, label):
    """Load one class folder as flattened 128x128 feature rows plus their labels.

    `path` is the split root and `type` the class sub-folder; every loaded
    image receives `label`. Returns `(data, labels)` where `data` has one row
    of 49152 values per image.
    """
    side = 128
    channels = 3  # cv2.imread's default mode decodes to 3-channel BGR

    loader = SimpleDatasetLoader(SimplePreprocessor(side, side))
    data, labels = loader.load(path, type, label=label, verbose=500)

    # Flatten each image into one feature vector: 128 * 128 * 3 == 49152.
    data = data.reshape((data.shape[0], side * side * channels))

    size_mb = data.nbytes / (1024 * 1024.0)
    print("[INFO] features matrix: {:.1f}MB".format(size_mb))

    return data, labels

In [78]:
# Load both classes of both splits; label 1 = fresh, 0 = rotten.
data_fresh, fresh_labels = process_img(train_path, 'fresh', 1)
data_rotten, rotten_labels = process_img(train_path, 'rotten', 0)
tdata_fresh, fresh_tlabels = process_img(test_path, 'fresh', 1)
tdata_rotten, rotten_tlabels = process_img(test_path, 'rotten', 0)

# Stack fresh rows first, then rotten rows, keeping features and labels aligned.
x_train = np.concatenate((data_fresh, data_rotten), axis=0)
y_train = np.concatenate((fresh_labels, rotten_labels), axis=0)
x_test = np.concatenate((tdata_fresh, tdata_rotten), axis=0)
y_test = np.concatenate((fresh_tlabels, rotten_tlabels), axis=0)

[INFO] processed 500/1466
[INFO] processed 1000/1466
[INFO] features matrix: 68.7MB
[INFO] processed 500/1595
[INFO] processed 1000/1595
[INFO] processed 1500/1595
[INFO] features matrix: 74.8MB
[INFO] features matrix: 18.2MB
[INFO] features matrix: 18.9MB


In [79]:
# k-nearest-neighbours classifier (k = 9) fitted on the flattened training images.
model = KNeighborsClassifier(n_neighbors=9, n_jobs=1)
# fit() is left as the last bare expression so the notebook echoes the fitted estimator.
model.fit(x_train, y_train)

KNeighborsClassifier(n_jobs=1, n_neighbors=9)

In [80]:
from sklearn import metrics
# Predict the held-out test images and report overall accuracy as a percentage.
y_pred = model.predict(x_test)
# accuracy_score is documented as (y_true, y_pred); pass the ground truth first.
accuracy = metrics.accuracy_score(y_test, y_pred) * 100
# Legacy name kept in case a later cell reads it; the value is accuracy, not precision.
precision = accuracy
print("Accuracy with KNN: {0:.2f}%".format(accuracy))

Accuracy with KNN: 85.46%


In [81]:
# Per-class precision / recall / F1 on the test split (0 = rotten, 1 = fresh).
print(
    classification_report(
        y_true=y_test,
        y_pred=model.predict(x_test),
    )
)

              precision    recall  f1-score   support

           0       0.91      0.79      0.85       403
           1       0.81      0.92      0.86       388

    accuracy                           0.85       791
   macro avg       0.86      0.86      0.85       791
weighted avg       0.86      0.85      0.85       791

