In [208]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import numpy as np 
import pandas as pd
import os  
import random

In [209]:
# Root folder of the training images. Class sub-folder names are appended to
# this string by plain concatenation, so the trailing slash must be kept.
train_path = "../pokemon_dataset/train/"

In [210]:
# preprocess image size
import cv2
class SimplePreprocessor:
    """Resize images to a fixed width and height."""

    def __init__(self, width, height, inter=cv2.INTER_AREA):
        """Store the target dimensions and the interpolation flag.

        Args:
            width: target width in pixels.
            height: target height in pixels.
            inter: OpenCV interpolation flag forwarded to ``cv2.resize``.
        """
        self.width, self.height = width, height
        self.inter = inter

    def preprocess(self, image):
        """Return ``image`` resized to ``(width, height)``.

        The image is stretched to the target size; no cropping or padding
        is done here.
        """
        target_size = (self.width, self.height)
        return cv2.resize(image, target_size, interpolation=self.inter)

In [211]:
# load images from path to array
class SimpleDatasetLoader:
    """Read image files into NumPy arrays, applying optional preprocessors."""

    def __init__(self, preprocessors=None):
        """Store the preprocessing pipeline.

        Args:
            preprocessors: optional list of objects exposing
                ``preprocess(image)``; they are applied to every image in
                list order. ``None`` means "no preprocessing".
        """
        self.preprocessors = preprocessors
        if self.preprocessors is None:
            self.preprocessors = []

    def load(self, imagePaths, label, verbose=-1):
        """Load every readable image in ``imagePaths`` and tag it with ``label``.

        Args:
            imagePaths: sequence of file paths handed to ``cv2.imread``.
            label: the single label assigned to every loaded image.
            verbose: when positive, a progress line is printed after every
                ``verbose``-th path (readable or not); otherwise nothing is
                printed.

        Returns:
            A ``(data, labels)`` tuple of NumPy arrays. Paths that
            ``cv2.imread`` cannot decode are skipped, so both arrays may be
            shorter than ``imagePaths`` but always have equal length.
        """
        data = []
        labels = []

        for i, imagePath in enumerate(imagePaths):
            image = cv2.imread(imagePath)

            # cv2.imread reports an unreadable / non-image file by returning
            # None; such entries are skipped instead of aborting the load.
            if image is not None:
                # `or ()` keeps images flowing even if the attribute was
                # reset to None after construction.
                for p in self.preprocessors or ():
                    image = p.preprocess(image)
                data.append(image)
                labels.append(label)

            # Progress is reported per path visited, so it must sit outside
            # the "readable image" branch or skipped files would swallow it.
            if verbose > 0 and (i + 1) % verbose == 0:
                print("[INFO] processed {}/{}".format(i + 1, len(imagePaths)))

        return (np.array(data), np.array(labels))

In [212]:
def process_img(path, label, size=128):
    """Load every image in ``path`` as a flattened feature row with one label.

    Args:
        path: directory whose entries are read as images.
        label: label assigned to every image loaded from ``path``.
        size: side length in pixels that each image is resized to before
            flattening (default 128, matching the previous hard-coded value).

    Returns:
        ``(data, labels)`` where ``data`` has shape
        ``(n_images, size * size * 3)`` and ``labels`` has length
        ``n_images``.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order reproducible across runs and machines.
    imagePaths = [os.path.join(path, name) for name in sorted(os.listdir(path))]

    sp = SimplePreprocessor(size, size)
    sdl = SimpleDatasetLoader(preprocessors=[sp])
    (data, labels) = sdl.load(imagePaths, verbose=500, label=label)

    # cv2.imread's default mode yields 3-channel images, so each resized image
    # flattens to size * size * 3 values (49152 for the default 128). The
    # width is spelled out rather than using -1 so an empty folder still
    # reshapes cleanly to (0, n_features).
    n_features = size * size * 3
    data = data.reshape((data.shape[0], n_features))

    print("[INFO] features matrix: {:.1f}MB".format(data.nbytes / (1024 * 1024.0)))

    return data, labels

In [None]:
# Load both classes: pikachu images get label 1, raichu images get label 0.
(data_pikachu, pikachu_labels) = process_img(train_path + 'pikachu', 1)
(data_raichu, raichu_labels) = process_img(train_path + 'raichu', 0)

# Stack the per-class arrays into one feature matrix and one label vector.
x_train = np.concatenate([data_pikachu, data_raichu])
y_train = np.concatenate([pikachu_labels, raichu_labels])

# Shuffle features and labels with one shared, seeded permutation so the row
# order (and therefore every later fold) is the same after each
# "Restart & Run All". An unseeded shuffle here would make all downstream
# results non-reproducible.
idx = np.random.RandomState(0).permutation(len(x_train))
x_train = x_train[idx]
y_train = y_train[idx]

# control data size (disabled; uncomment to train on the first n samples only)
# n = 80
# x_train = x_train[:n, :]
# y_train = y_train[:n]

In [214]:
from sklearn.utils import shuffle
# Shuffle features and labels together so rows stay aligned; random_state=0
# fixes the permutation applied by this call. Note that the names are rebound,
# so re-running this cell shuffles the already-shuffled arrays again.
x_train, y_train = shuffle(x_train, y_train, random_state=0)

In [215]:
# k-nearest-neighbours classifier that votes over the 9 closest training
# samples; n_jobs=1 keeps the neighbour search to a single job.
model = KNeighborsClassifier(n_neighbors=9, n_jobs=1)

In [None]:
from sklearn.model_selection import StratifiedKFold

# 10-fold stratified cross-validation of `model` on the training set.
strtfdKFold = StratifiedKFold(n_splits=10)
# split() returns a one-shot generator of (train_indices, test_indices) pairs.
kfold = strtfdKFold.split(x_train, y_train)
scores = []

for k, (train, test) in enumerate(kfold):
    # Materialise the fold's subsets once so fit/score read clearly.
    x_fit, y_fit = x_train[train], y_train[train]
    x_eval, y_eval = x_train[test], y_train[test]

    # Refit on this fold's training part, then score on its held-out part.
    model.fit(x_fit, y_fit)
    score = model.score(x_eval, y_eval)
    scores.append(score)

    # Only the class counts of the *training* part of the fold are printed.
    fit_class_counts = np.bincount(y_fit)
    print('Fold: %2d, Training/Test Split Distribution: %s, Accuracy: %.3f' % (k+1, fit_class_counts, score))

# Summarise the per-fold accuracies as mean +/- standard deviation.
cv_mean = np.mean(scores)
cv_std = np.std(scores)
print('\n\nCross-Validation accuracy: %.4f +/- %.3f' %(cv_mean, cv_std))