In [38]:
from PIL import Image
import pandas as pd
from tqdm import tqdm
import numpy as np
import os
from sklearn.decomposition import PCA

import feature_extraction

ModuleNotFoundError: No module named 'sklearn'

In [27]:
def distance_ecu(x_train, x_test_point):
    """Euclidean distance between one query point and every training sample.

    The subtraction relies on NumPy broadcasting, and the norm is reduced
    over the last axis, so each entry of the result is the length of one
    difference vector.
    """
    offsets = x_train - x_test_point
    return np.linalg.norm(offsets, axis=-1)


def nearest_neighbors(distance_point, K):
    """Return the indices of the K smallest distances, closest first.

    When K exceeds the number of distances, the slice simply yields every
    index in ascending-distance order.
    """
    return np.argsort(distance_point)[:K]
     

def voting(df_nearest, y_train):
    """Majority vote over the labels of the selected neighbours.

    ``df_nearest`` indexes into ``y_train``. ``np.unique`` returns labels in
    sorted order and ``np.argmax`` picks the first maximum, so a tie goes to
    the smallest label.
    """
    neighbour_labels = y_train[df_nearest]
    labels, tallies = np.unique(neighbour_labels, return_counts=True)
    winner = np.argmax(tallies)
    return labels[winner]

def KNN_from_scratch(x_train, y_train, x_test, K):
    """Classify every row of ``x_test`` with a brute-force K-nearest-neighbour vote.

    For each query the distances to all training samples are computed, the K
    closest training indices are selected, and their labels are put to a
    majority vote. Returns a plain list with one predicted label per query.
    """
    return [
        voting(nearest_neighbors(distance_ecu(x_train, query), K), y_train)
        for query in x_test
    ]

def print_result(result, y_test):
    """Return the fraction of predictions that equal the true labels.

    Despite its name, this function prints nothing; the caller decides what
    to do with the returned accuracy.

    Parameters
    ----------
    result : array-like
        Predicted labels (list, tuple, or array).
    y_test : array-like
        True labels, one per prediction.

    Returns
    -------
    float
        Number of matching positions divided by the number of labels.

    Raises
    ------
    ValueError
        If the two inputs do not hold the same number of labels.
    ZeroDivisionError
        If both inputs are empty.
    """
    # Convert before comparing: `==` between two plain lists yields a single
    # bool rather than an element-wise result.
    # Flatten so an (n, 1) column and an (n,) vector are compared
    # position-by-position instead of broadcasting to an (n, n) matrix.
    expected = np.asarray(y_test).ravel()
    predicted = np.asarray(result).ravel()
    if expected.shape != predicted.shape:
        raise ValueError(
            f'expected {expected.size} predictions, got {predicted.size}'
        )
    matches = expected == predicted
    return int(np.count_nonzero(matches)) / matches.size


In [18]:

class ImageClassifier:
    """Load per-class image features from a train and a test directory.

    Each of ``train_path`` and ``test_path`` is expected to contain one
    sub-folder per class name. Every file in such a folder is passed through
    the project's ``feature_extraction`` helpers (``load_animals`` ->
    ``gaussian_pyramide`` -> ``feature_extraction``), and the resulting
    vectors are stacked into one matrix per split.

    Parameters
    ----------
    train_path, test_path : str
        Root folders of the training and test images.
    classes : sequence of str, optional
        Sub-folder names to load. The position of a name in this sequence is
        its numeric label. Defaults to ``('bird', 'dog')``.

    Attributes set by ``fit`` (which the constructor calls):
    ``train_data`` / ``train_x`` and ``test_data`` / ``test_x`` hold the
    stacked features; ``train_y`` and ``test_y`` hold float labels with one
    entry per stacked row.
    """

    def __init__(self, train_path, test_path, classes=('bird', 'dog')):
        self.train_path = train_path
        self.test_path = test_path
        self.train_set = os.listdir(train_path)
        self.test_set = os.listdir(test_path)
        self.classes = tuple(classes)
        self.fit()

    def _load_features(self, root, animal):
        """Stack the feature vectors of every file in ``root/animal``.

        Returns a 2-D array, or ``None`` when the folder holds no files.
        """
        class_dir = os.path.join(root, animal)
        vectors = []
        # os.listdir gives no ordering guarantee; sorting keeps the row order
        # reproducible between runs and machines.
        for singlefile in tqdm(sorted(os.listdir(class_dir))):
            img = feature_extraction.load_animals(os.path.join(class_dir, singlefile))
            hiera = feature_extraction.gaussian_pyramide(img)
            vectors.append(feature_extraction.feature_extraction(hiera))
        # One vstack per folder instead of one per file avoids re-copying the
        # growing matrix on every iteration.
        return np.vstack(vectors) if vectors else None

    def fit(self):
        """Extract features for all configured classes and build label vectors.

        Raises
        ------
        ValueError
            If no features at all were found for the train or the test split.
        """
        self.train_data = None
        self.test_data = None
        train_blocks, train_labels = [], []
        test_blocks, test_labels = [], []

        for label, animals in enumerate(self.classes):
            print(f'Now animal {animals}')
            for banner, root, blocks, labels in (
                ('Training sets...', self.train_path, train_blocks, train_labels),
                ('Test sets...', self.test_path, test_blocks, test_labels),
            ):
                print(banner)
                block = self._load_features(root, animals)
                if block is None:
                    continue
                blocks.append(block)
                # Labels follow the number of rows actually loaded, so they
                # stay aligned with the features whatever the folder size.
                labels.append(np.full(block.shape[0], float(label)))

        if not train_blocks or not test_blocks:
            raise ValueError(
                f'no images found for classes {self.classes!r} under '
                f'{self.train_path!r} / {self.test_path!r}'
            )

        self.train_data = np.vstack(train_blocks)
        self.test_data = np.vstack(test_blocks)
        self.train_x = self.train_data
        self.test_x = self.test_data
        self.train_y = np.hstack(train_labels)
        self.test_y = np.hstack(test_labels)
            

In [19]:

# The constructor calls fit(), so feature extraction for both splits runs here.
ic = ImageClassifier(
    train_path='./LinnaeusDS/train/',
    test_path='./LinnaeusDS/test/',
)

Now animal bird
Training sets...


100%|██████████| 600/600 [00:06<00:00, 88.28it/s]


Test sets...


100%|██████████| 200/200 [00:02<00:00, 76.17it/s]


Now animal dog
Training sets...


100%|██████████| 600/600 [00:07<00:00, 85.58it/s]


Test sets...


100%|██████████| 200/200 [00:02<00:00, 86.89it/s]


In [37]:
# Single evaluation with 13 neighbours; prints the test-set accuracy.
result = KNN_from_scratch(ic.train_x, ic.train_y, ic.test_x, K=13)
print(print_result(y_test=ic.test_y, result=result))

0.6025


In [36]:
# Sweep the odd neighbourhood sizes 1, 3, ..., 29 and print "K accuracy" per line.
for k in range(1, 30, 2):
    result = KNN_from_scratch(ic.train_x, ic.train_y, ic.test_x, K=k)
    print(k, print_result(result, ic.test_y))

1 0.5125
3 0.545
5 0.56
7 0.555
9 0.5725
11 0.5925
13 0.6025
15 0.5875
17 0.5975
19 0.58
21 0.59
23 0.6
25 0.5875
27 0.575
29 0.575
