In [None]:
from typing import Tuple

import pickle
import os
import json

import numpy as np
import cv2

# TODO: versions of libraries that will be used:
#  Python 3.6.10
#  numpy 1.18.3
#  scikit-learn 0.22.2.post1
#  opencv-python 4.2.0.34


def load_dataset(dataset_dir_path: str) -> Tuple[np.ndarray, np.ndarray]:
    """Load a directory-per-class image dataset as grayscale images.

    Every sub-directory of ``dataset_dir_path`` is treated as one class; class
    indices follow the alphabetical order of the sub-directory names. Entries
    of ``dataset_dir_path`` that are not directories are ignored, and files
    that OpenCV cannot decode (``cv2.imread`` returns ``None``) are skipped.

    Args:
        dataset_dir_path: Path to the directory holding one folder per class.

    Returns:
        A pair ``(x, y)``: ``x`` is a 1-D object array holding one grayscale
        image per element, ``y`` is the array of matching class indices.
    """
    # Sort classes alphabetically so that class indices are reproducible.
    class_dirs = sorted(
        name for name in os.listdir(dataset_dir_path)
        if os.path.isdir(os.path.join(dataset_dir_path, name))
    )

    images, labels = [], []
    for label, class_dir in enumerate(class_dirs):
        class_dir_path = os.path.join(dataset_dir_path, class_dir)
        # Sort files too, so the sample order does not depend on the OS.
        for file_name in sorted(os.listdir(class_dir_path)):
            image = cv2.imread(os.path.join(class_dir_path, file_name), cv2.IMREAD_GRAYSCALE)
            if image is None:
                # Not a decodable image (e.g. a thumbnail cache or a folder).
                continue
            images.append(image)
            labels.append(label)

    # Images may differ in size, so store them in an explicit object array:
    # np.asarray() on a ragged list is rejected by recent numpy versions and
    # yields a dense (N, H, W) stack when all sizes happen to match.
    x = np.empty(len(images), dtype=object)
    for idx, image in enumerate(images):
        x[idx] = image
    return x, np.asarray(labels, dtype=int)


def descriptor2histogram(descriptor, vocab_model, normalize=True) -> np.ndarray:
    """Turn the local descriptors of one image into a visual-word histogram.

    Args:
        descriptor: Descriptors of a single image, one row per keypoint, or
            ``None`` / empty when no keypoints were found.
        vocab_model: Fitted vocabulary model exposing ``predict`` and
            ``n_clusters``.
        normalize: When true, scale the histogram so that it sums to 1.

    Returns:
        A ``float32`` vector of length ``vocab_model.n_clusters``. It is all
        zeros when there are no descriptors.
    """
    # With no descriptors there is nothing to predict and nothing to
    # normalise; return an empty histogram instead of failing in predict()
    # or dividing by zero.
    if descriptor is None or len(descriptor) == 0:
        return np.zeros(vocab_model.n_clusters, dtype=np.float32)

    words = np.asarray(vocab_model.predict(descriptor)).astype(np.intp, copy=False)
    histogram = np.bincount(words, minlength=vocab_model.n_clusters).astype(np.float32)
    if normalize:
        histogram /= histogram.sum()
    return histogram


def apply_feature_transform(
        data: np.ndarray,
        feature_detector_descriptor,
        vocab_model
) -> np.ndarray:
    """Convert every image in ``data`` into a bag-of-visual-words histogram.

    For each image the detector/descriptor's ``detectAndCompute`` is called
    and the resulting descriptors are passed to ``descriptor2histogram``
    together with ``vocab_model``.

    Returns:
        An array built from the per-image histograms, in input order.
    """
    def _image_to_histogram(image):
        # Only the descriptors are needed; the keypoints are discarded.
        _, descriptors = feature_detector_descriptor.detectAndCompute(image, None)
        return descriptor2histogram(descriptors, vocab_model)

    return np.asarray([_image_to_histogram(image) for image in data])


def data_processing(x: np.ndarray, target_width: int = 768) -> np.ndarray:
    """Downscale images wider than ``target_width``, keeping the aspect ratio.

    Images that are not wider than ``target_width`` are left untouched.

    Args:
        x: Collection of images. A 1-D object array is updated in place; any
            other array (e.g. a dense ``(N, H, W)`` stack) is first repacked
            into a new 1-D object array, because resized images of different
            sizes cannot be stored back into a dense stack.
        target_width: Width in pixels that larger images are resized to.

    Returns:
        A 1-D object array with one (possibly resized) image per element.
    """
    if x.dtype != object or x.ndim != 1:
        repacked = np.empty(len(x), dtype=object)
        for idx, image in enumerate(x):
            repacked[idx] = image
        x = repacked

    # Iterate over the number of images (len), not the total element count
    # (size), which differs as soon as x is not a 1-D object array.
    for idx in range(len(x)):
        image = x[idx]
        height, width = image.shape[:2]
        target_height = int(height * target_width / width)
        if target_width < width and target_height < height:
            x[idx] = cv2.resize(image, (target_width, target_height))
    return x


def project():
    """Score the pickled bag-of-visual-words pipeline on the test images.

    Loads the images under ``./../../test_data/``, preprocesses them, turns
    each one into a visual-word histogram using the vocabulary model stored
    in ``./vocab_model.p``, scores them with the classifier stored in
    ``./clf.p``, prints the score and writes it to
    ``<last_name>_<first_name>_score.json``. All paths are relative to the
    current working directory.
    """
    np.random.seed(42)

    # TODO: fill the following values
    first_name = 'Maciej'
    last_name = 'Olejniczak'

    x, y = load_dataset('./../../test_data/')
    x = data_processing(x)

    # TODO: create a detector/descriptor here. Eg. cv2.AKAZE_create()
    feature_detector_descriptor = cv2.AKAZE_create()

    # TODO: train a vocabulary model and save it using pickle.dump function
    # NOTE: pickle executes arbitrary code on load; only load trusted files.
    with open('./vocab_model.p', 'rb') as vocab_file:
        vocab_model = pickle.load(vocab_file)

    x_transformed = apply_feature_transform(x, feature_detector_descriptor, vocab_model)

    # TODO: train a classifier and save it using pickle.dump function
    with open('./clf.p', 'rb') as clf_file:
        clf = pickle.load(clf_file)

    # Cast to a builtin float so the value is always JSON-serialisable.
    score = float(clf.score(x_transformed, y))
    print(f'{first_name} {last_name} score: {score}')
    with open(f'{last_name}_{first_name}_score.json', 'w') as f:
        json.dump({'score': score}, f)


# Run the evaluation only when this file is executed as a script.
if __name__ == '__main__':
    project()

vocab <class 'sklearn.cluster._kmeans.KMeans'>
tansmormajtor <class 'numpy.ndarray'>
Maciej Olejniczak score: 0.85
