**Goal:** Build a binary digit classifier, using "transfer learning". </br>
**Phases:**</br>
**1)** Load "mnist" dataset, and take only images of digits 0 or 1. </br>
**2)** Extract features using VGG16 for each image in both train and test   sets. (Note that this part refers to the transfer learning). </br>
**3)** Train k-nearest neighbors classifier on the training set,
   and evaluates it on the test set. </br>


In [5]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.datasets import mnist

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score


def extract_data():
    (X_train, y_train), (X_test, y_test) = mnist.load_data()

    # extract only images of digits "1" and "0"
    train_images = np.logical_or((y_train == 0), (y_train == 1))
    test_images = np.logical_or((y_test == 0), (y_test == 1))
    X_train, y_train = np.array(X_train[train_images]), np.array(y_train[train_images])
    X_test, y_test = np.array(X_test[test_images]), np.array(y_test[test_images])

    return (X_train[0:100], y_train[0:100]), (X_test[0:2000], y_test[0:2000])

def extract_features(X_data):

    # transform images to rgb as required by VGG
    X_data = tf.image.grayscale_to_rgb(tf.expand_dims(X_data, axis=3))
    # resize to minimum size of (32x32)
    X_data = tf.image.resize_with_pad(X_data, 32, 32)
    # normilaize pixels
    X_data = X_data / 255.

    # perform pixel scaling in a way that was performed
    # to images in the training dataset when the vgg16 model was developed
    from keras.applications.vgg16 import preprocess_input
    X_data = preprocess_input(X_data)

    # using model without last layers
    model_vgg16 = tf.keras.applications.VGG16(include_top=False, weights='imagenet', input_shape=(32, 32, 3))
    #model_vgg16.summary() # TODO

    vgg16_feature = model_vgg16.predict(X_data)

    vgg16_feature_np = np.array(vgg16_feature)
    a, b, c, d = vgg16_feature_np.shape
    vgg16_feature_flatten = np.reshape(vgg16_feature_np, (a, b * c * d))

    return vgg16_feature_flatten


# loading the dataset
(X_train, y_train), (X_test, y_test) = extract_data()

# extract features
train_features = extract_features(X_train)
test_features = extract_features(X_test)

# train a KNN model on the training data
knn = KNeighborsClassifier(n_neighbors=5).fit(train_features, y_train)

# predict labels on the test set
y_pred = knn.predict(test_features)

# evaluate model results on the test set
print("KNN model was traind on", X_train.shape[0],"samples,"
      " and recived the following scores on test set of size", X_test.shape[0], ":")
print("accuracy_score", accuracy_score(y_test, y_pred))
print("precision_score", precision_score(y_test, y_pred))


KNN model was traind on 100 samples, and recived the following scores on test set of size 2000 :
accuracy_score 0.985
precision_score 0.9779816513761468
