# _K_ *N*earest *N*eighbor

In [1]:
# common packages
import tensorflow as tf
import numpy as np
import sys
import os

# displaying images
from matplotlib.pyplot import imshow
%matplotlib inline

# download data
from six.moves.urllib.request import urlopen


# Helper to make the output consistent
SEED = 42
def reset_graph(seed=SEED):
    """Discard the current default graph and re-seed TensorFlow and NumPy.

    seed: value passed to both `tf.set_random_seed` and `np.random.seed`
          (defaults to the notebook-wide SEED).
    """
    # NumPy's seeding is independent of the TensorFlow graph state.
    np.random.seed(seed)
    # Reset first, then seed TensorFlow (same relative order as before).
    tf.reset_default_graph()
    tf.set_random_seed(seed)

reset_graph()


# Set log level to suppress TensorFlow's native log messages.
# NOTE(review): TensorFlow has already been imported by the time this line
# runs; the variable may need to be set before the import to take effect,
# and level '3' may hide error messages as well -- confirm against the
# TensorFlow logging documentation.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Version information
print("Python: {}".format(sys.version_info[:]))
print('TensorFlow: {}'.format(tf.__version__))

# Check if using GPU: query the device name once and reuse the answer
# instead of asking TensorFlow twice.
gpu_device_name = tf.test.gpu_device_name()
if not gpu_device_name:
    print('No GPU found')
else:
    print('Default GPU Device: {}'.format(gpu_device_name))



Python: (3, 5, 4, 'final', 0)
TensorFlow: 1.4.0
Default GPU Device: /device:GPU:0


In [2]:
## Download data paths
ROOT_DATA = "../../ROOT_DATA/"
DATA_DIR = "IRIS"

# The same file names are used for the local copies and the remote sources.
_TRAINING_CSV = "iris_training.csv"
_TEST_CSV = "iris_test.csv"
_URL_PREFIX = "http://download.tensorflow.org/data/"

# training split: local path and download URL
IRIS_TRAINING_PATH = os.path.join(ROOT_DATA, DATA_DIR, _TRAINING_CSV)
IRIS_TRAINING_URL = _URL_PREFIX + _TRAINING_CSV

# test split: local path and download URL
IRIS_TEST_PATH = os.path.join(ROOT_DATA, DATA_DIR, _TEST_CSV)
IRIS_TEST_URL = _URL_PREFIX + _TEST_CSV

In [3]:
# download data
def _download_if_missing(url, path):
    """Download `url` to `path` unless something already exists at `path`.

    The parent directory of `path` is created when it is missing, because
    `open(path, "wb")` does not create intermediate directories.

    Prints the path followed by "path written" after a download, or by
    "path exists" when the download is skipped.
    """
    if os.path.exists(path):
        print(path, "path exists")
        return

    # Fetch first so a failed download does not leave an empty directory.
    raw = urlopen(url).read()

    parent_dir = os.path.dirname(path)
    if parent_dir and not os.path.isdir(parent_dir):
        os.makedirs(parent_dir)

    with open(path, "wb") as f:
        f.write(raw)
    print(path, "path written")


## training
_download_if_missing(IRIS_TRAINING_URL, IRIS_TRAINING_PATH)

## test
_download_if_missing(IRIS_TEST_URL, IRIS_TEST_PATH)

../../ROOT_DATA/IRIS/iris_training.csv path exists
../../ROOT_DATA/IRIS/iris_test.csv path exists


In [4]:
# Load dataset
# Labels are read as integers and features as float32. The builtin `int` is
# used for the label dtype: `np.int` was only an alias for it and has been
# deprecated and later removed from NumPy.
training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
                  filename=IRIS_TRAINING_PATH,
                  target_dtype=int,
                  features_dtype=np.float32)
test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
              filename=IRIS_TEST_PATH,
              target_dtype=int,
              features_dtype=np.float32)

In [5]:
# print some dataset information
print("training shape:", training_set.data.shape)
print("test shape:", test_set.data.shape)

# quick check: both splits must have the same number of feature columns.
# The failure message reports the two column counts (axis 1), i.e. the same
# quantities that are being compared.
num_train_features = training_set.data.shape[1]
num_test_features = test_set.data.shape[1]
assert num_train_features == num_test_features, \
    "Rut row raggy, features don't match ({} vs {})".format(num_train_features,
                                                            num_test_features)
NUM_FEATURES = num_train_features
print("num features = {}".format(NUM_FEATURES))

# number of labels and number of distinct label values in each split
print("training target information: {} targets, {} classes".format(
    len(training_set.target), len(set(training_set.target))))
print("test target information: {} targets, {} classes".format(
    len(test_set.target), len(set(test_set.target))))

training shape: (120, 4)
test shape: (30, 4)
num features = 4
training target information: 120 targets, 3 classes
test target information: 30 targets, 3 classes


In [6]:
# create feature column
# in this example training_set.data.shape[1] is 4
feature_columns = [tf.feature_column.numeric_column("x", shape=[NUM_FEATURES])]

# The class count is always taken from the training labels; the test labels
# only decide which message is printed. Note that only the *number* of
# distinct labels is compared, not the label values themselves.
NUM_CLASSES = len(set(training_set.target))
if NUM_CLASSES != len(set(test_set.target)):
    print("number of classes in training and test set don't match")
    print("WARNING: num classes has been set to {} to match training set".format(NUM_CLASSES))
else:
    print("Number of classes = {}".format(NUM_CLASSES))

Number of classes = 3


In [41]:
# Split each dataset into its feature matrix (X_*) and label vector (Y_*).
X_train = training_set.data
Y_train = training_set.target
X_test = test_set.data
Y_test = test_set.target

# Sanity check: training shapes, the first training label, all test labels.
print(X_train.shape)
print(Y_train.shape)
print(Y_train[0])
print(Y_test)

(120, 4)
(120,)
2
[1 2 0 1 1 1 0 2 1 2 2 0 2 1 1 0 1 0 0 2 0 1 2 1 1 1 0 1 2 1]


In [46]:
# place holder
#   xtr  : training features, one row of NUM_FEATURES values per sample
#   ytrs : integer class label for each training row
#   xte  : the single sample to classify (NUM_FEATURES values)
#   ytr  : one-hot encoding of ytrs, NUM_CLASSES columns
xtr = tf.placeholder(tf.float32, [None, NUM_FEATURES], name="X")
ytrs = tf.placeholder(tf.int32, [None], name="y")
xte = tf.placeholder(tf.float32, [NUM_FEATURES], name="te")
ytr = tf.one_hot(ytrs, depth=NUM_CLASSES, axis=1, name="jack")

# number of neighbours that take part in the vote
K = 15
# NOTE: no op defined in this cell reads this variable; it is kept so that
# existing references to it (and to `init`) keep working.
nearest_neighbors = tf.Variable(tf.zeros([K]))

# model
# Negated L1 (Manhattan) distance between the query and every training row.
# The sign is flipped because top_k selects the *largest* values, so the K
# largest negated distances belong to the K closest rows.
l1_dist = tf.negative(tf.reduce_sum(tf.abs(tf.subtract(xtr, xte)), axis=1))
# Squared-L2 alternative:
# l2_dist = tf.negative(tf.reduce_sum(tf.square(tf.subtract(xtr, xte)), axis=1))

vals, indices = tf.nn.top_k(l1_dist, k=K, sorted=False)

# Class label of each of the K neighbours: gather their one-hot rows in one
# op and take the arg-max of every row (replaces a Python loop that added a
# separate slice + argmax op to the graph for each neighbour).
nn = tf.argmax(tf.gather(ytr, indices), axis=1)

# Majority vote: distinct labels among the neighbours and how often each
# occurs; the prediction is the label with the highest count.
y, idx, count = tf.unique_with_counts(nn)

pred = tf.gather(y, tf.argmax(count, 0))

accuracy = 0.0

init = tf.global_variables_initializer()

In [47]:
with tf.Session() as sess:
    sess.run(init)

    # Count hits locally so that re-running this cell always starts from
    # zero instead of adding to an accuracy left over from a previous run.
    num_correct = 0
    num_test = len(X_test)

    for i in range(X_test.shape[0]):
        # return predicted value (fetching `pred` itself, not `[pred]`,
        # yields a scalar that can be compared directly with the label)
        pred_val = sess.run(pred, feed_dict={xtr:X_train,ytrs:Y_train,xte:X_test[i,:]})
        print("Test",i,"Prediction",pred_val,"True Class:",Y_test[i])
        if pred_val == Y_test[i]:
            num_correct += 1

    # Fraction of test samples classified correctly (0.0 for an empty set).
    accuracy = float(num_correct) / num_test if num_test else 0.0
    print(K,"-th neighbors' Accuracy is:",accuracy)

Test 0 Prediction [1] True Class: 1
Test 1 Prediction [2] True Class: 2
Test 2 Prediction [0] True Class: 0
Test 3 Prediction [1] True Class: 1
Test 4 Prediction [1] True Class: 1
Test 5 Prediction [1] True Class: 1
Test 6 Prediction [0] True Class: 0
Test 7 Prediction [2] True Class: 2
Test 8 Prediction [1] True Class: 1
Test 9 Prediction [2] True Class: 2
Test 10 Prediction [2] True Class: 2
Test 11 Prediction [0] True Class: 0
Test 12 Prediction [2] True Class: 2
Test 13 Prediction [1] True Class: 1
Test 14 Prediction [1] True Class: 1
Test 15 Prediction [0] True Class: 0
Test 16 Prediction [1] True Class: 1
Test 17 Prediction [0] True Class: 0
Test 18 Prediction [0] True Class: 0
Test 19 Prediction [2] True Class: 2
Test 20 Prediction [0] True Class: 0
Test 21 Prediction [1] True Class: 1
Test 22 Prediction [2] True Class: 2
Test 23 Prediction [1] True Class: 1
Test 24 Prediction [1] True Class: 1
Test 25 Prediction [1] True Class: 1
Test 26 Prediction [0] True Class: 0
Test 27 Pre

In [62]:
# Per-feature extremes of the training data, computed column-wise in one
# reduction each and then split into the individual names.
feature_mins = X_train.min(axis=0)
feature_maxs = X_train.max(axis=0)

feat_0_min, feat_0_max = feature_mins[0], feature_maxs[0]
feat_1_min, feat_1_max = feature_mins[1], feature_maxs[1]
feat_2_min, feat_2_max = feature_mins[2], feature_maxs[2]
feat_3_min, feat_3_max = feature_mins[3], feature_maxs[3]

# spacing between grid points
step = 0.2

In [None]:
# Classify every point of a 2-D grid spanning features 0 and 1 (presumably
# the input for a decision-boundary plot -- adjust the feature pair as needed).
# np.meshgrid needs one coordinate vector per output array, hence two ranges.
xx, yy = np.meshgrid(np.arange(feat_0_min, feat_0_max, step),
                     np.arange(feat_1_min, feat_1_max, step))

# `xte` accepts a single sample of NUM_FEATURES values, so each grid point is
# completed with the training-set mean of every feature that is not on the
# grid, and the points are fed through `pred` one at a time.
grid_points = np.tile(X_train.mean(axis=0), (xx.size, 1)).astype(np.float32)
grid_points[:, 0] = xx.ravel()
grid_points[:, 1] = yy.ravel()

# The session used for evaluation was closed when its `with` block ended,
# so a fresh one is opened here.
with tf.Session() as sess:
    sess.run(init)
    z = np.array([
        sess.run(pred, feed_dict={xtr:X_train,ytrs:Y_train,xte:point})
        for point in grid_points
    ]).reshape(xx.shape)