# Dev Process

In [1]:
import random
import numpy as np
import matplotlib.pyplot as plt

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
# Image display defaults: nearest-neighbour interpolation and a grayscale colormap.
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Auto-reload external modules, so edits to imported .py files take
# effect without restarting the kernel.
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

In [2]:
num_dev = 3
dim_dev = 3
err_stand_dev = 0.02

import gen_synthetic as gs
X_dev, Y_dev = gs.gen_linear_synthetic(num_dev, dim_dev, err_stand_dev)
print '\nX is: \n', X_dev
print '\nY is: \n', Y_dev

X has been generated.
W has been generated.
Y has been generated.

X is: 
[[ 0.55382965 -1.2877871  -0.0075365 ]
 [-0.77906214 -0.26125276 -0.5050602 ]
 [ 1.71078996  1.3411595   0.591867  ]]

Y is: 
[[ 1.]
 [-1.]
 [-1.]]


### Partition Dataset 

Use the `gen_linear_synthetic` method to generate all of the observed data first. Then partition it into three subsets: **training**, **validation**, and **test**.

In [3]:
# Generate the whole observed data
num_data = 1000
dim_data = 10
err_stand = 0.001

num_train = int(num_data * 0.7)
num_val = int(num_data * 0.15)

X, Y = gs.gen_linear_synthetic(num_data, dim_data, err_stand)

# Partition data into 'train, validation, test' parts.
# Here we use 'np.random.permutation' to track shuffled 
# indices. Because the random impact will act on both X
# and Y, instead of using 'np.random.shuffle(x)' directly.
shuffledIndx = np.random.permutation(num_data)
X_train = X[shuffledIndx[: num_train]]
Y_train = Y[shuffledIndx[: num_train]]
X_val = X[shuffledIndx[num_train : num_train + num_val]]
Y_val = Y[shuffledIndx[num_train : num_train + num_val]]
X_test = X[shuffledIndx[num_train + num_val :]]
Y_test = Y[shuffledIndx[num_train + num_val :]]

print 'X_train shape: ', X_train.shape
print 'Y_train shape: ', Y_train.shape
print 'X_val shape: ', X_val.shape
print 'Y_val shape: ', Y_val.shape
print 'X_test shape: ', X_test.shape
print 'Y_test shape: ', Y_test.shape

X has been generated.
W has been generated.
Y has been generated.
X_train shape:  (700L, 10L)
Y_train shape:  (700L, 1L)
X_val shape:  (150L, 10L)
Y_val shape:  (150L, 1L)
X_test shape:  (150L, 10L)
Y_test shape:  (150L, 1L)


In [4]:
# [Terence]
# You can't directly import things from a folder,
# unless you use '__init__.py' to tell Python 
# interpreter that this folder should be treated
# like a Python package.
from classifiers import KNearestNeighbor

knn = KNearestNeighbor()
knn.train(X_train, Y_train)

Y_pred = knn.predict(X_val, k=3)
val_acc = np.mean(Y_pred == Y_val)
print 'val_acc = ', val_acc

Y_pred = knn.predict(X_test, k=3)
val_acc = np.mean(Y_pred == Y_test)
print 'val_acc = ', val_acc

val_acc =  0.514222222222
val_acc =  0.499288888889
