In [50]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Load the original dataset from a comma-separated text file.
# Each row is one sample; the last column is used as the label further down.
dataset_path = 'a2_data/data_banknote_authentication.txt'
data_orig = np.loadtxt(dataset_path, delimiter=',')

In [59]:
# Show the raw dataset (NumPy abbreviates large arrays) and its dimensions

print(f"Dataset : \n\n{data_orig}")
print(f"\nDimensions of dataset : {data_orig.shape}")

Dataset : 

[[ -1.8584    7.886    -1.6643   -1.8384    0.     ]
 [ -2.5754   -5.6574    6.103     0.65214   1.     ]
 [ -3.0061  -12.2377   11.9552   -2.1603    1.     ]
 ...
 [ -0.7869    9.5663   -3.7867   -7.5034    0.     ]
 [ -2.1674    0.12415  -1.0465   -0.86208   1.     ]
 [  2.9742    8.96     -2.9024   -1.0379    0.     ]]

Dimensions of dataset : (1372, 5)


In [79]:
# Fix the seed of NumPy's global random generator so that the shuffle
# (and anything else drawing from np.random) is reproducible.
seed = 1
np.random.seed(seed=seed)

In [80]:
# Shuffle the rows of the imported dataset IN PLACE.
# np.random.shuffle permutes a multi-dimensional array along its first axis
# only, so each sample row (features + label) stays intact.
# It draws from NumPy's global RNG, so the resulting order depends on the RNG
# state at the moment this cell runs. Re-running this cell reshuffles the
# already-shuffled array, i.e. the cell is not idempotent: reload the data and
# re-seed before running it again to reproduce the same order.
np.random.shuffle(data_orig)  

In [97]:
# Display the dataset after shuffling, together with the seed that was set
print(f"Shuffled dataset with (Seed {seed}) :\n\n{data_orig}")

Shuffled dataset with (Seed 1) :

[[ 3.6216   8.6661  -2.8073  -0.44699  0.     ]
 [ 1.6799   4.2068  -4.5398  -2.3931   1.     ]
 [-2.1652   3.0211  -2.4132  -2.4241   1.     ]
 ...
 [-0.16682  5.8974   0.49839 -0.70044  0.     ]
 [ 3.8027   0.81529  2.1041   1.0245   0.     ]
 [ 1.4378   0.66837 -2.0267   1.0271   1.     ]]


In [71]:
# Extracting Y: the labels are taken from the last column of the dataset
y_orig = data_orig[:, -1]
print(f"Output Y   :{y_orig}")
print(f"Shape of Y : {y_orig.shape}")

Output Y   :[1. 0. 0. ... 0. 1. 1.]
Shape of Y : (1372,)


In [113]:
# Replace the rank-1 label array by an explicit row vector of shape (1, m)
Y = y_orig[np.newaxis, :]
print(f"Shape of Y: {Y.shape}")

Shape of Y: (1, 1372)


In [114]:
# Extract the input features (columns 0-3) and transpose them so that
# rows are features and columns are samples.
feature_columns = [0, 1, 2, 3]
x_shuffled = data_orig[:, feature_columns].T
print(f"Input set : \n\n{x_shuffled}")

Input set : 

[[ 3.6216   1.6799  -2.1652  ... -0.16682  3.8027   1.4378 ]
 [ 8.6661   4.2068   3.0211  ...  5.8974   0.81529  0.66837]
 [-2.8073  -4.5398  -2.4132  ...  0.49839  2.1041  -2.0267 ]
 [-0.44699 -2.3931  -2.4241  ... -0.70044  1.0245   1.0271 ]]


In [115]:
# Standardizing input vector X: every feature row is shifted to zero mean and
# scaled to unit (population) standard deviation.
# NOTE(review): the statistics are computed over ALL samples, i.e. before the
# train/test split performed later in this notebook, so test samples influence
# the scaling. Confirm whether that is acceptable for this analysis.

# keepdims=True keeps the results as column vectors so they broadcast
# against the feature-by-sample matrix.
x_shuffled_mean = np.mean(x_shuffled, axis=1, keepdims=True)
x_shuffled_std = np.std(x_shuffled, axis=1, keepdims=True)

print("Mean of each row : \n\n"+str(x_shuffled_mean))
print("\nStandard deviation of each row : \n\n"+str(x_shuffled_std))

# A constant feature has a standard deviation of 0; dividing by it would fill
# that row with NaN/inf. Dividing by 1 instead leaves such a row at exactly 0
# after centering. Rows with non-zero deviation are unaffected.
x_shuffled_std_safe = np.where(x_shuffled_std == 0, 1.0, x_shuffled_std)

X = (x_shuffled - x_shuffled_mean) / x_shuffled_std_safe   # broadcasting over the sample axis
print("\nInput set : \n\n"+str(X))

Mean of each row : 

[[ 0.43373526]
 [ 1.92235312]
 [ 1.39762712]
 [-1.19165652]]

Standard deviation of each row : 

[[2.84172641]
 [5.86690749]
 [4.30845909]
 [2.10024732]]

Input set : 

[[ 1.12180565  0.43852383 -0.9145621  ... -0.21133465  1.18553452
   0.35332914]
 [ 1.14945512  0.38937837  0.18727871 ...  0.677537   -0.1886962
  -0.21373835]
 [-0.97597007 -1.37808599 -0.88449885 ... -0.20871432  0.16397345
  -0.7947916 ]
 [ 0.35456135 -0.57204857 -0.58680874 ...  0.23388508  1.05518836
   1.05642631]]


In [116]:
# Sanity check on the dimensions of the standardized input
print(np.shape(X))

(4, 1372)


In [127]:
# Summary of the prepared data: standardized inputs X and label row vector Y
print(f"\n-----------------\nInput: \n\n{X}")
print(f"\nShape : {X.shape}")
print(f"\n------------------\n\nOutput: \n\n{Y}")
print(f"\nShape : {Y.shape}")


-----------------
Input: 

[[ 1.12180565  0.43852383 -0.9145621  ... -0.21133465  1.18553452
   0.35332914]
 [ 1.14945512  0.38937837  0.18727871 ...  0.677537   -0.1886962
  -0.21373835]
 [-0.97597007 -1.37808599 -0.88449885 ... -0.20871432  0.16397345
  -0.7947916 ]
 [ 0.35456135 -0.57204857 -0.58680874 ...  0.23388508  1.05518836
   1.05642631]]

Shape : (4, 1372)

------------------

Output: 

[[0. 1. 1. ... 0. 0. 1.]]

Shape : (1, 1372)


In [137]:
# Splitting into Train, Test sets. The columns (samples) of X were already
# shuffled with a fixed seed, so taking a contiguous leading slice as the
# training set amounts to a reproducible random split.
train_split_percent = 80
test_split_percent = 20

# Both percentages must describe one partition of the data. Previously
# test_split_percent was never read, so an inconsistent pair went unnoticed.
if train_split_percent + test_split_percent != 100:
    raise ValueError("train_split_percent and test_split_percent must sum to 100")

# Column index at which the test portion starts; computed once so that the
# train and test slices can never disagree.
split_index = int((train_split_percent/100)*X.shape[1])

train_X, test_X = X[:, :split_index], X[:, split_index:]
# Split the labels at the same column so every sample keeps its own label.
train_Y, test_Y = Y[:, :split_index], Y[:, split_index:]

print("Seed of Randomization : "+str(seed))
print("Shape of Training set : "+str(train_X.shape))
print("Shape of Test set     : "+str(test_X.shape))

Seed of Randomization : 1
Shape of Training set : (4, 1097)
Shape of Test set     : (4, 275)


In [32]:
import matplotlib.pyplot as plt
v