In [1]:
# Import and load iris dataset from sklearn
from sklearn.datasets import load_iris
import numpy as np
import keras
# Pin NumPy's global RNG so the randomly drawn sample indices repeat on every run
SEED = 10
np.random.seed(SEED)

# Dictionary-like dataset object; its fields are unpacked in the next cell
iris = load_iris()

In [2]:
# Pull the fields used by the rest of the notebook out of the dataset object
X, Y = iris['data'], iris['target']
names, feature_names = iris['target_names'], iris['feature_names']

# Quick look at what was loaded
print('keys in iris dictionary: ', iris.keys())
print('First 3 entries of X:', '\n', X[:3])
print('First 3 entries of Y:', '\n', Y[:3])
print('names:', names)
print('feature_names:', feature_names)

# Track a few sample points: 5 random row indices in [0, len(Y)),
# reused by later cells to show how the same rows change
isamples = np.random.randint(0, len(Y), size = 5)
print(isamples)

keys in iris dictionary:  dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])
First 3 entries of X: 
 [[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]]
First 3 entries of Y: 
 [0 0 0]
names: ['setosa' 'versicolor' 'virginica']
feature_names: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
[  9 125  15  64 113]


In [3]:
# Dimensions of the feature matrix and the label vector
print('Shape of X:', X.shape)
print('Shape of Y:', Y.shape)

# The tracked rows, before any label encoding or scaling is applied
tracked_X, tracked_Y = X[isamples], Y[isamples]
print('X - samples:', tracked_X)
print('Y- samples:', tracked_Y)

Shape of X: (150, 4)
Shape of Y: (150,)
X - samples: [[4.9 3.1 1.5 0.1]
 [7.2 3.2 6.  1.8]
 [5.7 4.4 1.5 0.4]
 [5.6 2.9 3.6 1.3]
 [5.7 2.5 5.  2. ]]
Y- samples: [0 2 0 1 2]


In [4]:
from keras.utils import to_categorical

# Both the class count and the one-hot labels are derived from the raw integer
# targets stored in `iris`, not from the current value of `Y`. The original
# read and rebound `Y` in the same cell, so running the cell a second time
# tried to one-hot encode labels that were already one-hot (and computed Ny
# from the values 0./1.). Reading from `iris` makes the cell safe to re-run.
Y_int = iris['target']

Ny = len(np.unique(Y_int)) # Ny is number of categories/classes
print('Ny: ', Ny)

Y = to_categorical(Y_int, num_classes = Ny) # converted to 1-hot

# Same tracked rows as before: X is unchanged, Y is now one row of Ny entries per sample
print('X - samples:', X[isamples])
print('Y- samples:', Y[isamples])

Ny:  3
X - samples: [[4.9 3.1 1.5 0.1]
 [7.2 3.2 6.  1.8]
 [5.7 4.4 1.5 0.4]
 [5.6 2.9 3.6 1.3]
 [5.7 2.5 5.  2. ]]
Y- samples: [[1. 0. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

# Hold out 20% of the samples for testing; random_state fixes the shuffle
# so the same split is produced on every run.
splits = train_test_split(X, Y, test_size = 0.20, random_state = 1)
X_train, X_test, Y_train, Y_test = splits

print('X_train shape:', X_train.shape)
print('X_test.shape:', X_test.shape)

X_train shape: (120, 4)
X_test.shape: (30, 4)


In [6]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler (per-feature mean and standard deviation) on the training
# split only, then apply x = (x - mean) / std to both splits so the test
# data is scaled with the training statistics.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# First 5 standardised training rows and their one-hot labels
print('X_train: \n', X_train[:5])
print('Y_train: \n', Y_train[:5])

X_train: 
 [[ 0.31553662 -0.04578885  0.44767531  0.23380268]
 [ 2.2449325  -0.04578885  1.29769171  1.39742892]
 [-0.2873996  -1.24028061  0.05100098 -0.15407273]
 [ 0.67729835 -0.52358555  1.01435291  1.13884531]
 [-0.04622511 -0.52358555  0.73101411  1.52672073]]
Y_train: 
 [[0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]]


In [7]:
def addlcol(x):
    """Return `x` with a column of ones appended on the right.

    `x` must be a 2-D array; the result has one more column than `x`.
    The ones column lets a linear model learn a bias term.
    """
    ones_column = np.ones((x.shape[0], 1))
    return np.concatenate((x, ones_column), axis = 1)

# Ns = number of training samples (rows), Nx = number of input features (columns)
Ns, Nx = np.shape(X_train)
print('Ns: ', Ns, 'Nx: ', Nx)

def find_weights(A, Y):
    """Solve the linear least-squares problem ``A w ~= Y`` for ``w``.

    Parameters
    ----------
    A : ndarray, shape (n_samples, n_columns)
        Design matrix (features plus any extra columns such as a bias column).
    Y : ndarray, shape (n_samples, n_outputs)
        Target values, one row per sample.

    Returns
    -------
    ndarray, shape (n_columns, n_outputs)
        Weights minimising ``||A w - Y||``. When the columns of ``A`` are
        linearly dependent the minimum-norm solution is returned.

    Notes
    -----
    The previous implementation formed ``inv(A.T A)`` explicitly, which loses
    precision for ill-conditioned ``A`` and raises ``LinAlgError`` when columns
    are collinear. ``np.linalg.lstsq`` solves the same problem without forming
    or inverting the normal equations.
    """
    # Shape printouts are kept: the notebook's recorded cell output includes them.
    print(A.shape)

    print(Y.shape)

    w, _residuals, _rank, _singular_values = np.linalg.lstsq(A, Y, rcond = None)
    return w

# Fit the linear model (standardised features + bias column) on the training split.
# Y_train is passed directly instead of first being bound to the global `Y`:
# `Y` holds the one-hot labels for ALL samples and is the input to the
# train/test split cell, so overwriting it with the training labels made
# that cell fail on re-run (X and Y no longer had the same number of rows).
A = addlcol(X_train)
w = find_weights(A, Y_train)
print(w)

Ns:  120 Nx:  4
(120, 5)
(120, 3)
[[ 0.06062508  0.05806364 -0.11868873]
 [ 0.10258458 -0.21112954  0.10854496]
 [-0.4070331   0.22897614  0.17805695]
 [-0.03856453 -0.30252108  0.34108561]
 [ 0.325       0.30833333  0.36666667]]


In [8]:
def evaluate(X, W, Yd, transform_X_a):
    """Print the confusion matrix of a linear classifier on one data split.

    Parameters
    ----------
    X : feature matrix, one sample per row.
    W : weight matrix multiplied onto the transformed features.
    Yd : desired outputs, one row per sample; the index of the largest entry
        in each row is taken as the true class.
    transform_X_a : callable that maps `X` to the matrix multiplied by `W`
        (for example, one that appends a bias column).

    Returns
    -------
    None. The matrix is printed, with the true classes passed to
    `confusion_matrix` first and the predicted classes second.
    """
    design = transform_X_a(X)
    true_cls = np.argmax(Yd, axis = 1)
    # Predicted class = index of the largest model output for each sample
    scores = design.dot(W)
    pred_cls = np.argmax(scores, axis = 1)
    print('Confusion Matrix:')
    print(confusion_matrix(true_cls, pred_cls))


# Linear model: confusion matrix on the training split first, then on the test split
evaluate(X = X_train, W = w, Yd = Y_train, transform_X_a = addlcol)
evaluate(X = X_test, W = w, Yd = Y_test, transform_X_a = addlcol)

Confusion Matrix:
[[39  0  0]
 [ 0 22 15]
 [ 0  4 40]]
Confusion Matrix:
[[11  0  0]
 [ 0  6  7]
 [ 0  0  6]]


In [9]:
def addSqlcol(x):
    """Return `x`, its element-wise square, and a column of ones, side by side.

    `x` must be a 2-D array with n columns; the result has 2*n + 1 columns:
    the original features, the squared features, then the ones column.
    """
    squared = x ** 2
    ones_column = np.ones((x.shape[0], 1))
    return np.concatenate((x, squared, ones_column), axis = 1)

# Fit the same least-squares model on an expanded feature set
# (features, squared features, bias column) and evaluate it on both splits.
# Y_train is passed directly instead of being bound to the global `Y`, which
# holds the labels for all samples and is read by the train/test split cell.
# NOTE: `w` is rebound here to the weights for the expanded features, so the
# earlier evaluation cell that pairs `w` with `addlcol` needs the linear fit
# re-run before it can be executed again.
A = addSqlcol(X_train)
w = find_weights(A, Y_train)
print(w)
evaluate(X_train, w, Y_train, addSqlcol)
evaluate(X_test, w, Y_test, addSqlcol)

(120, 9)
(120, 3)
[[ 0.02212851  0.1284788  -0.15060731]
 [ 0.02261966  0.00396191 -0.02658157]
 [-0.31265762 -0.00385162  0.31650925]
 [-0.05916917 -0.24649968  0.30566885]
 [-0.03340995  0.13233974 -0.09892979]
 [-0.00993575  0.01269958 -0.00276383]
 [ 0.21451737 -0.58556233  0.37104496]
 [ 0.04009892 -0.0947794   0.05468048]
 [ 0.1137294   0.84363575  0.04263485]]
Confusion Matrix:
[[39  0  0]
 [ 0 35  2]
 [ 0  2 42]]
Confusion Matrix:
[[11  0  0]
 [ 0 13  0]
 [ 0  0  6]]
