In [2]:
import scipy.io as sio
import numpy as np
import matplotlib.pyplot as plt

In [3]:
load_fn = 'iris.mat'
load_data = sio.loadmat(load_fn)
# load_data is a dict with key named samples, labels, __globals__, __version__, __header__
X = load_data['samples']
X = X / 8
y = load_data['labels']
y_shape = load_data['labels'].shape
X_shape = load_data['samples'].shape
print('shape of data is',X_shape)
print('shape of labels is',y_shape)
X_train = X[0:int(X_shape[0]*0.8)]
y_train = y[0:int(y_shape[0]*0.8)]
X_test = X[int(X_shape[0]*0.8):]
y_test = y[int(y_shape[0]*0.8):]
print('shape of train data is',X_train.shape)
print('shape of train labels is',y_train.shape)
print('shape of test data is',X_test.shape)
print('shape of test labels is',y_test.shape)

shape of data is (150, 4)
shape of labels is (150, 3)
shape of train data is (120, 4)
shape of train labels is (120, 3)
shape of test data is (30, 4)
shape of test labels is (30, 3)


In [1]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [20]:
def softmax(Z):
    return np.exp(Z)/np.sum(np.exp(Z))

In [22]:
class iris:
    def __init__(self, n_h):
        self._h = n_h

    def fit(self, X_train, y_train, learningrate, epochs):
        input_shape = X_train.shape[1]
        output_shape = y_train.shape[1]
        m = X_train.shape[0]
        # initialize all variables
        W1 = np.random.randn(input_shape, self._h) * 0.01
        b1 = np.zeros((1, self._h))
        W2 = np.random.randn(self._h, output_shape) * 0.01
        b2 = np.zeros((1, output_shape))

        for i in range(epochs):
            # forward propagation
            Z1 = np.dot(X_train, W1) + b1
            A = sigmoid(Z1)
            Z2 = np.dot(A, W2) + b2

            # loss function using crosstropy
            loss = -np.sum(y_train*np.log(softmax(Z2)))/m
            loss = np.squeeze(loss)

            # back propagation
            # To simplify the names of variables, use the denominator to represent the derivative 
            # For example, dx means dl/dx
            dW2 = - np.dot(A.T, y_train/Z2) / m
            db2 = -np.sum(y_train/Z2/m, axis=0) / m
            db1 = np.dot(y_train/Z2, W2.T)*A*(1-A)
            dW1 = -np.dot(X_train.T, 
                        db1) / m
            db1 = -np.sum(db1, axis=0) / m

            # update parameters
            W2 = W2 - dW2 * learningrate
            b2 = b2 - db2 * learningrate
            W1 = W1 - dW1 * learningrate
            b1 = b1 - db1 * learningrate

            # print the loss
            if i%10==0:
                print ("Cost after iteration %i: %f" % (i, loss))

In [23]:
model = iris(5)

In [24]:
model.fit(X_train, y_train, 0.01, 1000)

Cost after iteration 0: 5.886152
Cost after iteration 10: 5.892949
Cost after iteration 20: 5.891820
Cost after iteration 30: 5.891180
Cost after iteration 40: 5.890788
Cost after iteration 50: 5.890539
Cost after iteration 60: 5.890382
Cost after iteration 70: 5.890288
Cost after iteration 80: 5.890241
Cost after iteration 90: 5.890227
Cost after iteration 100: 5.890241
Cost after iteration 110: 5.890276
Cost after iteration 120: 5.890329
Cost after iteration 130: 5.890397
Cost after iteration 140: 5.890478
Cost after iteration 150: 5.890570
Cost after iteration 160: 5.890672
Cost after iteration 170: 5.890783
Cost after iteration 180: 5.890902
Cost after iteration 190: 5.891029
Cost after iteration 200: 5.891162
Cost after iteration 210: 5.891301
Cost after iteration 220: 5.891447
Cost after iteration 230: 5.891598
Cost after iteration 240: 5.891755
Cost after iteration 250: 5.891916
Cost after iteration 260: 5.892082
Cost after iteration 270: 5.892253
Cost after iteration 280: 5.892

In [8]:
m = X_train[0]
m

array([0.9625, 0.35  , 0.8375, 0.25  ])

In [5]:
a = np.arange(6).reshape(2,3)
b = np.multiply(np.mat(a), np.mat(a))
b

matrix([[ 0,  1,  4],
        [ 9, 16, 25]])

In [21]:
softmax(a)

array([[0.00426978, 0.01160646, 0.03154963],
       [0.08576079, 0.23312201, 0.63369132]])

In [28]:
b1 = np.zeros((1, 3))
b2 = np.array([1,2,3])
b1.shape, b2.shape

((1, 3), (3,))

In [32]:
b3 = b1+b2
b3.shape

(1, 3)

In [33]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [36]:
a = sigmoid(0)
a


0.5

In [9]:
def softmax(Z):
    return np.exp(Z)/np.sum(np.exp(Z),axis=0)

In [3]:
a = (1,2,3)
softmax(a)

array([0.09003057, 0.24472847, 0.66524096])

In [5]:
b = (-1,2,5)
softmax(b)

array([0.00235563, 0.04731416, 0.95033021])

In [7]:
c = np.array([[1,2,3],
              [-1,2,5]])
c

array([[ 1,  2,  3],
       [-1,  2,  5]])

In [12]:
np.exp(c)

array([[  2.71828183,   7.3890561 ,  20.08553692],
       [  0.36787944,   7.3890561 , 148.4131591 ]])

In [19]:
np.exp(c)/np.sum(np.exp(c), axis=1).reshape(2,1)

array([[0.09003057, 0.24472847, 0.66524096],
       [0.00235563, 0.04731416, 0.95033021]])

In [3]:
y_hat = np.array([[0.65, 0.43, 0.11],
               [0.05, 0.51, 0.18],
               [0.33, 0.21, 0.72]])
y_hat

array([[0.65, 0.43, 0.11],
       [0.05, 0.51, 0.18],
       [0.33, 0.21, 0.72]])

In [4]:
y = np.array([[1,0,0],
              [0,1,0],
              [0,0,1]])
y

array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1]])

In [6]:
def softmax(Z):
    m = Z.shape[0]
    return np.exp(Z)/np.sum(np.exp(Z),axis=1).reshape(m,1)

In [5]:
-np.sum(y*np.log(y_hat))

1.4326315363282558