In [1]:
import numpy as np
import pandas as pd

In [2]:
data = pd.read_csv('iris.data', header=None)
data.columns = ['sepal length', 'sepal width',
                'petal length', 'petal width', 'class']
data = pd.concat((data, pd.get_dummies(data['class'])), axis=1)
data = data.drop(['class'], axis=1).to_numpy()
np.random.shuffle(data)

X, y = data[:, :-3], data[:, -3:]

In [3]:
def sigmoid(x):
    return 1.0 / (1.0+np.exp(-x))

def sigmoid_dx(x):
    return sigmoid(x) * (1.0-sigmoid(x))


def softmax(x):
    exp_ = np.exp(x)
    return exp_ / np.sum(exp_, axis=0)


def loss(x, y):
    return -np.mean(np.sum(y*np.log(x), axis=1))

def loss_dx(x, y):
    return (x - y) / x.shape[0]

def accuracy(x, y):
    x = np.argmax(x, axis=1)
    y = np.argmax(y, axis=1)
    return np.mean(x == y)

In [4]:
def feed_forward(X, W1, W2, b1, b2):
    z1 = np.matmul(W1, X.T) + b1
    a1 = sigmoid(z1)
    z2 = np.matmul(W2, a1) + b2
    a2 = softmax(z2)
    return a2.T

In [5]:
def train_one_batch(X, y, W1, W2, b1, b2, l_rate):
    z1 = np.matmul(W1, X.T) + b1
    a1 = sigmoid(z1)
    z2 = np.matmul(W2, a1) + b2
    a2 = softmax(z2)
    
    loss_prim = loss_dx(a2, y.T)
    grad_W2 = np.matmul(loss_prim, a1.T)
    grad_b2 = np.mean(loss_prim, axis=1, keepdims=True)
    grad_W1 = np.matmul(np.matmul(W2.T, loss_prim) * sigmoid_dx(a1), X)
    grad_b1 = np.mean(np.matmul(W2.T, loss_prim) * sigmoid_dx(a1), axis=1, keepdims=True)
    
    W2 -= grad_W2 * l_rate
    b2 -= grad_b2 * l_rate
    W1 -= grad_W1 * l_rate
    b1 -= grad_b1 * l_rate

def train_weights(X, y, W1, W2, b1, b2, l_rate, epochs, batch_size):
    n_total = X.shape[0]
    for epoch in range(1, epochs+1):
        n_batches = n_total // batch_size + (1 if n_total % batch_size != 0 else 0)
        for n_batch in range(n_batches):
            batch_X = X[n_batch*batch_size : (n_batch+1)*batch_size, :]
            batch_y = y[n_batch*batch_size : (n_batch+1)*batch_size, :]
            train_one_batch(batch_X, batch_y, W1, W2, b1, b2, l_rate)
        x = feed_forward(X, W1, W2, b1, b2)
        print('Epoch: {}\tLoss : {:.6f}      Accuracy: {:.6f}'.format(epoch, loss(x, y), accuracy(x, y)))

In [6]:
weights1 = np.random.normal(scale=0.25, size=(4, 4))
bias1 = np.zeros((4, 1))
weights2 = np.random.normal(scale=0.25, size=(3, 4))
bias2 = np.zeros((3, 1))

In [7]:
epochs = 25

# SGD

In [8]:
# batch_size = 1
w1 = weights1.copy()
b1 = bias1.copy()
w2 = weights2.copy()
b2 = bias2.copy()

train_weights(X, y, w1, w2, b1, b2, 0.1, epochs, 1)

Epoch: 1	Loss : 1.060888      Accuracy: 0.346667
Epoch: 2	Loss : 1.008934      Accuracy: 0.340000
Epoch: 3	Loss : 0.885054      Accuracy: 0.666667
Epoch: 4	Loss : 0.762443      Accuracy: 0.693333
Epoch: 5	Loss : 0.676891      Accuracy: 0.780000
Epoch: 6	Loss : 0.614071      Accuracy: 0.840000
Epoch: 7	Loss : 0.566628      Accuracy: 0.866667
Epoch: 8	Loss : 0.521688      Accuracy: 0.900000
Epoch: 9	Loss : 0.468493      Accuracy: 0.946667
Epoch: 10	Loss : 0.415622      Accuracy: 0.973333
Epoch: 11	Loss : 0.371909      Accuracy: 0.980000
Epoch: 12	Loss : 0.337156      Accuracy: 0.973333
Epoch: 13	Loss : 0.309590      Accuracy: 0.973333
Epoch: 14	Loss : 0.287464      Accuracy: 0.973333
Epoch: 15	Loss : 0.269403      Accuracy: 0.966667
Epoch: 16	Loss : 0.254419      Accuracy: 0.966667
Epoch: 17	Loss : 0.241812      Accuracy: 0.966667
Epoch: 18	Loss : 0.231079      Accuracy: 0.953333
Epoch: 19	Loss : 0.221834      Accuracy: 0.953333
Epoch: 20	Loss : 0.213774      Accuracy: 0.953333
Epoch: 21

# Mini Batch GD

In [9]:
# batch_size = 20
w1 = weights1.copy()
b1 = bias1.copy()
w2 = weights2.copy()
b2 = bias2.copy()

train_weights(X, y, w1, w2, b1, b2, 0.1, epochs, 20)

Epoch: 1	Loss : 1.066805      Accuracy: 0.653333
Epoch: 2	Loss : 1.034968      Accuracy: 0.366667
Epoch: 3	Loss : 0.947700      Accuracy: 0.660000
Epoch: 4	Loss : 0.832087      Accuracy: 0.666667
Epoch: 5	Loss : 0.748179      Accuracy: 0.666667
Epoch: 6	Loss : 0.717248      Accuracy: 0.666667
Epoch: 7	Loss : 0.725696      Accuracy: 0.700000
Epoch: 8	Loss : 0.739806      Accuracy: 0.813333
Epoch: 9	Loss : 0.744066      Accuracy: 0.846667
Epoch: 10	Loss : 0.743552      Accuracy: 0.833333
Epoch: 11	Loss : 0.741977      Accuracy: 0.800000
Epoch: 12	Loss : 0.741020      Accuracy: 0.800000
Epoch: 13	Loss : 0.741540      Accuracy: 0.746667
Epoch: 14	Loss : 0.743602      Accuracy: 0.706667
Epoch: 15	Loss : 0.746496      Accuracy: 0.680000
Epoch: 16	Loss : 0.749625      Accuracy: 0.673333
Epoch: 17	Loss : 0.752742      Accuracy: 0.660000
Epoch: 18	Loss : 0.755329      Accuracy: 0.646667
Epoch: 19	Loss : 0.757146      Accuracy: 0.640000
Epoch: 20	Loss : 0.758224      Accuracy: 0.640000
Epoch: 21

# GD

In [10]:
# batch_size = 150, full batch
w1 = weights1.copy()
b1 = bias1.copy()
w2 = weights2.copy()
b2 = bias2.copy()

train_weights(X, y, w1, w2, b1, b2, 0.03, epochs, 160)

Epoch: 1	Loss : 1.086683      Accuracy: 0.606667
Epoch: 2	Loss : 1.074101      Accuracy: 0.666667
Epoch: 3	Loss : 1.061889      Accuracy: 0.666667
Epoch: 4	Loss : 1.050040      Accuracy: 0.666667
Epoch: 5	Loss : 1.037307      Accuracy: 0.660000
Epoch: 6	Loss : 1.021131      Accuracy: 0.646667
Epoch: 7	Loss : 0.998468      Accuracy: 0.653333
Epoch: 8	Loss : 0.966900      Accuracy: 0.666667
Epoch: 9	Loss : 0.926633      Accuracy: 0.666667
Epoch: 10	Loss : 0.881709      Accuracy: 0.666667
Epoch: 11	Loss : 0.837531      Accuracy: 0.666667
Epoch: 12	Loss : 0.797348      Accuracy: 0.666667
Epoch: 13	Loss : 0.762650      Accuracy: 0.666667
Epoch: 14	Loss : 0.734412      Accuracy: 0.666667
Epoch: 15	Loss : 0.712524      Accuracy: 0.666667
Epoch: 16	Loss : 0.697139      Accuracy: 0.666667
Epoch: 17	Loss : 0.687185      Accuracy: 0.666667
Epoch: 18	Loss : 0.684105      Accuracy: 0.666667
Epoch: 19	Loss : 0.683602      Accuracy: 0.666667
Epoch: 20	Loss : 0.696389      Accuracy: 0.666667
Epoch: 21