# Practicing multidimensional arrays with NumPy

In [1]:
import numpy as np


In [2]:
# One-dimensional array (a vector)
A = np.array([1, 2, 3, 4])

print(A)
# Only the last bare expression of a cell is echoed, so the first two
# lookups below are evaluated but display nothing.
A.ndim      # number of dimensions
A.shape     # shape, as a tuple
A.shape[0]  # length of the first (and only) axis


[1 2 3 4]


4

In [3]:
# Two-dimensional array (a matrix) with 3 rows and 2 columns
B = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])

print(B)
B.ndim   # evaluated but not echoed: only the last expression is displayed
B.shape

[[1 2]
 [3 4]
 [5 6]]


(3, 2)

In [4]:
# Matrix product of two 2x2 matrices
A = np.array([
    [1, 2],
    [3, 4],
])
B = np.array([
    [5, 6],
    [7, 8],
])

# np.dot dispatches on the number of dimensions of its operands:
#   1-D with 1-D -> inner product of two vectors
#   2-D with 2-D -> matrix product
np.dot(A, B)

array([[19, 22],
       [43, 50]])

In [5]:
# A second example: a (2, 3) matrix times a (3, 2) matrix gives a (2, 2) result
A = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
B = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])

np.dot(A, B)

array([[22, 28],
       [49, 64]])

## Error example
The matrix product can only be calculated when the number of columns of the first matrix equals the number of rows of the second matrix.

In [6]:
# Deliberate failure: A (defined in the previous cell) has shape (2, 3) and
# C has shape (2, 2), so A's 3 columns do not match C's 2 rows.
C = np.array([[1, 2], [3, 4]])

# Catch the expected error and print it, so the demonstration is still shown
# while "Restart Kernel -> Run All" can continue past this cell.
try:
    np.dot(A, C)
except ValueError as err:
    print("ValueError:", err)

ValueError: shapes (2,3) and (2,2) not aligned: 3 (dim 1) != 2 (dim 0)

## Two dimensions x single dimension


In [7]:
# A (3, 2) matrix times a length-2 vector yields a length-3 vector
A = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
B = np.array([7, 8])

np.dot(A, B)


array([23, 53, 83])

## Matrix product in a neural network

In [8]:
# Input vector with two elements
X = np.array([1, 2])

# Weight matrix: one row per input element, one column per output element
W = np.array([
    [1, 3, 5],
    [2, 4, 6],
])

# Output: each element is the weighted sum of the inputs for one column of W
Y = np.dot(X, W)
print(Y)


[ 5 11 17]


## Calculation of weighted sum

$$ A^{(1)} = XW^{(1)} + B^{(1)} $$

### where the terms are defined as follows

$$ A^{(1)} = (a^{(1)}_1, a^{(1)}_2, a^{(1)}_3), \quad
X = (x_1, x_2), \quad
B^{(1)} = (b^{(1)}_1, b^{(1)}_2, b^{(1)}_3) \\
W^{(1)} = \begin{pmatrix}
w^{(1)}_{11} & w^{(1)}_{21} & w^{(1)}_{31} \\
w^{(1)}_{12} & w^{(1)}_{22} & w^{(1)}_{32}
\end{pmatrix}
$$



In [9]:
def sigmoid(x):
    """Logistic sigmoid, computed element-wise as ``1 / (1 + exp(-x))``.

    Args:
        x: A number or NumPy array.

    Returns:
        The sigmoid of ``x``; for array input the result has the same shape.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator


In [10]:
# --- First layer ---
# Input signal, shape (2,)
X = np.array([1.0, 0.5])

# Weights from the 2 inputs to the 3 first-layer neurons, shape (2, 3)
W1 = np.array([
    [0.1, 0.3, 0.5],
    [0.2, 0.4, 0.6],
])

# One bias per first-layer neuron, shape (3,)
B1 = np.array([0.1, 0.2, 0.3])

# Weighted sum of the inputs plus the bias: A1 = X . W1 + B1
A1 = np.dot(X, W1) + B1
print(A1)

[0.3 0.7 1.1]


In [11]:
# Apply the sigmoid activation to the first-layer weighted sums.
# `sigmoid` and `A1` are defined in earlier cells.
Z1 = sigmoid(A1)
print(Z1)

[0.57444252 0.66818777 0.75026011]


In [12]:
# --- Second layer ---
# Weights from the 3 first-layer outputs to the 2 second-layer neurons, shape (3, 2)
W2 = np.array([
    [0.1, 0.4],
    [0.2, 0.5],
    [0.3, 0.6],
])
# One bias per second-layer neuron, shape (2,)
B2 = np.array([0.1, 0.2])

# Weighted sum of the first-layer activations (Z1 from an earlier cell),
# followed by the sigmoid activation
A2 = np.dot(Z1, W2) + B2
Z2 = sigmoid(A2)

In [13]:
# Show the second-layer weighted sums and their activations, one per line
print(A2, Z2, sep="\n")

[0.51615984 1.21402696]
[0.62624937 0.7710107 ]


In [14]:
# output layer
def identity_function(x):
    """Return ``x`` unchanged; used in this notebook as the output-layer activation."""
    return x

# Weights and biases of the output layer (2 inputs -> 2 outputs)
W3 = np.array([
    [0.1, 0.3],
    [0.2, 0.4],
])
B3 = np.array([0.1, 0.2])

# Weighted sum of the second-layer activations (Z2 from an earlier cell),
# passed through the identity activation
A3 = np.dot(Z2, W3) + B3
Y = identity_function(A3)

print(Y)

[0.31682708 0.69627909]


### Topic: identity function
The identity function outputs its input unchanged.

# Summary

In [15]:
def init_network():
    """Build the fixed parameters of the 3-layer example network.

    Returns:
        dict: Weight matrices under ``'W1'``, ``'W2'``, ``'W3'`` and bias
        vectors under ``'b1'``, ``'b2'``, ``'b3'``. A new dict with new
        arrays is created on every call.
    """
    return {
        # layer 1: 2 inputs -> 3 neurons
        'W1': np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]]),
        'b1': np.array([0.1, 0.2, 0.3]),
        # layer 2: 3 inputs -> 2 neurons
        'W2': np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]]),
        'b2': np.array([0.1, 0.2]),
        # output layer: 2 inputs -> 2 outputs
        'W3': np.array([[0.1, 0.3], [0.2, 0.4]]),
        'b3': np.array([0.1, 0.2]),
    }

def forward(network, x):
    """Run one forward pass through the 3-layer network.

    Args:
        network: Dict holding the weights ``'W1'``..``'W3'`` and the biases
            ``'b1'``..``'b3'``, as built by ``init_network``.
        x: Input array for the first layer.

    Returns:
        The output-layer result: two sigmoid-activated layers followed by an
        affine layer passed through ``identity_function``.
    """
    # Each layer is "weighted sum plus bias", then that layer's activation.
    hidden1 = sigmoid(np.dot(x, network['W1']) + network['b1'])
    hidden2 = sigmoid(np.dot(hidden1, network['W2']) + network['b2'])
    return identity_function(np.dot(hidden2, network['W3']) + network['b3'])

# Build the example network with its fixed weights and biases
network = init_network()
# input layer: one sample with two values
x = np.array([1.0, 0.5])

# output layer: run the forward pass and print the result
y = forward(network, x)
print(y)


[0.31682708 0.69627909]


# Softmax function
This function is typically used in the output layer for classification problems.

$$
y_k = \frac{\exp(a_k)}{\sum_{i=1}^n \exp(a_i)}
$$

In [18]:
a = np.array([0.3, 2.9, 4.0])

# Numerator of the softmax: the exponential of every element
exp_a = np.exp(a)

# Denominator: the sum of all the exponentials
sum_exp_a = exp_a.sum()

# Softmax: each exponential divided by the total
y = exp_a / sum_exp_a
print(y)

[0.01821127 0.24519181 0.73659691]


In [20]:
def softmax(a):
    """Naive softmax: ``exp(a)`` divided by the sum of ``exp(a)``.

    This direct form overflows when the entries of ``a`` are large; a later
    cell redefines ``softmax`` with a numerically stable version.

    Args:
        a: NumPy array of scores.

    Returns:
        Array of the same shape as ``a`` whose elements sum to 1.
    """
    exponentials = np.exp(a)
    return exponentials / np.sum(exponentials)

# Improvement plan for Softmax function

With the above formula, the result may not be computable when the input values become large, because the exponentials overflow.
So use the following equivalent formula:

$$
\begin{align}
y_k = \frac{\exp(a_k)}{\sum_{i=1}^n \exp(a_i)} &= \frac{C\exp(a_k)}{C\sum_{i=1}^n \exp(a_i)} \\
&=\frac{\exp(a_k + \log C)}{\sum_{i=1}^n \exp(a_i + \log C)} \\
&=\frac{\exp(a_k + C')}{\sum_{i=1}^n \exp(a_i + C')}
\end{align}
$$

In [23]:
# Overflow: exp() of values this large is inf in floating point, so the
# naive softmax expression below evaluates inf / inf and yields nan
# (NumPy reports this with runtime warnings).
a = np.array([1010, 1000, 999])
np.exp(a) / np.sum(np.exp(a))

# Countermeasure: subtract the maximum so that the largest exponent becomes 0
c = a.max()
a - c

  np.exp(a) / np.sum(np.exp(a))
  np.exp(a) / np.sum(np.exp(a))


array([  0, -10, -11])

In [24]:
# Improved version
def softmax(a, axis=None):
    """Numerically stable softmax.

    The maximum is subtracted before exponentiating, so the largest exponent
    is 0 and ``np.exp`` cannot overflow. The shift cancels between numerator
    and denominator, so the result is mathematically unchanged.

    Args:
        a: NumPy array of scores.
        axis: Axis along which to normalise. ``None`` (the default)
            normalises over all elements of ``a``, which is the behaviour of
            the one-argument form. Pass ``axis=-1`` to apply softmax
            independently to every row of a batch.

    Returns:
        Array with the same shape as ``a`` whose entries sum to 1 along
        ``axis`` (over the whole array when ``axis`` is ``None``).
    """
    # keepdims=True keeps the reduced axis as length 1 so the shift and the
    # normalisation broadcast back against `a` for any number of dimensions.
    c = np.max(a, axis=axis, keepdims=True)
    exp_a = np.exp(a - c)
    sum_exp_a = np.sum(exp_a, axis=axis, keepdims=True)

    y = exp_a / sum_exp_a
    return y

In [27]:
a = np.array([0.3, 2.9, 4.0])
y = softmax(a)

print(y)
# The softmax outputs add up to 1
y.sum()

[0.01821127 0.24519181 0.73659691]


1.0

1.0