In [1]:
import numpy as np

# Softmax Function
The softmax function is a very important function in neural networks due to its mathematical properties, and it is widely used in various implementations of ANNs. The softmax function is defined as follows:
![softmax_basic.png](./softmax_basic.png)
However, one drawback of the direct implementation of the softmax function given above is that the individual exponentials may blow up (reach infinity) when the vector contains large positive numbers, or die down (reach 0) when it contains large-magnitude negative numbers. In such cases, Python will not return the correct softmax values. The solution is to subtract the maximum element of the vector from each element of the vector and then apply the softmax function to the shifted vector. The new softmax value is the same as the old softmax value (check it for yourself), since
![softmax_const.png](./softmax_const.png)

This implementation of softmax is called the shifted softmax implementation.

So, if we have to implement the direct softmax function on a matrix, we consider the vectors to be arranged in the form of rows and we use softmax on each row. The following are the steps for implementing softmax on a matrix
1. Take a row and exponentiate all the elements of the row
2. Sum up the exponentiated elements from the row
3. Divide each exponentiated element of the row by the sum. The resulting values give the softmax of that particular row vector
4. Do the same for all the rows

For the shifted-softmax implementation on a matrix, the following are the steps
1. Take a row and subtract the maximum element of the row from each element of the row
2. Exponentiate all the elements of the row (now max-subtracted)
3. Sum up the exponentiated elements (now max-subtracted) from the row
4. Divide each exponentiated element of the row by the sum. The resulting values give the softmax of that particular row vector
5. Do the same for all the rows


In [2]:
#  You may find the following functions useful:
#  np.exp, np.sum, np.reshape, np.max for this task
def softmax_vector(x):
    """
    Direct (unshifted) softmax of a single vector.

    Every entry is exponentiated and then divided by the sum of all the
    exponentials, using vectorized numpy operations only (no loops).
    No maximum is subtracted first, so very large inputs can overflow
    inside np.exp.

    Arguments:
    x -- A 1-D numpy array of length N (treated as a single row).

    Return:
    x -- A new array with the same shape as the input holding the
         softmax values. The name x is rebound to the result; the
         caller's array is not written to.
    """
    orig_shape = x.shape
    assert len(orig_shape) == 1  # only plain 1-D vectors are accepted here
    ### BEGIN SOLUTION
    # Exponentiate once and reuse the result for numerator and denominator.
    exponentials = np.exp(x)
    x = exponentials / np.sum(exponentials)
    ### END SOLUTION
    assert x.shape == orig_shape  # output shape must match the input shape
    return x

In [16]:
# Basic test 1 for softmax on a vector: softmax([1, 2]) against reference values.
ans1 = np.array([0.26894142,  0.73105858])
test1 = softmax_vector(np.array([1,2]))
print(test1)
assert np.allclose(test1, ans1, rtol=1e-05, atol=1e-06)


[ 0.26894142  0.73105858]


In [None]:
# Running hidden test 1 for softmax on vector. Don't edit cell.   *** 1 mark ***
### BEGIN HIDDEN TESTS
# Mixed-sign input: the negative entry should end up with almost no probability.
hidden_test1 = softmax_vector(np.array([-4,3]))
print(hidden_test1)
hidden_ans1 = np.array([9.11051194e-04,  9.99088949e-01])
# Compare with the reference values within a small floating-point tolerance.
assert np.allclose(hidden_test1, hidden_ans1, rtol=1e-05, atol=1e-06)
### END HIDDEN TESTS

In [None]:
# Running hidden test 2 for softmax on vector. Don't edit cell.    *** 1 mark ***
### BEGIN HIDDEN TESTS
# Entries differ by 1 with the larger one first, so the expected values are
# those of softmax([1, 2]) in reverse order.
hidden_test2 = softmax_vector(np.array([4,3]))
print(hidden_test2)
hidden_ans2 = np.array([0.73105858, 0.26894142])
# Compare with the reference values within a small floating-point tolerance.
assert np.allclose(hidden_test2, hidden_ans2, rtol=1e-05, atol=1e-06)
### END HIDDEN TESTS

In [6]:
#   You may find the following functions useful:
#    np.exp, np.sum, np.reshape, np.max for this task
def softmax_matrix(x):
    """
    Direct (unshifted) row-wise softmax of a matrix.

    Each row is treated as an independent vector: its entries are
    exponentiated and divided by that row's sum of exponentials. The work
    is done with vectorized numpy operations and broadcasting (no loops).
    No maximum is subtracted first, so very large inputs can overflow
    inside np.exp.

    Arguments:
    x -- An M x N numpy array (at least two dimensions are required).

    Return:
    x -- A new array with the same shape as the input holding the
         row-wise softmax values. The name x is rebound to the result;
         the caller's array is not written to.
    """
    orig_shape = x.shape
    assert len(orig_shape) > 1  # vectors are rejected; this function is for matrices
    ### BEGIN SOLUTION
    # Exponentiate once and reuse the result for numerator and denominator.
    exponentials = np.exp(x)
    # Sum along axis 1 (one total per row), then add an axis back so the
    # totals broadcast across the columns of their own row.
    row_totals = np.sum(exponentials, axis=1)[:, np.newaxis]
    x = exponentials / row_totals
    ### END SOLUTION
    assert x.shape == orig_shape  # output shape must match the input shape
    return x


In [7]:
# Basic test 1 for softmax on a matrix: both rows have entries that differ by 1,
# so both rows are expected to produce the same softmax values.
ans2 = np.array([[0.26894142, 0.73105858],
                 [0.26894142, 0.73105858]])
test2 = softmax_matrix(np.array([[1,2],[3,4]]))
print(test2)
assert np.allclose(test2, ans2, rtol=1e-05, atol=1e-06)



Running basic test 1 for softmax on matrix
[[ 0.26894142  0.73105858]
 [ 0.26894142  0.73105858]]


In [None]:
# Running hidden test 1 for softmax on matrix. Don't edit cell.   *** 2 marks ***
### BEGIN HIDDEN TESTS
# Mixed-sign rows; each row is expected to be normalized independently of the other.
hidden_test3 = softmax_matrix(np.array([[-1,3],[3,-2]]))
print(hidden_test3)
hidden_ans3 = np.array([
        [0.01798621, 0.98201379],
        [0.99330715, 0.00669285]])
# Compare with the reference values within a small floating-point tolerance.
assert np.allclose(hidden_test3, hidden_ans3, rtol=1e-05, atol=1e-06)
### END HIDDEN TESTS

In [None]:
# Running hidden test 2 for softmax on matrix. Don't edit cell.     *** 2 marks ***
### BEGIN HIDDEN TESTS
# Rows with different gaps (1 and 2) and the larger entry in different columns.
hidden_test4 = softmax_matrix(np.array([[1,0],[0,2]]))
print(hidden_test4)
hidden_ans4 = np.array([
        [0.73105858, 0.26894142],
        [0.11920292, 0.88079708]])
# Compare with the reference values within a small floating-point tolerance.
assert np.allclose(hidden_test4, hidden_ans4, rtol=1e-05, atol=1e-06)
### END HIDDEN TESTS

In [10]:
#   You may find the following functions useful:
#    np.exp, np.sum, np.reshape, np.max for this task
def softmax_shift(x):
    """
    Numerically stable ("shifted") softmax of a vector or of each row of a matrix.

    The maximum of each row is subtracted before exponentiating. This does
    not change the softmax value, but keeps every exponent <= 0 so np.exp
    cannot overflow. Only vectorized numpy operations and broadcasting are
    used (no loops).

    Arguments:
    x -- A 1-D numpy array of length N (treated as a single row) or an
         M x N numpy array (softmax is applied to every row).

    Return:
    x -- A new array with the same shape as the input holding the softmax
         values. The name x is rebound to the result; the caller's array
         is not written to. Floating-point inputs keep their dtype;
         integer and boolean inputs produce a float64 result.
    """
    orig_shape = x.shape

    ### BEGIN SOLUTION
    # Integer (and bool) arrays must be promoted before the shift: the
    # subtraction below would otherwise be carried out in the input dtype,
    # where it can wrap around (e.g. uint8 [1, 2] - 2 -> [255, 0]) and
    # produce inf/NaN instead of probabilities.
    if not np.issubdtype(x.dtype, np.inexact):
        x = x.astype(np.float64)

    # Matrices are normalized along axis 1 (one softmax per row). A vector is
    # a single row, so the reduction runs over all of its entries.
    axis = 1 if len(orig_shape) > 1 else None

    # keepdims=True keeps the reduced axis with length 1 so the per-row
    # maximum / total broadcasts back against x without manual reshaping.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exponentials = np.exp(shifted)
    x = exponentials / np.sum(exponentials, axis=axis, keepdims=True)
    ### END SOLUTION

    assert x.shape == orig_shape  # output shape must match the input shape
    return x


In [11]:
# Basic test 1 for shifted-softmax on a vector: large-magnitude negative inputs
# whose entries differ by 1.
ans3 = np.array([0.73105858, 0.26894142])
test3 = softmax_shift(np.array([-1001,-1002]))
print(test3)
assert np.allclose(test3, ans3, rtol=1e-05, atol=1e-06)

# Basic test 2 for shifted-softmax on a matrix: one row of very large values and
# one row of small values, both with entries that differ by 1.
ans4 = np.array([[0.26894142, 0.73105858],
                 [0.26894142, 0.73105858]])
test4 = softmax_shift(np.array([[1001,1002],[3,4]]))
print(test4)
assert np.allclose(test4, ans4, rtol=1e-05, atol=1e-06)



Running basic test 1 for shifted-softmax on vector
[ 0.73105858  0.26894142]
Running basic test 2 for shifted-softmax on matrix
[[ 0.26894142  0.73105858]
 [ 0.26894142  0.73105858]]


In [15]:
# Running hidden test 1 for shifted-softmax on vector. Don't edit cell.   *** 2 marks ***
### BEGIN HIDDEN TESTS
# Large-magnitude negative inputs that differ by 8, so nearly all of the
# probability is expected on the first (larger) entry.
hidden_test5 = softmax_shift(np.array([-339,-347]))
print(hidden_test5)
hidden_ans5 = np.array([9.99664650e-01, 3.35350130e-04])
# Compare with the reference values within a small floating-point tolerance.
assert np.allclose(hidden_test5, hidden_ans5, rtol=1e-05, atol=1e-06)
### END HIDDEN TESTS

Running hidden test 1 for shifted-softmax on vector
[  9.99664650e-01   3.35350130e-04]


In [None]:
# Running hidden test 1 for shifted-softmax on matrix. Don't edit cell.   *** 2 marks ***
### BEGIN HIDDEN TESTS
# Both rows have entries that differ by 3 but in opposite order, so the
# expected rows are mirror images of each other.
hidden_test6 = softmax_shift(np.array([[221,218],[0,3]]))
print(hidden_test6)
hidden_ans6 = np.array([
        [0.95257413, 0.04742587],
        [ 0.04742587, 0.95257413]])
# Compare with the reference values within a small floating-point tolerance.
assert np.allclose(hidden_test6, hidden_ans6, rtol=1e-05, atol=1e-06)
### END HIDDEN TESTS