In [12]:
import numpy as np
import scipy as sp
from sklearn import datasets

# Load the Iris dataset that ships with scikit-learn.
iris = datasets.load_iris()
# Keep only the first two feature columns as inputs; targets are used as-is.
X, y = iris.data[:, :2], iris.target

In [7]:
# Sanity check: show the first two rows of the two-column feature matrix X.
print(X[:2])

[[5.1 3.5]
 [4.9 3. ]]


# Simple NN

In [46]:
# Layer widths, input first. Each weight matrix must have the format
# (in, hidden), i.e. (shapes[i], shapes[i+1]) for consecutive widths.
shapes = [2, 3]
W = [
    np.random.randn(n_in, n_out)
    for n_in, n_out in zip(shapes[:-1], shapes[1:])
]

def affine(x, w):
    """
    Linear (bias-free) layer: matrix product of inputs and weights.

    x : (n, m)
    w : (m, k)
    out : (n, k)
    """
    out = x.dot(w)
    return out

def affine_(d, x, w):
    """
    Backward pass of `affine`: route the upstream gradient to x and w.

    x : (n, m)
    w : (m, k)
    d : (n, k)  upstream gradient w.r.t. the affine output
    ---
    dx : (n, m)  returned under key 'x'
    dw : (m, k)  returned under key 'w'
    """
    # out = x.dot(w)  =>  dx = d w^T  and  dw = x^T d
    grad_x = d.dot(w.T)
    grad_w = x.T.dot(d)
    return {'x': grad_x, 'w': grad_w}

def sigmoid(x):
    """
    Elementwise logistic function, delegated to scipy's `expit`.

    x : (n, m)
    out : (n, m)
    """
    activation = sp.special.expit(x)
    return activation

def sigmoid_(d, x):
    """
    Backward pass of `sigmoid`: scale the upstream gradient by the local slope.

    x : (n, m)  the input that was fed to the sigmoid
    d : (n, m)  upstream gradient w.r.t. the sigmoid output
    ---
    dx : (n, m)
    """
    s = sp.special.expit(x)
    # Local derivative of the logistic function is s * (1 - s).
    local_grad = s * (1 - s)
    return d * local_grad

def softmax_ce(x, y):
    r"""
    Mean softmax cross-entropy loss over a batch.

    x : (n, m)  raw class scores (logits)
    y : (n,)    integer class index for each row of x
    out : () [scalar]

    Equation is 1/n * \sum_i^n [ -log(e^{x_{i,y_i}} / \sum_j e^{x_{ij}}) ]
    which is equivalently:

        1/n * \sum_i^n [ log(\sum_j e^{x_{ij}}) - x_{i,y_i} ]

    The log-sum-exp term is evaluated as

        max_j x_{ij} + log(\sum_j e^{x_{ij} - max_j x_{ij}})

    which is mathematically identical but keeps every exponent <= 0, so
    large scores no longer overflow np.exp and produce an infinite loss.
    """
    n = x.shape[0]
    # Per-row maximum, kept as a column so it broadcasts against x.
    row_max = np.max(x, axis=1, keepdims=True)
    # Stable log(sum_j exp(x_ij)) for every row; shape (n,).
    log_sum_exp = row_max[:, 0] + np.log(np.sum(np.exp(x - row_max), axis=1))
    # Subtract the score of the true class for each row, then average.
    return np.sum(log_sum_exp - x[np.arange(n), y]) / n

def softmax_ce_(x, y):
    r"""
    Gradient of the mean softmax cross-entropy loss w.r.t. the scores x.

    x : (n, m)  raw class scores (logits)
    y : (n,)    integer class index for each row of x
    ---
    dx : (n, m)

    Back propagation for a single data point is:

    dL_i/dx_{ik} = -1_{k == y_i} + softmax(x_i)_k

    and because the loss is the mean over the n data points, each row of
    the returned gradient is that expression scaled by 1/n:

    dL/dx_{ik} = 1/n * (-1_{k == y_i} + softmax(x_i)_k)
    """
    n = x.shape[0]
    # Shift each row by its max before exponentiating: softmax is invariant
    # to a per-row constant, and this keeps np.exp from overflowing.
    shifted = x - np.max(x, axis=1, keepdims=True)
    exp = np.exp(shifted)
    # keepdims=True keeps the row sums as an (n, 1) column so each row is
    # divided by its own sum; a plain (n,) sum would broadcast along the
    # class axis instead (error when n != m, wrong values when n == m).
    grad = exp / np.sum(exp, axis=1, keepdims=True)
    # Subtract the one-hot target from the predicted probabilities.
    grad[np.arange(n), y] -= 1
    return grad / n

In [47]:
# Forward pass


[[ 0.00000000e+00  4.05154196e+03]
 [ 5.05610746e-44 -5.00000000e-01]]


In [16]:
# Spot-check scipy's logistic function on a few small inputs; expit(0) should be 0.5.
sp.special.expit(np.array([0, 1, 2]))

array([0.5       , 0.73105858, 0.88079708])