In [3]:
import numpy as np

sizes = [784, 30, 10]
biases = [np.random.randn(y, 1) for y in sizes[1:]]
weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

def sigmoid(z):
    """The sigmoid function."""
    return 1.0/(1.0+np.exp(-z))

def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))

def cost_derivative(output_activations, y):
        """Return the vector of partial derivatives \partial C_x /
        \partial a for the output activations."""
        return (output_activations-y)

def backprop(x, y):
        """Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient for the cost function C_x.  ``nabla_b`` and
        ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
        to ``self.biases`` and ``self.weights``."""
        nabla_b = [np.zeros(b.shape) for b in biases]
        nabla_w = [np.zeros(w.shape) for w in weights]
        # feedforward
        activation = x
        activations = [x] # list to store all the activations, layer by layer
        zs = [] # list to store all the z vectors, layer by layer
        for b, w in zip(biases, weights):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # backward pass
        delta = cost_derivative(activations[-1], y) * \
            sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # Note that the variable l in the loop below is used a little
        # differently to the notation in Chapter 2 of the book.  Here,
        # l = 1 means the last layer of neurons, l = 2 is the
        # second-last layer, and so on.  It's a renumbering of the
        # scheme in the book, used here to take advantage of the fact
        # that Python can use negative indices in lists.
        for l in range(2, 3):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)

def update_mini_batch(mini_batch=10, beta_1=0.9, beta_2=0.999, t=0):
            nabla_b = [np.zeros(b.shape) for b in biases]
            nabla_w = [np.zeros(w.shape) for w in weights]
            # Llamo a las variables m y v que están fuera de esta función y las actualizo
            m_b = [abs(np.zeros(b.shape)) for b in biases] # m para las b
            v_b = [abs(np.zeros(b.shape)) for b in biases] # v para las b
            m_w = [abs(np.zeros(w.shape)) for w in weights] # m para las w
            v_w = [abs(np.zeros(w.shape)) for w in weights] # v para las w
            for x, y in mini_batch:
                delta_nabla_b, delta_nabla_w = backprop(x, y)
                nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
                nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        
            m_b = [beta_1*mb + (1-beta_1)*nb for mb, nb in zip(m_b, nabla_b)]
            v_b = [beta_2*vb + (1-beta_2)*(nb**2) for vb, nb in zip(v_b, nabla_b)]
            m_w = [beta_1*mw + (1-beta_1)*nw for mw, nw in zip(m_w, nabla_w)]
            v_w = [beta_2*vw + (1-beta_2)*(nw**2) for vw, nw in zip(v_w, nabla_w)]
            
            # creo las hats de cada m y v
            m_b_hat = [mb/(1-beta_1*t) for mb in m_b]
            m_w_hat = [mw/(1-beta_1*t) for mw in m_w]
            v_b_hat = [vb/(1-beta_2*t) for vb in v_b]
            v_w_hat = [vw/(1-beta_2*t) for vw in v_w]
            
            return (m_b_hat, m_w_hat, v_b_hat, v_w_hat)

In [4]:
m_b_hat, m_w_hat, v_b_hat, v_w_hat = update_mini_batch()
        
for i in v_b_hat:
    if i < 0:
        print(i)
        
for i in v_w_hat:
    if i < 0:
        print(i)

TypeError: 'int' object is not iterable