In [1]:
%matplotlib notebook
import csv
from scipy.io import loadmat
import numpy as np
import scipy.optimize as opt
from pandas.io.parsers import read_csv
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from sklearn.preprocessing import PolynomialFeatures

In [2]:
data = loadmat('ex4data1.mat')
y = data['y']
x = data['X']
unos = np.full((5000,1),1)
weights = loadmat('ex4weights.mat')
theta1, theta2 = weights['Theta1'], weights['Theta2']
#x = np.append(unos, x, axis=1)
params = np.concatenate((np.ravel(theta1),np.ravel(theta2)))

In [3]:
def sigmoide(z):
    s = np.dot(z,-1)
    e = np.exp(s)
    d = 1 + e
    return 1/d

In [4]:
def propagacion(x,theta1, theta2): 
    m = x.shape[0]
    a1 = np.insert(x, 0, values=np.ones(m), axis=1)
    z2 = np.matmul(a1, theta1.T)
    a2 = np.insert(sigmoide(z2), 0, values=np.ones(m), axis=1)
    z3 = np.matmul(a2, theta2.T)
    a3 = sigmoide(z3)
    return [a1,a2,a3], [z2,z3]

In [5]:
def propagacion_multi(x,th,n): 
    a = []
    a.append(x)
    for i in range(n):
        unos = np.full((len(x),1),1)
        a[i] = np.append(unos, x, axis=1)
        an = sigmoide(np.dot(a[i],th[i]))
        x = an
        a.append(an)        
    return a
propagacion_multi(x,[theta1.T, theta2.T],2 )[2]

array([[1.12661530e-04, 1.74127856e-03, 2.52696959e-03, ...,
        4.01468105e-04, 6.48072305e-03, 9.95734012e-01],
       [4.79026796e-04, 2.41495958e-03, 3.44755685e-03, ...,
        2.39107046e-03, 1.97025086e-03, 9.95696931e-01],
       [8.85702310e-05, 3.24266731e-03, 2.55419797e-02, ...,
        6.22892325e-02, 5.49803551e-03, 9.28008397e-01],
       ...,
       [5.17641791e-02, 3.81715020e-03, 2.96297510e-02, ...,
        2.15667361e-03, 6.49826950e-01, 2.42384687e-05],
       [8.30631310e-04, 6.22003774e-04, 3.14518512e-04, ...,
        1.19366192e-02, 9.71410499e-01, 2.06173648e-04],
       [4.81465717e-05, 4.58821829e-04, 2.15146201e-05, ...,
        5.73434571e-03, 6.96288990e-01, 8.18576980e-02]])

In [6]:
def testeo(y, f):
    acertados = 0
    for i in range(len(y)):
        if np.argmax(f[i])+1 == y[i]:
            acertados +=1
    return acertados, acertados/len(y)*100

In [7]:
def norm(y,num_etiqueta):
    y_norm = np.zeros((len(y),num_etiqueta))
    for i in range(len(y)):
        y_norm[i][y[i]-1]= 1
    return np.transpose(y_norm)

In [8]:
def coste(theta1,theta2,x,y,num_etiqueta, reg):
    y = norm(y,num_etiqueta)
    #g = np.copy(propagacion(propagacion(x,theta1.T),theta2.T))
    g = propagacion_multi(x,[theta1.T,theta2.T],2)[2]
    t1 = np.sum(np.multiply(-y.T,np.log(g)) - np.multiply((1-y.T),np.log(1-g)))/len(x)
    t2 = (np.sum(theta1[:,1:]**2) + np.sum(theta2[:,1:]**2))*reg/(2*len(x))
    return t1+t2
coste(theta1,theta2,x,y,10,1)

0.38376985909092365

In [9]:
def forward_propagate(X, theta1, theta2):
    m = X.shape[0]

    a1 = np.insert(X, 0, values=np.ones(m), axis=1)
    z2 = np.matmul(a1, theta1.T)
    a2 = np.insert(sigmoide(z2), 0, values=np.ones(m), axis=1)
    z3 = np.matmul(a2, theta2.T)
    h = sigmoide(z3)

    return a1, z2, a2, z3, h

In [10]:
def deriv_sig(z):
    return np.multiply(sigmoide(z),1-sigmoide(z))

In [11]:
def pesosAleatorios(L_in, L_out, e=0.12):
    return 2*e*np.random.random_sample((L_out, L_in)) - e

In [12]:
thr1 = pesosAleatorios(theta1.shape[0], theta1.shape[1])
thr2 = pesosAleatorios(theta2.shape[0], theta2.shape[1])
thr1.shape

(401, 25)

In [13]:
def retro_propagacion(x,y,th1,th2, reg):
    a,z = propagacion(x,th1,th2)
    d3 = a[2]-y
    z2 = np.insert(z[0], 0, values=np.ones(1), axis=1)    
    d2 = np.multiply(np.dot(d3, th2),deriv_sig(z2))
    d2_aux = np.copy(d2[:,1:])
    
    delta1 = np.zeros(th1.shape)
    delta2 = np.zeros(th2.shape)
    delta1 += np.dot(d2_aux.T, a[0])
    delta2 += np.dot(d3.T, a[1])
    
    
    delta1 = delta1/len(x)
    delta2 = delta2/len(x)
    delta1[:, 1:] = delta1[:, 1:] + (th1[:, 1:] * reg) / len(x)
    delta2[:, 1:] = delta2[:, 1:] + (th2[:, 1:] * reg) / len(x)    
    
    return np.concatenate((np.ravel(delta1), np.ravel(delta2)))

In [14]:
def backprop (params_rn, num_entradas, num_ocultas, num_etiquetas, x, y, reg = 1):
    theta1 = np.reshape(params_rn[:num_ocultas*(num_entradas+1)], (num_ocultas, (num_entradas+1)))
    theta2 = np.reshape(params_rn[num_ocultas*(num_entradas+1):], (num_etiquetas, (num_ocultas+1)))
    cost = coste(theta1,theta2,x,y, num_etiquetas,reg)
    y = norm(y, num_etiquetas).T
    grad = retro_propagacion(x,y,theta1,theta2, reg)
    return (cost, grad)

In [15]:
backprop(params,theta1.shape[1]-1,theta2.shape[1]-1,10,x,y)

(0.38376985909092365,
 array([ 6.18712766e-05, -2.11248326e-12,  4.38829369e-13, ...,
         4.70513145e-05, -5.01718610e-04,  5.07825789e-04]))

In [16]:
#rp = retro_propagacion(x,y,thr1,thr2)
theta1.shape

(25, 401)

In [17]:
# %load checkNNGradients.py

import numpy as np


def debugInitializeWeights(fan_in, fan_out):
    """
    Initializes the weights of a layer with fan_in incoming connections and
    fan_out outgoing connections using a fixed set of values.
    """

    # Set W to zero matrix
    W = np.zeros((fan_out, fan_in + 1))

    # Initialize W using "sin". This ensures that W is always of the same
    # values and will be useful in debugging.
    W = np.array([np.sin(w) for w in
                  range(np.size(W))]).reshape((np.size(W, 0), np.size(W, 1)))

    return W


def computeNumericalGradient(J, theta):
    """
    Computes the gradient of J around theta using finite differences and
    yields a numerical estimate of the gradient.
    """

    numgrad = np.zeros_like(theta)
    perturb = np.zeros_like(theta)
    tol = 1e-4

    for p in range(len(theta)):
        # Set perturbation vector
        perturb[p] = tol
        loss1 = J(theta - perturb)
        loss2 = J(theta + perturb)

        # Compute numerical gradient
        numgrad[p] = (loss2 - loss1) / (2 * tol)
        perturb[p] = 0

    return numgrad


def checkNNGradients(costNN, reg_param):
    """
    Creates a small neural network to check the back propogation gradients.
    Outputs the analytical gradients produced by the back prop code and the
    numerical gradients computed using the computeNumericalGradient function.
    These should result in very similar values.
    """
    # Set up small NN
    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5

    # Generate some random test data
    Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size)
    Theta2 = debugInitializeWeights(num_labels, hidden_layer_size)

    # Reusing debugInitializeWeights to get random X
    X = debugInitializeWeights(input_layer_size - 1, m)

    # Set each element of y to be in [0,num_labels]
    y = [(i % num_labels) for i in range(m)]

    # Unroll parameters
    nn_params = np.append(Theta1, Theta2).reshape(-1)

    # Compute Cost
    cost, grad = costNN(nn_params,
                        input_layer_size,
                        hidden_layer_size,
                        num_labels,
                        X, y, reg_param)

    def reduced_cost_func(p):
        """ Cheaply decorated nnCostFunction """
        return costNN(p, input_layer_size, hidden_layer_size, num_labels,
                      X, y, reg_param)[0]

    numgrad = computeNumericalGradient(reduced_cost_func, nn_params)

    # Check two gradients
    np.testing.assert_almost_equal(grad, numgrad)
    return (grad - numgrad)


In [18]:
checkNNGradients(backprop, 1)

array([ 5.33180305e-11, -6.32005559e-12,  7.24889593e-12,  3.16212473e-11,
       -5.61471286e-11, -7.70522535e-13, -2.00906097e-11, -4.52287652e-11,
       -9.29274990e-11,  8.64616712e-12, -4.49352777e-11, -1.24982691e-10,
       -2.87552759e-11,  1.35198797e-12, -8.76765327e-12, -3.30088457e-11,
        2.88099891e-11, -4.41852110e-12,  1.90355995e-11,  3.57186503e-11,
        6.48169574e-11,  1.87805604e-11,  6.44100051e-12,  4.36871372e-12,
        1.56729490e-11,  2.27871055e-11,  6.80207002e-11,  1.93718375e-11,
        5.49517376e-12,  1.61996527e-11,  1.37830442e-11,  1.94179950e-11,
        7.68222708e-11,  1.54377344e-11,  4.10361328e-12,  1.71889447e-11,
        1.63800640e-11,  2.18030316e-11])

In [19]:
# %load displayData.py

import matplotlib.pyplot as plt
import numpy as np


def displayData(X):
    num_plots = int(np.size(X, 0)**.5)
    fig, ax = plt.subplots(num_plots, num_plots, sharex=True, sharey=True)
    plt.subplots_adjust(left=0, wspace=0, hspace=0)
    img_num = 0
    for i in range(num_plots):
        for j in range(num_plots):
            # Convert column vector into 20x20 pixel matrix
            # transpose
            img = X[img_num, :].reshape(20, 20).T
            ax[i][j].imshow(img, cmap='Greys')
            ax[i][j].set_axis_off()
            img_num += 1

    return (fig, ax)


def displayImage(im):
    fig2, ax2 = plt.subplots()
    image = im.reshape(20, 20).T
    ax2.imshow(image, cmap='gray')
    return (fig2, ax2)


In [20]:
fmin = opt.minimize(fun=backprop, x0= params, args=(theta1.shape[1]-1,theta2.shape[1]-1,10, x, y, 1), method='TNC', jac=True, options={'maxiter': 70})

In [21]:
def testeofinal (params_rn, num_entradas, num_ocultas, num_etiquetas, x, y, reg = 1):
    theta1 = np.reshape(params_rn[:num_ocultas*(num_entradas+1)], (num_ocultas, (num_entradas+1)))
    theta2 = np.reshape(params_rn[num_ocultas*(num_entradas+1):], (num_etiquetas, (num_ocultas+1)))
    f = propagacion_multi(x,[theta1.T, theta2.T],2)
    return testeo(y,f[2])

In [22]:
testeofinal(fmin.x,theta1.shape[1]-1,theta2.shape[1]-1,10,x,y)

(4966, 99.32)

In [23]:
y.size

5000