In [10]:
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets as sd
import sklearn.linear_model
from sklearn.model_selection import train_test_split
np.random.seed(1)

In [11]:
X,Y = sd.load_digits(return_X_y=True)

In [12]:
Y.shape

(1797,)

In [13]:
X=np.transpose(X)
Y=Y.reshape(1,1797)
print(X.shape)
print(Y.shape)

(64, 1797)
(1, 1797)


In [14]:
Z=np.zeros([10,1797])
for i in range(0,1797):
    Z[Y[0,i],i]=1
Y=Z
Y.shape

(10, 1797)

In [15]:
type(Y)

numpy.ndarray

In [16]:
# training examples
m = Y.shape[1]
m

1797

In [17]:
def layer_sizes(X, Y,n):
    """
    Arguments:
    X -- input dataset of shape (input size, number of examples)
    Y -- labels of shape (output size, number of examples)
    
    Returns:
    n_x -- the size of the input layer
    n_h -- the size of the hidden layer
    n_y -- the size of the output layer
    """
    
    n_x =  X.shape[0]# size of input layer
    n_h =  n  #simply inputs(bubbles) in used one hidden layer
    n_y =  Y.shape[0]# size of output layer
    return (n_x, n_h, n_y)

In [18]:
def initialize_parameters(n_x, n_h, n_y):
    """
    Argument:
    n_x -- size of the input layer
    n_h -- size of the hidden layer
    n_y -- size of the output layer
    
    Returns:
    params -- python dictionary containing your parameters:
                    W1 -- weight matrix of shape (n_h, n_x)
                    b1 -- bias vector of shape (n_h, 1)
                    W2 -- weight matrix of shape (n_y, n_h)
                    b2 -- bias vector of shape (n_y, 1)
    """
    
    np.random.seed(2) # we set up a seed so that your output matches ours although the initialization is random.
    
    W1 = np.random.randn(n_h,n_x)*0.00001
    b1 = np.ones((n_h,1))
    W2 = np.random.randn(n_y,n_h)*0.00001
    b2 =np.ones((n_y,1))
   
    parameters = {"W1": W1,                  "b1": b1,                  "W2": W2,                  "b2": b2}
    return parameters

In [19]:
def relu(x):
    return x*(x>0)

In [20]:
def sigmoid(x):
    """
    Compute the sigmoid of x

    Arguments:
    x -- A scalar or numpy array of any size.

    Return:
    s -- sigmoid(x)
    """
    s = 1/(1+np.exp(-x))
    return s

In [56]:
def softmax(x):
    """Compute softmax values for each sets of scores in x.""" 
    return np.exp(x) / np.sum(np.exp(x), axis=0)

In [57]:
def forward_propagation(X, parameters):
    """
    Argument:
    X -- input data of size (n_x, m)
    parameters -- python dictionary containing your parameters (output of initialization function)
    
    Returns:
    A2 -- The sigmoid output of the second activation
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2"
    """
    # Retrieve each parameter from the dictionary "parameters"
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    # Implement Forward Propagation to calculate A2 (probabilities)
    Z1 = np.dot(W1,X)+b1
    #   A1 = np.tanh(Z1)
    A1 =np.tanh(Z1)
    #Z2 = np.dot(A1.T,W2)+b2
    Z2=((W2).dot(A1))+b2
    A2 =softmax(Z2)
    cache = {"Z1": Z1,             "A1": A1,             "Z2": Z2,             "A2": A2}
    
    return A2, cache

In [23]:
def compute_cost(A2, Y, parameters):
    """
    Computes the cross-entropy cost given in equation (13)
    Arguments:
    A2 -- The sigmoid output of the second activation, of shape (1, number of examples)
    Y -- "true" labels vector of shape (1, number of examples)
    parameters -- python dictionary containing your parameters W1, b1, W2 and b2
    
    Returns:
    cost -- cross-entropy cost given equation (13)
    """
    """
    m = Y.shape[1] 
    # Compute the cross-entropy cost
    """
    #A2=np.apply_along_axis(np.argmax,0,A2)
    #Y=np.apply_along_axis(np.argmax,0,Y)
    m=Y.shape[1];
    logprobs = np.multiply(Y,np.log(A2))+np.multiply(1-Y,np.log(1-A2))
    cost = -np.sum(logprobs)/m
    cost = np.squeeze(cost)     
    assert(isinstance(cost, float))
    """
    
    count=0
    for i in range(0,A2.shape[0]):
        if A2[i]!=Y[i]:
            count=count+1
    """
    return cost
    
        

In [24]:
def backward_propagation(parameters, cache, X, Y):
    """
    Implement the backward propagation using the instructions above.
    
    Arguments:
    parameters -- python dictionary containing our parameters 
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2".
    X -- input data of shape (2, number of examples)
    Y -- "true" labels vector of shape (1, number of examples)
    
    Returns:
    grads -- python dictionary containing your gradients with respect to different parameters
    """
   
    m = X.shape[1]

    W1 = parameters["W1"]
    W2 = parameters["W2"]
        
    A1 = cache["A1"]
    A2 = cache["A2"]
    
    
    dZ2 = A2-Y
    dW2 = (1/m)*np.dot(dZ2,A1.T)
    db2 = (1/m)*np.sum(dZ2,axis=1,keepdims=True)
    dZ1 = np.multiply(np.dot(W2.T,dZ2),1-np.power(A1,2))
    dW1 = (1/m)*np.dot(dZ1,X.T)
    db1 = (1/m)*np.sum(dZ1,axis=1,keepdims=True)
    
    grads = {"dW1": dW1,  "db1": db1,  "dW2": dW2, "db2": db2}    
    return grads

In [36]:
def update_parameters(parameters, grads, learning_rate = 0.004):
    """
    Updates parameters using the gradient descent update rule given above
    
    Arguments:
    parameters -- python dictionary containing your parameters 
    grads -- python dictionary containing your gradients 
    
    Returns:
    parameters -- python dictionary containing your updated parameters 
    """
   
    W1 = parameters["W1"]
    b1 = parameters["b1"] 
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    alpha = learning_rate
    # Retrieve each gradient from the dictionary "grads"
    alpha = learning_rate   
    dW1 = grads["dW1"]
    db1 = grads["db1"]
    dW2 = grads["dW2"]
    db2 = grads["db2"]

    
    # Update rule for each parameter
    
    W1 = W1 - alpha *dW1
    b1 = b1 - alpha *db1
    W2 = W2 - alpha *dW2
    b2 = b2 - alpha *db2

    
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    
    return parameters

In [64]:
def nn_model(X, Y, n_h, num_iterations = 10000, print_cost=False):
    """
    Arguments:
    X -- dataset of shape (2, number of examples)
    Y -- labels of shape (1, number of examples)
    n_h -- size of the hidden layer
    num_iterations -- Number of iterations in gradient descent loop
    print_cost -- if True, print the cost every 1000 iterations
    
    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    
    np.random.seed(3)
    
    n_x = layer_sizes(X, Y,n_h)[0]
    n_y = layer_sizes(X, Y,n_h)[2]
    parameters = initialize_parameters(n_x,n_h,n_y)
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    
    # Loop (gradient descent)

    for i in range(0, num_iterations):
         
        # Forward propagation. Inputs: "X, parameters". Outputs: "A2, cache".
        A2, cache = forward_propagation(X, parameters)        
        # Cost function. Inputs: "A2, Y, parameters". Outputs: "cost".
        cost = compute_cost(A2,Y,parameters)
 
        # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
        grads = backward_propagation(parameters,cache,X,Y)
 
        # Gradient descent parameter update. Inputs: "parameters, grads". Outputs: "parameters".
        parameters = update_parameters(parameters,grads)
        
        
        # Print the cost every 1000 iterations
        if print_cost and i % 10000 == 0:
            print ("Cost after iteration %i: %f" %(i, cost))

    return parameters

In [62]:
def predict(parameters, X):
    """
    Using the learned parameters, predicts a class for each example in X
    
    Arguments:
    parameters -- python dictionary containing your parameters 
    X -- input data of size (n_x, m)
    
    Returns
    predictions -- vector of predictions of our model (red: 0 / blue: 1)
    """
    
    # Computes probabilities using forward propagation, and classifies to 0/1 using 0.5 as the threshold.
    A2, cache = forward_propagation(X,parameters)
    #predictions = np.round(A2)
    predictions=np.apply_along_axis(np.argmax, 0, A2)
    return predictions

In [28]:
train_X = X[:,:1500]
test_X = X[:,1501:1797]
train_Y = Y[:,:1500]
test_Y = Y[:,1501:1797]
#train_X,test_X,train_Y,test_Y = train_test_split(X,Y,test_size=0.2)
train_X.shape,train_Y.shape,test_X.shape,test_Y.shape,Y[:,1501:1797].shape

((64, 1500), (10, 1500), (64, 296), (10, 296), (10, 296))

In [65]:
parameters = nn_model(train_X, train_Y, n_h = 15, num_iterations = 100000, print_cost=True)

Cost after iteration 0: 3.250830
Cost after iteration 10000: 0.173270
Cost after iteration 20000: 0.073165
Cost after iteration 30000: 0.045116
Cost after iteration 40000: 0.032610
Cost after iteration 50000: 0.025652
Cost after iteration 60000: 0.020811
Cost after iteration 70000: 0.017638
Cost after iteration 80000: 0.015343
Cost after iteration 90000: 0.013591


In [70]:
def accuracy(x,y,parameters):
    predictions=predict(parameters,x)
    y=np.apply_along_axis(np.argmax,0,y)
    count=0
    for i in range(0,x.shape[1]):
        if predictions[i] == y[i]:
            count=count+1
    accu=(count/x.shape[1])*100
    return accu   

In [71]:
#trying to check for one input only 
accuracy(test_X,test_Y,parameters)

91.21621621621621