### Importing needed modules

In [38]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from scipy.io import loadmat
import tensorflow as tf
import os

### Loading and saving images as matrices

In [39]:
data_NN = [] # dataset array

cwd = os.getcwd() + "\images"+ "\ "[0]
pokemon_labels = []

with os.scandir(cwd) as itr:
    i=0
    for dir in os.scandir(cwd):
        cwd_poke = cwd + dir.name + "\ "[0]
        #y = np.zeros((len(os.listdir(cwd)), 1)) # creates the label for each pokemon

        with os.scandir(cwd_poke) as itr2 : # iterates every image in each folder and appends name to label list
            # these directory lists are sorted but it doesn't matter because the labels are the same (atleast if using VSCode)

            for img in itr2:
                pokemon_labels.append(i) # appends label number to label list
                img_open = open(cwd_poke+img.name, 'rb') # saving image to variable in binary
                read_img = img_open.read()

                if img.name.endswith(".jpg") or img.name.endswith(".jpeg"):
                    img_decode = tf.image.decode_jpeg(read_img, channels = 3)# /255 # decode jpeg image to matrix of matrices and normalizes (divide by 255) 
                    resized_img = tf.image.resize_with_pad(img_decode, 150,150) # resizes image 
                
                elif img.name.endswith(".png"):
                    img_decode = tf.image.decode_png(read_img, channels = 3) #/255 # decode png image to matrix of matrices and normalizes (divide by 255) 
                    resized_img = tf.image.resize_with_pad(img_decode, 150,150)

                data_NN.append(resized_img) # appends image to matrix with images
        i+=1

data_NN = np.array(data_NN)

#### Confirming that the data was processed correctly

Transforming an image matrix back to .jpeg format and verifying that it was correctly saved
It will be confirmed that the first image in data_NN list is not the first image in the folder

In [3]:
img = data_NN[0]

import PIL

def tensor_to_image(tensor):
    tensor = tensor*255
    tensor = np.array(tensor, dtype=np.uint8)
    if np.ndim(tensor)>3:
        assert tensor.shape[0] == 1
        tensor = tensor[0]
    return PIL.Image.fromarray(tensor)

tensor_to_image(img).show()


In [40]:
m = data_NN.shape[0]
n1 = data_NN.shape[1]
n2 = data_NN.shape[2]
n3 = data_NN.shape[3]

data_final = data_NN.flatten().reshape((m,n1*n2*n3))
data_final.shape # transform matrix into working order


(6837, 67500)

### Computing the Cost Function

Recall that the regularized cost function in logistic regression is:

$J(\theta) = \frac{1}{m} \sum_{i=1}^{m} [ -y^{(i)}log(h_{\theta}(x^{(i)})) - (1 - y^{(i)})log(1 - (h_{\theta}(x^{(i)}))] + \frac{\lambda}{2m} \sum_{j=1}^n \theta_j^2$

The cost gradients are (remember that the gradient of $\theta_0$ is not regularized): 

$ \frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m} \sum_{i=1}^{m} (h_{\theta}(x^{(i)}) - y^{(i)})x_j^{(i)}$ for $j=0$

$ \frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m} \sum_{i=1}^{m} (h_{\theta}(x^{(i)}) - y^{(i)})x_j^{(i)} + \frac{\lambda}{m}\theta_j$ for $j\geq 1$

#### Defining the sigmoid and regularized cost function

In [78]:
def sigmoid(z):
    """
    Returns sigmoid of z.
    """
    gz = 1/ (1 + np.exp(-z))
    return gz

def costFunctionReg(X, y, theta, Lambda):
    """
    Take in numpy array of  data X, labels y and theta, to return the regularized cost function and gradients
    of the logistic regression classifier
    """
    
    #number of training examples 
    m = X.shape[1]

    #vector of the model predictions for all training examples      
    h = sigmoid(np.dot(X,theta)) # forward propagation

    error = (-y * np.log(h)) - ((1-y)*np.log(1-h))
    
    #cost function without regularization term
    cost = sum(error)/m
    
    #add regularization term to the cost function L2 norm
    regCost= cost + Lambda/(2*m) * sum(theta[1:]**2)
    
    #gradient of theta_0
    grad_0= (1/m) * np.sum(np.dot(X.transpose(),(h - y))[0])

    # #vector of gradients of theta_j from j=1:n (adding the regularization term of the gradient)
    grad = (1/m) * np.dot(X.transpose(),(h - y))[1:]) + (Lambda/m)* theta[1:]

    # all gradients in a column vector shape
    grad_all=np.append(grad_0,grad)
    grad_all = grad_all.reshape((len(grad_all), 1))


    return regCost[0], grad_all
# def costFunctionReg(theta, X, y ,Lambda):
#     """
#     Take in numpy array of theta, X, and y to return the regularize cost function and gradient
#     of a logistic regression
#     """
    
#     m=len(y)
#     y=y[:,np.newaxis]
#     predictions = sigmoid(X @ theta)
#     error = (-y * np.log(predictions)) - ((1-y)*np.log(1-predictions))
#     cost = 1/m * sum(error)
#     regCost= cost + Lambda/(2*m) * sum(theta**2)
    
#     # compute gradient
#     j_0= 1/m * (X.transpose() @ (predictions - y))[0]
#     j_1 = 1/m * (X.transpose() @ (predictions - y))[1:] + (Lambda/m)* theta[1:]
#     grad= np.vstack((j_0[:,np.newaxis],j_1))
#     return regCost[0], grad

SyntaxError: unmatched ')' (2536948166.py, line 32)

#### Gradient descent
Defining the gradient descent function

In [75]:
def gradientDescent(X, y, theta, alpha, num_iters, Lambda):
    """
    Take in numpy array X, y and theta and update theta by taking num_iters gradient steps
    with learning rate of alpha
    
    return theta and the list of the cost of theta during each iteration
    """
    
    J_history =[]
    
    for i in range(num_iters):
        
        #call CostFunctionReg 
        cost, grad = costFunctionReg(X, y, theta, Lambda)
        
        #update theta

        theta = theta - np.dot(alpha,grad)
        
        J_history.append(cost)
    
    return theta , J_history

In [76]:
def oneVsAll(X, y, initial_theta, alpha, num_iters, Lambda, K):
    """
    Inputs:
    numpy array of data X and labels y
    initial_theta - inicialized vector of model parameters theta 
    alpha - learning rate
    num_iters - number of iterations
    Lambda - regularization parameter 
    K -number of classes
    
    ONEVSALL trains K Logistic Regression classifiers using gradient descent. 
    
    Returns:   
    all_theta - Kxn matrix where i-th row corresponds to the i-th classifier, n parameters
    all_J - the evolution of cost function during each iteration (J_history) for all K classifiers
    
    """
    all_theta = []
    all_J = []

    #number of training examples
    m = X.shape[0]
    
    #number of features
    n = X.shape[1]
    
    # add an extra column of 1´s corresponding to xo=1 (aka intercept term)
    X = np.append(np.ones((m,1)),X,axis=1)
    
    for i in range(1,K+1):
        theta , J_history = gradientDescent(X, np.where(y == i,1,0), initial_theta, alpha,num_iters, Lambda)
        
        # add the vector of optimized parameters theta of classifier i
        all_theta.extend(theta)
                
        # add the cost function history of classifier i
        all_J.extend(J_history)
        
    return np.array(all_theta).reshape(K,n+1), all_J

In [79]:
# defining K - number of classes
K = len(os.listdir(cwd)) # number of unique Pokemon

data_used = data_final[0:10]
label_used = np.array(pokemon_labels[0:10])

m = data_used.shape[0]
n = data_used.shape[1]

initial_theta = np.zeros((n+1,1))

# Hyper-parameters
alpha = 1 # learning rate
num_iters = 300
Lambda = 0.1

all_theta, all_J = oneVsAll(data_used, label_used, initial_theta, alpha, num_iters, Lambda, K)


ValueError: operands could not be broadcast together with shapes (67501,1) (2,1) 

In [57]:
#Computes the gradient of sigmoid function
def sigmoidGradient(z):
    """
    computes the gradient of the sigmoid function
    """
    sigmoid = sigmoid(z)
    
    return sigmoid * (1 - sigmoid)

In [71]:
def nnCostFunction(nn_params,input_layer_size, hidden_layer_size, num_labels,X, y,Lambda):
    """
    nn_params contains the parameters unrolled into a vector
    
    compute the cost and gradient of the neural network
    """
    # Reshape nn_params back into the parameters Theta1 and Theta2
    Theta1 = nn_params[:((input_layer_size+1) * hidden_layer_size)].reshape(hidden_layer_size,input_layer_size+1)
    Theta2 = nn_params[((input_layer_size +1)* hidden_layer_size ):].reshape(num_labels,hidden_layer_size+1)
    
    m = X.shape[0]
    n = X.shape[1]


    J=0
    X = np.hstack((np.ones((m,1)),X))
    y10 = np.zeros((m,num_labels))
    
    a1 = sigmoid(X @ Theta1.T)
    a1 = np.hstack((np.ones((m, n, n,1)), a1)) # hidden layer
    a2 = sigmoid(a1 @ Theta2.T) # output layer
    
    for i in range(1,num_labels+1):
        y10[:,i-1][:,np.newaxis] = np.where(y==i,1,0)
    for j in range(num_labels):
        J = J + sum(-y10[:,j] * np.log(a2[:,j]) - (1-y10[:,j])*np.log(1-a2[:,j]))
    
    cost = 1/m* J
    reg_J = cost + Lambda/(2*m) * (np.sum(Theta1[:,1:]**2) + np.sum(Theta2[:,1:]**2))
    
    # Implement the backpropagation algorithm to compute the gradients
    
    grad1 = np.zeros((Theta1.shape))
    grad2 = np.zeros((Theta2.shape))
    
    for i in range(m):
        xi= X[i,:] # 1 X 401
        a1i = a1[i,:] # 1 X 26
        a2i =a2[i,:] # 1 X 10
        d2 = a2i - y10[i,:]
        d1 = Theta2.T @ d2.T * sigmoidGradient(np.hstack((1,xi @ Theta1.T)))
        grad1= grad1 + d1[1:][:,np.newaxis] @ xi[:,np.newaxis].T
        grad2 = grad2 + d2.T[:,np.newaxis] @ a1i[:,np.newaxis].T
        
    grad1 = 1/m * grad1
    grad2 = 1/m*grad2
    
    grad1_reg = grad1 + (Lambda/m) * np.hstack((np.zeros((Theta1.shape[0],1)),Theta1[:,1:]))
    grad2_reg = grad2 + (Lambda/m) * np.hstack((np.zeros((Theta2.shape[0],1)),Theta2[:,1:]))
    
    return cost, grad1, grad2, reg_J, grad1_reg, grad2_reg

In [72]:
def randInitializeWeights(L_in, L_out):
    """
    randomly initializes the weights of a layer with L_in incoming connections and L_out outgoing connections.
    """
    
    epi = (6**1/2) / (L_in + L_out)**1/2
    
    W = np.random.rand(L_out,L_in +1) *(2*epi) -epi
    
    return W

In [73]:
input_layer_size  = data_NN.shape[0]
hidden_layer_size = 25

num_labels = len(os.listdir(cwd))

initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size)
initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels)
initial_nn_params = np.append(initial_Theta1.flatten(),initial_Theta2.flatten())

In [74]:
def gradientDescentnn(X,y,initial_nn_params,alpha,num_iters,Lambda,input_layer_size, hidden_layer_size, num_labels):
    """
    Take in numpy array X, y and theta and update theta by taking num_iters gradient steps
    with learning rate of alpha
    
    return theta and the list of the cost of theta during each iteration
    """
    Theta1 = initial_nn_params[:((input_layer_size+1) * hidden_layer_size)].reshape(hidden_layer_size,input_layer_size+1)
    Theta2 = initial_nn_params[((input_layer_size +1)* hidden_layer_size ):].reshape(num_labels,hidden_layer_size+1)
    
    m=len(y)
    J_history =[]
    
    for i in range(num_iters):
        nn_params = np.append(Theta1.flatten(),Theta2.flatten())
        cost, grad1, grad2 = nnCostFunction(nn_params,input_layer_size, hidden_layer_size, num_labels,X, y,Lambda)[3:]
        Theta1 = Theta1 - (alpha * grad1)
        Theta2 = Theta2 - (alpha * grad2)
        J_history.append(cost)
    
    nn_paramsFinal = np.append(Theta1.flatten(),Theta2.flatten())
    return nn_paramsFinal , J_history

In [75]:
#It may take very long to finish the training. 
#For 1000 iterations the Training Set Accuracy: 95.28(lambda=0.1; alpha=1)
#You may need more iterations (e.g. 1500) to get better accuracy

alpha=3 #learning rate
num_iters=1000
Lambda=1

#Call gradientDescentnn
nnTheta, nnJ_history = gradientDescentnn(data_NN, pokemon_labels, initial_nn_params, alpha, num_iters, Lambda, input_layer_size, hidden_layer_size, num_labels)

Theta1 = nnTheta[:((input_layer_size+1) * hidden_layer_size)].reshape(hidden_layer_size,input_layer_size+1)
Theta2 = nnTheta[((input_layer_size +1)* hidden_layer_size ):].reshape(num_labels,hidden_layer_size+1)

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 4 dimension(s)

4