In [228]:
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot
from scipy import optimize
from scipy.io import loadmat
from sklearn.preprocessing import OneHotEncoder


In [229]:
path_train = "../archive/poker-hand-training.csv"
path_test = "../archive/poker-hand-testing.csv"

train_data = pd.read_csv(path_train)
test_data = pd.read_csv(path_test)


In [230]:
train_data.head()

Unnamed: 0,Suit of Card 1,Rank of Card 1,Suit of Card 2,Rank of Card 2,Suit of Card 3,Rank of Card 3,Suit of Card 4,Rank of Card 4,Suit of Card 5,Rank of Card 5,Poker Hand
0,1,10,1,11,1,13,1,12,1,1,9
1,2,11,2,13,2,10,2,12,2,1,9
2,3,12,3,11,3,13,3,10,3,1,9
3,4,10,4,11,4,1,4,13,4,12,9
4,4,1,4,13,4,12,4,11,4,10,9


In [231]:
train_data.columns = ["SuitCard1","RC1", "SuitCard2","RC2","SuitCard3","RC3","SuitCard4","RC4","SuitCard5", "RC5","PH"]
test_data.columns = ["SuitCard1","RC1", "SuitCard2","RC2","SuitCard3","RC3","SuitCard4","RC4","SuitCard5", "RC5","PH"]

In [232]:
train_x = train_data.drop("PH", axis = 1).to_numpy()
train_y = train_data.PH.to_numpy()

test_x = test_data.drop("PH", axis = 1).to_numpy()
test_y = test_data.PH.to_numpy()

encoder = OneHotEncoder(sparse=False)
train_y_onehot = encoder.fit_transform(np.reshape(train_y,(-1,1)))
test_y_onehot = encoder.fit_transform(np.reshape(test_y,(-1,1)))
train_x_onehot = encoder.fit_transform(np.reshape(train_x,(-1,1)))
test_x_onehot = encoder.fit_transform(np.reshape(test_x,(-1,1)))

In [233]:
print(train_y[0])
print(train_y_onehot[0])

9
[0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]


In [234]:
def sigmoid(z):
    """
    Computes the sigmoid of z.
    """
    return 1.0 / (1.0 + np.exp(-z))

In [235]:
def sigmoidGradient(z):
    g = np.zeros(z.shape)

    g = sigmoid(z)*(1-sigmoid(z))
    return g

In [236]:
input_layer_size = 10
hidden_layer_size1 = 50
hidden_layer_size2 = 30
num_labels = 10

In [237]:
def nnCostFunction(nn_params,
                   input_layer_size,
                   hidden_layer_size1,
                   hidden_layer_size2,
                   num_labels,
                   X, y, lambda_=0.0):
    # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
    # for our 2 layer neural network
    Theta1 = np.reshape(nn_params[:hidden_layer_size1 * (input_layer_size + 1)],
                        (hidden_layer_size1, (input_layer_size + 1)))

    Theta2 = np.reshape(nn_params[(hidden_layer_size1 * (input_layer_size + 1)):((hidden_layer_size1 * (input_layer_size + 1)) + (hidden_layer_size2 * (hidden_layer_size1+1)))],
                        (hidden_layer_size2, (hidden_layer_size1 + 1)))
    Theta3 = np.reshape(nn_params[(hidden_layer_size2 * (hidden_layer_size1 + 1)+(hidden_layer_size1 * (input_layer_size + 1))):],
                        (num_labels, (hidden_layer_size2 + 1)))
    # Setup some useful variables
    m = y.shape[0]
         
    # You need to return the following variables correctly 
    J = 0
    Theta1_grad = np.zeros(Theta1.shape)
    Theta2_grad = np.zeros(Theta2.shape)
    Theta3_grad = np.zeros(Theta3.shape)

    # ====================== YOUR CODE HERE ======================
    a1 = np.concatenate([np.ones((m, 1)), X], axis=1)
    
    a2 = sigmoid(a1.dot(Theta1.T))
    a2 = np.concatenate([np.ones((a2.shape[0], 1)), a2], axis=1)
    
    a3 = sigmoid(a2.dot(Theta2.T))
    a3 = np.concatenate([np.ones((a3.shape[0], 1)), a3], axis=1)

    a4 = sigmoid(a3.dot(Theta3.T))
    
    y_matrix = y.reshape(-1)
    y_matrix = np.eye(num_labels)[y_matrix]
    
    temp1 = Theta1
    temp2 = Theta2
    temp3 = Theta3
    
    # Add regularization term
    
    reg_term = (lambda_ / (2 * m)) * (np.sum(np.square(temp1[:, 1:])) + np.sum(np.square(temp2[:, 1:])) +np.sum(np.square(temp3[:, 1:])) )
    
    J = (-1 / m) * np.sum((np.log(a4) * y_matrix) + np.log(1 - a4) * (1 - y_matrix)) + reg_term
    
    # Backpropogation
    
    delta_4 = a4 - y_matrix
    delta_3 = delta_4.dot(Theta3)[:, 1:] * sigmoidGradient(a2.dot(Theta2.T))
    delta_2 = delta_3.dot(Theta2)[:, 1:] * sigmoidGradient(a1.dot(Theta1.T))

    Delta1 = delta_2.T.dot(a1)
    Delta2 = delta_3.T.dot(a2)
    Delta3 = delta_4.T.dot(a3)
    
    # Add regularization to gradient

    Theta1_grad = (1 / m) * Delta1
    Theta1_grad[:, 1:] = Theta1_grad[:, 1:] + (lambda_ / m) * Theta1[:, 1:]
    
    Theta2_grad = (1 / m) * Delta2
    Theta2_grad[:, 1:] = Theta2_grad[:, 1:] + (lambda_ / m) * Theta2[:, 1:]

    Theta3_grad = (1 / m) * Delta3
    Theta3_grad[:, 1:] = Theta3_grad[:, 1:] + (lambda_ / m) * Theta3[:, 1:]

    grad = np.concatenate([Theta1_grad.ravel(), Theta2_grad.ravel(), Theta3_grad.ravel()])

    return J, grad

In [238]:
def randInitializeWeights(L_in, L_out, epsilon_init=0.12):
    W = np.zeros((L_out, 1 + L_in))
    W = np.random.rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init
    return W

In [239]:
print('Initializing Neural Network Parameters ...')

initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size1)
initial_Theta2 = randInitializeWeights(hidden_layer_size1, hidden_layer_size2)
initial_Theta3 = randInitializeWeights(hidden_layer_size2, num_labels)
print(initial_Theta1.shape)
print(initial_Theta2.shape)
print(initial_Theta3.shape)

# Unroll parameters
print(initial_Theta1.ravel().shape)
print(initial_Theta2.ravel().shape)
print(initial_Theta3.ravel().shape)
initial_nn_params = np.concatenate([initial_Theta1.ravel(), initial_Theta2.ravel(), initial_Theta3.ravel()], axis=0)
initial_nn_params.shape

Initializing Neural Network Parameters ...
(50, 11)
(30, 51)
(10, 31)
(550,)
(1530,)
(310,)


(2390,)

In [240]:
Theta1 = np.reshape(initial_nn_params[:hidden_layer_size1 * (input_layer_size + 1)],
                    (hidden_layer_size1, (input_layer_size + 1)))

Theta2 = np.reshape(initial_nn_params[(hidden_layer_size1 * (input_layer_size + 1)):((hidden_layer_size1 * (input_layer_size + 1)) + (hidden_layer_size2 * (hidden_layer_size1+1)))],
                    (hidden_layer_size2, (hidden_layer_size1 + 1)))
Theta3 = np.reshape(initial_nn_params[(hidden_layer_size2 * (hidden_layer_size1 + 1)+(hidden_layer_size1 * (input_layer_size + 1))):],
                    (num_labels, (hidden_layer_size2 + 1)))

print(Theta1.ravel().shape)
print(Theta2.shape)
print(Theta3.shape)

(550,)
(30, 51)
(10, 31)


In [241]:
#  After you have completed the assignment, change the maxiter to a larger
#  value to see how more training helps.
options= {'maxiter': 500}

#  You should also try different values of lambda
lambda_ = 0

# Create "short hand" for the cost function to be minimized
costFunction = lambda p: nnCostFunction(p, input_layer_size,
                                        hidden_layer_size1,
                                        hidden_layer_size2,
                                        num_labels, train_x, train_y, lambda_)

# Now, costFunction is a function that takes in only one argument
# (the neural network parameters)
res = optimize.minimize(costFunction,
                        initial_nn_params,
                        jac=True,
                        method='TNC',
                        options=options)

# get the solution of the optimization
nn_params = res.x
        
# Obtain Theta1 and Theta2 back from nn_params
Theta1 = np.reshape(nn_params[:hidden_layer_size1 * (input_layer_size + 1)],
                    (hidden_layer_size1, (input_layer_size + 1)))

Theta2 = np.reshape(nn_params[(hidden_layer_size1 * (input_layer_size + 1)):((hidden_layer_size1 * (input_layer_size + 1)) + (hidden_layer_size2 * (hidden_layer_size1+1)))],
                    (hidden_layer_size2, (hidden_layer_size1 + 1)))
Theta3 = np.reshape(nn_params[(hidden_layer_size2 * (hidden_layer_size1 + 1)+(hidden_layer_size1 * (input_layer_size + 1))):],
                    (num_labels, (hidden_layer_size2 + 1)))

  res = optimize.minimize(costFunction,
  return 1.0 / (1.0 + np.exp(-z))
  J = (-1 / m) * np.sum((np.log(a4) * y_matrix) + np.log(1 - a4) * (1 - y_matrix)) + reg_term
  J = (-1 / m) * np.sum((np.log(a4) * y_matrix) + np.log(1 - a4) * (1 - y_matrix)) + reg_term


In [242]:
def predict(Theta1, Theta2,Theta3, X):
    # Useful values
    m = X.shape[0]
    num_labels = Theta2.shape[0]

    p = np.zeros(m)
    h1 = sigmoid(np.dot(np.concatenate([np.ones((m, 1)), X], axis=1), Theta1.T))
    h2 = sigmoid(np.dot(np.concatenate([np.ones((m, 1)), h1], axis=1), Theta2.T))
    h3 = sigmoid(np.dot(np.concatenate([np.ones((m, 1)), h2], axis=1), Theta3.T))
    p = np.argmax(h3, axis=1)
    return p

In [243]:
pred = predict(Theta1, Theta2, Theta3,test_x)
print('Training Set Accuracy: %f' % (np.mean(pred == test_y) * 100))

Training Set Accuracy: 63.332600
