In [1]:
import pandas as pd
import numpy as np
from scipy import optimize

In [2]:
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

train_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60000 entries, 0 to 59999
Columns: 1054 entries, label to v350
dtypes: int64(1054)
memory usage: 482.5 MB


In [33]:

x_train, y_train = train_data.iloc[:10000, 0:], train_data.iloc[:10000, 0]
x_train, y_train = x_train.to_numpy(), y_train.to_numpy()
x_test, y_test = test_data.iloc[:1000, 0:], test_data.iloc[:1000, 0]
x_test, y_test = x_test.to_numpy(), y_test.to_numpy()

print(x_train.shape, x_test.shape)

(100, 1054) (1000, 1054)


In [34]:
def randInitializeWeights(L_in, L_out, epsilon_init=0.12):
    W = np.zeros((L_out, 1 + L_in))
    W = np.random.rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init
    return W

In [35]:
input_layer_size  = 1054
hidden_layer_size = 100
num_labels = 10   

initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size)
initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels)

initial_nn_params = np.concatenate([initial_Theta1.ravel(), initial_Theta2.ravel()], axis=0)

In [36]:
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))


def sigmoidGradient(z):
    g = np.zeros(z.shape)
    g = sigmoid(z) * (1 - sigmoid(z))
    return g

In [31]:
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, x, y, lambda_=0.0):

    Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],
                        (hidden_layer_size, (input_layer_size + 1)))

    Theta2 = np.reshape(nn_params[(hidden_layer_size * (input_layer_size + 1)):],
                        (num_labels, (hidden_layer_size + 1)))

    Theta1_grad = np.zeros(Theta1.shape)
    Theta2_grad = np.zeros(Theta2.shape)

    m = y.size

    a1 = np.concatenate([np.ones((m, 1)), x], axis=1)
    a2 = sigmoid(a1.dot(Theta1.T))
    a2 = np.concatenate([np.ones((a2.shape[0], 1)), a2], axis=1)
    a3 = sigmoid(a2.dot(Theta2.T))

    y_matrix = y.reshape(-1)
    y_matrix = np.eye(num_labels)[y_matrix]
  
    temp1 = Theta1
    temp2 = Theta2
   
    
    reg_term = (lambda_ / (2 * m)) * (np.sum(np.square(temp1[:, 1:])) + np.sum(np.square(temp2[:, 1:])))
    J = (-1 / m) * np.sum((np.log(a3) * y_matrix) + np.log(1 - a3) * (1 - y_matrix)) + reg_term

    delta_3 = a3 - y_matrix
    delta_2 = delta_3.dot(Theta2)[:, 1:] * sigmoidGradient(a1.dot(Theta1.T))
    Delta1 = delta_2.T.dot(a1)
    Delta2 = delta_3.T.dot(a2)

    Theta1_grad = (1 / m) * Delta1
    Theta1_grad[:, 1:] = Theta1_grad[:, 1:] + (lambda_ / m) * Theta1[:, 1:]
    Theta2_grad = (1 / m) * Delta2
    Theta2_grad[:, 1:] = Theta2_grad[:, 1:] + (lambda_ / m) * Theta2[:, 1:]

    grad = np.concatenate([Theta1_grad.ravel(), Theta2_grad.ravel()])

    return J, grad

In [37]:
options= {'maxiter': 10}

lambda_ = 1

costFunction = lambda p: nnCostFunction(p, input_layer_size,
                                        hidden_layer_size,
                                        num_labels, x_train, y_train, lambda_)

res = optimize.minimize(costFunction,
                        initial_nn_params,
                        jac=True,
                        method='TNC',
                        options=options)

nn_params = res.x

Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],
                    (hidden_layer_size, (input_layer_size + 1)))

Theta2 = np.reshape(nn_params[(hidden_layer_size * (input_layer_size + 1)):],
                    (num_labels, (hidden_layer_size + 1)))

  res = optimize.minimize(costFunction,
  return 1.0 / (1.0 + np.exp(-z))


In [38]:
def predict(Theta1, Theta2, X):
    m = X.shape[0]
    p = np.zeros(m)
    h1 = sigmoid(np.dot(np.concatenate([np.ones((m, 1)), X], axis=1), Theta1.T))
    h2 = sigmoid(np.dot(np.concatenate([np.ones((m, 1)), h1], axis=1), Theta2.T))
    p = np.argmax(h2, axis=1)
    return p

In [39]:
pred = predict(Theta1, Theta2, x_test[:100,:])
print('Training Set Accuracy: %f' % (np.mean(pred == y_test[:100]) * 100))
print(pred)
print(y_test[:100])

Training Set Accuracy: 25.000000
[1 6 1 0 4 1 9 7 6 6 6 8 6 0 1 3 8 7 0 7 6 6 7 0 1 0 1 4 0 1 6 1 4 6 7 8 1
 1 4 7 1 3 3 9 9 8 9 9 0 3 3 0 3 0 0 3 3 1 9 1 7 0 1 7 3 1 7 6 2 4 1 0 6 8
 1 9 1 3 6 0 9 8 8 7 3 0 9 6 8 1 7 6 4 0 7 0 1 3 4 8]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4 9 6 6 5 4 0 7 4 0 1 3 1 3 4 7 2 7
 1 2 1 1 7 4 2 3 5 1 2 4 4 6 3 5 5 6 0 4 1 9 5 7 8 9 3 7 4 6 4 3 0 7 0 2 9
 1 7 3 2 9 7 7 6 2 7 8 4 7 3 6 1 3 6 9 3 1 4 1 7 6 9]
