In [2]:
import numpy as np
import pandas as pd

## This is a code from [habr blogpost](https://habr.com/ru/company/ods/blog/335998/) by artgor.

In [17]:
class FNN:
    '''
    
    The full project includes creating a simple website, collecting hand-written digits and using them
    to train a network. 
    I just needed the code for feed-forward network to understand how it works.
    This network has architecture:
    input - fully connected layer - ReLU - fully connected layer - softmax - output
    
    The network has input dimension of N, hidden dimension of H, and performs classification to C classes.
    There is a ReLU activation in the first layer, L2 regularization of weight matrices, and loss function is a softmax.   
        
    '''
    def __init__(self, input_size, hidden_size, output_size):
        '''
        Inputs are:
        input_size - input dimension D,
        hidden_size - hidden dimension H,
        output_size - output dimension C.
        In this function we initialize the model. First we create a params dictionary which stores paramaters 
        for our model. Keys in this dictionary are W1, W2, b1, b2: Weights ans biases for the first and second layers 
        W1 has shape (D, H), W2 - (H, C), b1 - (H,), b2 - (C,).
        Weights are initialized using Xavier initialization method. 
        biases are all zeros at the beginning.
        '''
        self.W1 = ((2 / input_size) ** 0.5) * np.random.randn(input_size, hidden_size)
        self.b1 = np.zeros(hidden_size)
        self.W2 = ((2 / hidden_size) ** 0.5) * np.random.randn(hidden_size, output_size)
        self.b2 = np.zeros(output_size)
#         self.params = {}
#         self.params['W1'] = ((2 / input_size) ** 0.5) * np.random.randn(input_size, hidden_size)
#         self.params['b1'] = np.zeros(hidden_size)
#         self.params['W2'] = ((2 / hidden_size) ** 0.5) * np.random.randn(hidden_size, output_size)
#         self.params['b2'] = np.zeros(output_size)
        
    def loss(self, X, y, reg):
        '''
        This fuction is needed to calculate loss and gradients for our two layer neutal network.
        X is input data with shape (N, D), where N is number of samples, and D is number of features.
        y is the labels matrix.
        reg is regularization strenght, if we need it.
        
        This function returns:
        loss: - data loss and regularzation loss for given dataset.
        grads - dictionary which maps the parameter names to gradients of those paramaters
        with respect to loss function.
        '''
        W1, b1 = self.W1, self.b1
        W2, b2 = self.W2, self.b2
        N, D = X.shape
        
        L1 = np.dot(X, W1) + b1
        L1[L1 <= 0] = 0
        
        L2 = np.dot(L1, W2) + b2
        
        #softmax function
        exp_scores = np.exp(L2 - np.max(L2))
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        scores = L2
        
        #compute the loss
        W1_r = 0.5 * reg * np.sum(W1 * W1)  #i assume that this is a regularized weight matrix 
        W2_r = 0.5 * reg * np.sum(W2 * W2)
        
        loss = -np.sum(np.log(probs[range(y.shape[0]), y])) / N + W1_r + W2_r
        
        #backward pass - compute gradients
        grads = {}  
        
        probs[range(X.shape[0]), y] -= 1  
        
        dW2 = np.dot(L1.T, probs)   
        dW2 /= X.shape[0] # i don't get this part :(
        dW2 += reg * W2
        grads['W2'] = W2
        grads['b2'] = np.sum(probs, axis=0, keepdims=True) / X.shape[0]
        
        delta = probs.dot(W2.T)
        delta = delta * (L1 > 0)
        grads['W1'] = np.dot(X.T, delta) / X.shape[0] + reg * W1
        grads['b1'] = np.sum(delta, axis=0, keepdims=True) / X.shape[0]
        
        return loss, grads
    
    def train(self, X, y, X_val, y_val, learning_rate=0.01, learning_rate_decay=0.99,
              reg=1.0, num_iters=100, batch_size=25, verbose=True):
        '''
        Function to train the network.
        Parameters:
        X - training data with dimensions(N, D)
        y - training labels with dimensions(N,), y[i] = c means the particular label in the set, 
                and 0 <= c < C
        X_val validation data
        y_val - validation labels
        learning_rate - scalar giving learning rate for optimization
        learning_rate_decay - scalar for slowing down learning rate with each epoch
        reg - scalar for regularization strenght
        batch size - number of samples to train each step
        num_iters - number of steps to take when optimizing
        verbose - boolean. If True - print out information about training process
        
        '''
        num_train = X.shape[0]
        iterations_per_epoch = max(num_train / batch_size, 1)
        
        # use Stochastic Gradient Descent to optimize parameters in self.model
        loss_history = []
        train_acc_history = []
        val_acc_history = []
        
        #training cycle
        for iter in range(num_iters):
            #mini-batch selection
            indexes = np.random.choice(X.shape[0], batch_size, replace=True)
            
            X_batch = X[indexes]
            y_batch = y[indexes]
            
            loss, grads = self.loss(X_batch, y=y_batch, reg=reg)
            loss_history.append(loss)
            
            # update weights
            self.W1 -= learning_rate * grads['W1']
            self.b1 -= learning_rate * grads['b1'][0]
            self.W2 -= learning_rate * grads['W2']
            self.b2 -= learning_rate * grads['b2'][0]
            
            if verbose and iter % 100 == 0:
                print('Iteration %d / %d: loss %d' % (iter, num_iters, loss))
                
            # check accuracy every epoch and decay learning_rate
            if iter % iterations_per_epoch == 0:
                
                train_acc = (self.predict(X_train)==y_val).mean()
                val_acc = (self.predict(X_val)==y_val).mean()
                train_acc_history.append(train_acc)
                val_acc_history.append(val_acc)
                
                #decay
                learning_rate *= learning_rate_decay
                
        return {
            'loss history': loss_history,
            'train acc history': train_acc_history,
            'val acc history': val_acc_history
        }
    
    def predict(self, X):
        '''
        Using weight of our two layer network, make predictions for new data.
        Input:
        X - array of shape (N, D), where N  - number of samples, D - number of dimensions of the data.
        Output:
        y_pred - array of shape (N, ), with predicted labels for each data sample. 
        '''
        L1 = np.dot(X, self.W1) + self.b1
        L1[L1 <= 0] = 0
        L2 = np.dot(L1, self.W2) + self.b2
        
        exp_scores = np.exp(L2 - np.max(L2))
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        
        y_preds = np.argmax(probs, axis=1)
        
        return y_preds
    
    def predict_single(self, X):
        '''
        Using weight from our two layer neural network, make prediction for a new sample.
        Input:
        X - array of shape (N, D), where N is number of samples in a new data, D - number of dimensions(features)
        Ouput:
        y_pred - array of shape (1,), with predicted label for a single sample.
        '''
        L1 = np.dot(X, self.W1) + self.b1
        L1[L1 <= 0] = 0
        
        L2 = np.dot(L1, self.W2) + self.b2
        exp_scores = np.exp(L2 - np.max(L2))
        y_pred = np.argmax(exp_scores)
        
        return y_pred

In [4]:
train = pd.read_csv('C:/Users/Spurius/Desktop/digit-recognizer/train.csv')
test = pd.read_csv('C:/Users/Spurius/Desktop/digit-recognizer/test.csv')
train.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [18]:
X_train = train.values[:30000, 1:]
y_train = train.values[:30000, :1]
X_val = train.values[30000:, 1:]
y_val = train.values[30000:, :1]
y_val.shape

(12000, 1)

In [19]:
input_size = 784
hidden_size = 512
output_size = 10
net = FNN(input_size=784, hidden_size=512, output_size=10)

In [20]:
net.train(X_train, y_train, X_val, y_val, learning_rate=0.01, learning_rate_decay=1, reg=1.0, num_iters=100, batch_size=25, verbose=True)

Iteration 0 / 100: loss 4725


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


{'loss history': [4725.82037279748,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan],
 'train acc history': [0.09958318055555555],
 'val acc history': [0.09958295138888888]}

In [8]:
print(net)

<__main__.FNN object at 0x0000020AEEB40F98>


In [1]:
isinstance(net, FNN)

NameError: name 'net' is not defined

In [1]:
import sys
sys.path

['',
 'C:\\Users\\Spurius\\Anaconda3\\envs\\idp\\python36.zip',
 'C:\\Users\\Spurius\\Anaconda3\\envs\\idp\\DLLs',
 'C:\\Users\\Spurius\\Anaconda3\\envs\\idp\\lib',
 'C:\\Users\\Spurius\\Anaconda3\\envs\\idp',
 'C:\\Users\\Spurius\\Anaconda3\\envs\\idp\\lib\\site-packages',
 'C:\\Users\\Spurius\\Anaconda3\\envs\\idp\\lib\\site-packages\\html5lib-1.0.1-py3.6.egg',
 'C:\\Users\\Spurius\\Anaconda3\\envs\\idp\\lib\\site-packages\\pip-9.0.3-py3.6.egg',
 'C:\\Users\\Spurius\\Anaconda3\\envs\\idp\\lib\\site-packages\\IPython\\extensions',
 'C:\\Users\\Spurius\\.ipython']