In [1]:
import numpy as np

In [2]:
def sigmoid(x): # Activation function: f(x) = 1/(1 + e^(-x))
    
    return 1 / (1 + np.exp(-x))

def deriv_sigmoid(x): # Derivative of sigmoid: f'(x) = f(x) * (1 - f(x))
    fx = sigmoid(x)
    
    return fx * (1 - fx)

In [3]:
class Neuron:
    
    def __init__(self, weights, bias):
        self.weights = weights
        self.bias = bias
        
    def feedforward(self, inputs): # weight inputs, add bias, activation function
        total = np.dot(self.weights, inputs) + self.bias
        return sigmoid(total)

In [4]:
weights = np.array([0, 1])
bias = 4

neuron = Neuron(weights, bias)

x = np.array([2, 3])

print(neuron.feedforward(x))

0.9990889488055994


In [5]:
class Network:
    '''
    Neural network with:
        - 2 inputs
        - a hidden layer with 2 neurons (h1, h2)
        - an output layer with 1 neuron (o1)
    Each neuron would have same weights and bias
    '''
    
    def __init__(self, weights, bias):
        self.h1 = Neuron(weights, bias)
        self.h2 = Neuron(weights, bias)
        
        self.o1 = Neuron(weights, bias)
        
    def feedforward(self, x):
        out_h1 = self.h1.feedforward(x)
        out_h2 = self.h2.feedforward(x)
        
        # outputs of h1 and h2 are fed into o1
        out_o1 = self.o1.feedforward(np.array([out_h1, out_h2]))
        
        return out_o1

In [6]:
weights = np.array([0, 1])
bias = 0

network = Network(weights, bias)

x = np.array([2, 3])

print(network.feedforward(x))

0.7216325609518421


In [7]:
def mse_loss(y_true, y_pred): # y_true and y_pred are numpy arrays of the same length
    
    return ((y_true - y_pred) ** 2).mean() 

In [8]:
y_true = np.array([1, 0, 0, 1])
y_pred = np.array([0, 0, 0, 0])

print(mse_loss(y_true, y_pred))

0.5


In [9]:
# Here we go!

class NeuralNetwork:
    
    def __init__(self, weights=None, biases=None):
        if not weights:
            self.w1 = np.random.normal()
            self.w2 = np.random.normal()
            self.w3 = np.random.normal()
            self.w4 = np.random.normal()
            self.w5 = np.random.normal()
            self.w6 = np.random.normal()
        else:
            self.w1 = weights[0]
            self.w2 = weights[1]
            self.w3 = weights[2]
            self.w4 = weights[3]
            self.w5 = weights[4]
            self.w6 = weights[5]
        
        if not biases:
            self.b1 = np.random.normal()
            self.b2 = np.random.normal()
            self.b3 = np.random.normal()
        else:
            self.b1 = biases[0]
            self.b2 = biases[1]
            self.b3 = biases[2]
        
        print("Initialized weights:", self.w1, self.w2, self.w3, self.w4, self.w5, self.w6, "\n")
        print("Initialized biases:", self.b1, self.b2, self.b3, "\n")
    
    def feedforward(self, x):
        # x is a numpy array with 2 elements
        h1 = sigmoid(self.w1 * x[0] + self.w2 * x[1] + self.b1)
        h2 = sigmoid(self.w3 * x[0] + self.w4 * x[1] + self.b2)
        
        o1 = sigmoid(self.w5 * h1 + self.w6 * h2 + self.b3)
        
        return o1
    
    def train(self, data, y_trues):
        '''
        - data is a (n x 2) numpy array, n = # of samples in the dataset
        - y_trues is a numpy array with n elements
        '''
        learn_rate = 0.1
        epochs = 1000 # number of times to loop through the entire dataset
        
        for epoch in range(epochs):
            for x, y_true in zip(data, y_trues):
                
                # --- feedforward step
                sum_h1 = self.w1 * x[0] + self.w2 * x[1] + self.b1
                h1 = sigmoid(sum_h1)
                
                sum_h2 = self.w3 * x[0] + self.w4 * x[1] + self.b2
                h2 = sigmoid(sum_h2)
                
                sum_o1 = self.w5 * h1 + self.w6 * h2 + self.b3
                o1 = sigmoid(sum_o1)
                
                y_pred = o1
                
                # --- partial derivatives
                dL_dypred = -2 * (y_true - y_pred)
                
                # neuron o1
                dypred_dw5 = h1 * deriv_sigmoid(sum_o1)
                dypred_dw6 = h2 * deriv_sigmoid(sum_o1)
                dypred_db3 = deriv_sigmoid(sum_o1)
                
                dypred_dh1 = self.w5 * deriv_sigmoid(sum_o1)
                dypred_dh2 = self.w6 * deriv_sigmoid(sum_o1)
                
                # neuron h1
                dh1_dw1 = x[0] * deriv_sigmoid(sum_h1)
                dh1_dw2 = x[1] * deriv_sigmoid(sum_h1)
                dh1_db1 = deriv_sigmoid(sum_h1)
                
                # neuron h2
                dh2_dw3 = x[0] * deriv_sigmoid(sum_h2)
                dh2_dw4 = x[1] * deriv_sigmoid(sum_h2)
                dh2_db2 = deriv_sigmoid(sum_h2)
                
                # --- updation of weights and biases
                # neuron h1
                self.w1 -= learn_rate * dL_dypred * dypred_dh1 * dh1_dw1
                self.w2 -= learn_rate * dL_dypred * dypred_dh1 * dh1_dw2
                self.b1 -= learn_rate * dL_dypred * dypred_dh1 * dh1_db1
                
                # neuron h2
                self.w3 -= learn_rate * dL_dypred * dypred_dh2 * dh2_dw3
                self.w4 -= learn_rate * dL_dypred * dypred_dh2 * dh2_dw4
                self.b2 -= learn_rate * dL_dypred * dypred_dh2 * dh2_db2
                
                # neuron o1
                self.w5 -= learn_rate * dL_dypred * dypred_dw5
                self.w6 -= learn_rate * dL_dypred * dypred_dw5
                self.b3 -= learn_rate * dL_dypred * dypred_db3
            
            # --- calculation of total loss
            if epoch % 10 == 0:
                y_preds = np.apply_along_axis(self.feedforward, 1, data)
                loss = mse_loss(y_trues, y_preds)
                
                print("Epoch %d loss: %.5f" % (epoch, loss))

In [10]:
# define dataset
data = np.array([
    [-2, -1],
    [25, 6],
    [17, 4],
    [-15, -6]
])

y_trues = np.array([
    1,
    0,
    0,
    1
])

In [11]:
# let's train!

network = NeuralNetwork()
network.train(data, y_trues)

Initialized weights: -0.4267315474989344 1.8037183784371178 1.0830229667723357 -0.403650654175028 -1.32241349474159 2.1896616559746134 

Initialized biases: -0.5498242570853832 0.7467031058617464 0.22020593873861743 

Epoch 0 loss: 0.35323
Epoch 10 loss: 0.19691
Epoch 20 loss: 0.13612
Epoch 30 loss: 0.10579
Epoch 40 loss: 0.08539
Epoch 50 loss: 0.06950
Epoch 60 loss: 0.04556
Epoch 70 loss: 0.03207
Epoch 80 loss: 0.02881
Epoch 90 loss: 0.02732
Epoch 100 loss: 0.02639
Epoch 110 loss: 0.02577
Epoch 120 loss: 0.02534
Epoch 130 loss: 0.02502
Epoch 140 loss: 0.02477
Epoch 150 loss: 0.02458
Epoch 160 loss: 0.02442
Epoch 170 loss: 0.02428
Epoch 180 loss: 0.02416
Epoch 190 loss: 0.02405
Epoch 200 loss: 0.02394
Epoch 210 loss: 0.02385
Epoch 220 loss: 0.02375
Epoch 230 loss: 0.02365
Epoch 240 loss: 0.02355
Epoch 250 loss: 0.02344
Epoch 260 loss: 0.02332
Epoch 270 loss: 0.02317
Epoch 280 loss: 0.02298
Epoch 290 loss: 0.02267
Epoch 300 loss: 0.02196
Epoch 310 loss: 0.01886
Epoch 320 loss: 0.01541
E

**[Reference](https://victorzhou.com/blog/intro-to-neural-networks/)** - Introduction to Neural Networks

In [12]:
# let's predict!

emily = np.array([-7, -3])
frank = np.array([20, 2])

print("Emily: %.3f" % network.feedforward(emily))
print("Frank: %.3f" % network.feedforward(frank))

Emily: 0.958
Frank: 0.040


### MNIST 

In [13]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import tensorflow as tf

In [14]:
import mnist
from tensorflow import keras

In [15]:
train_images = mnist.train_images()
train_labels = mnist.train_labels()

print(train_images.shape, train_labels.shape)

test_images = mnist.test_images()
test_labels = mnist.test_labels()

(60000, 28, 28) (60000,)


In [16]:
# normalize the images

train_images = (train_images / 255) - 0.5
test_images = (test_images / 255) - 0.5

# flatten the images

train_images = train_images.reshape((-1, 784))
test_images = test_images.reshape((-1, 784))

print(train_images.shape, test_images.shape)

(60000, 784) (10000, 784)


In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.utils import to_categorical

In [18]:
model = Sequential([Dense(64, activation='relu', input_shape=(784, )),
                    Dense(128, activation='relu'),
#                     Dropout(0.8),
                    Dense(64, activation='relu'),
                    Dense(10, activation='softmax')])

model.compile(optimizer=Adam(learning_rate=0.01),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [19]:
model.fit(train_images,
          to_categorical(train_labels),
          epochs=5,
          batch_size=32,
          validation_data=(test_images, to_categorical(test_labels)))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f06780a5af0>

In [20]:
model.evaluate(test_images,
               to_categorical(test_labels))

# model.save_weights('model.h5')



[0.3000122606754303, 0.9193999767303467]

In [21]:
predictions = model.predict(test_images[ : 5])

print("Predictions:", np.argmax(predictions, axis=1), "\n")

print("Ground truths:", test_labels[ : 5])

Predictions: [7 2 1 0 4] 

Ground truths: [7 2 1 0 4]
