In [1]:
import numpy as np


def sigmoid(z):
    return 1.0 / (1 + np.exp(-z))


def sigmoid_derivative(z):
    return sigmoid(z) * (1.0 - sigmoid(z))



In [2]:
def train(X, y, n_hidden, learning_rate, n_iter):
    m, n_input = X.shape
    W1 = np.random.randn(n_input, n_hidden)
    b1 = np.zeros((1, n_hidden))
    W2 = np.random.randn(n_hidden, 1)
    b2 = np.zeros((1, 1))
    for i in range(1, n_iter+1):
        Z2 = np.matmul(X, W1) + b1
        A2 = sigmoid(Z2)
        Z3 = np.matmul(A2, W2) + b2
        A3 = Z3

        dZ3 = A3 - y
        dW2 = np.matmul(A2.T, dZ3)
        db2 = np.sum(dZ3, axis=0, keepdims=True)

        dZ2 = np.matmul(dZ3, W2.T) * sigmoid_derivative(Z2)
        dW1 = np.matmul(X.T, dZ2)
        db1 = np.sum(dZ2, axis=0)

        W2 = W2 - learning_rate * dW2 / m
        b2 = b2 - learning_rate * db2 / m
        W1 = W1 - learning_rate * dW1 / m
        b1 = b1 - learning_rate * db1 / m

        if i % 100 == 0:
            cost = np.mean((y - A3) ** 2)
            print('Iteration %i, training loss: %f' % (i, cost))

    model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
    return model


In [3]:
def predict(x, model):
    W1 = model['W1']
    b1 = model['b1']
    W2 = model['W2']
    b2 = model['b2']
    A2 = sigmoid(np.matmul(x, W1) + b1)
    A3 = np.matmul(A2, W2) + b2
    return A3


In [5]:
from sklearn import datasets
diabetes = datasets.load_diabetes()
num_test = 10  # the last 10 samples as testing set

from sklearn import preprocessing
scaler = preprocessing.StandardScaler()

X_train = diabetes.data[:-num_test, :]
X_train = scaler.fit_transform(X_train)
y_train = diabetes.target[:-num_test].reshape(-1, 1)
X_test = diabetes.data[-num_test:, :]
X_test = scaler.transform(X_test)
y_test = diabetes.target[-num_test:]



In [6]:
n_hidden = 20
learning_rate = 0.1
n_iter = 2000

model = train(X_train, y_train, n_hidden, learning_rate, n_iter)
predictions = predict(X_test, model)
print(predictions)
print(y_test)


Iteration 100, training loss: 1998.296076
Iteration 200, training loss: 1814.342820
Iteration 300, training loss: 1690.890405
Iteration 400, training loss: 1559.154455
Iteration 500, training loss: 1403.650066
Iteration 600, training loss: 1318.148151
Iteration 700, training loss: 1259.770287
Iteration 800, training loss: 1218.458298
Iteration 900, training loss: 1185.399699
Iteration 1000, training loss: 1154.183768
Iteration 1100, training loss: 1127.510297
Iteration 1200, training loss: 1113.659336
Iteration 1300, training loss: 1105.944286
Iteration 1400, training loss: 1086.427980
Iteration 1500, training loss: 1077.013574
Iteration 1600, training loss: 1058.585169
Iteration 1700, training loss: 1051.573096
Iteration 1800, training loss: 1043.471946
Iteration 1900, training loss: 1035.452119
Iteration 2000, training loss: 1027.459190
[[261.23212844]
 [ 56.39117046]
 [ 96.85606368]
 [115.55683243]
 [ 89.54313034]
 [160.34783525]
 [ 95.03197958]
 [ 47.82868686]
 [222.2577584 ]
 [ 51

In [7]:
# Scikit-learn implementation of neural network

from sklearn.neural_network import MLPRegressor
nn_scikit = MLPRegressor(hidden_layer_sizes=(16, 8), activation='relu', solver='adam',
                         learning_rate_init=0.001, random_state=42, max_iter=2000)
nn_scikit.fit(X_train, y_train)
predictions = nn_scikit.predict(X_test)
print(predictions)
print(np.mean((y_test - predictions) ** 2))


  y = column_or_1d(y, warn=True)


[233.49585182  68.18407781 119.3049426  122.25921605  66.1200858
 200.91764911  96.21982821 110.48092428 203.04431421  74.04628986]
1396.6459679497505


In [8]:
# TensorFlow implementation of neural network

import tensorflow as tf
from tensorflow import keras

tf.random.set_seed(42)


2023-08-06 20:22:00.423454: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
model = keras.Sequential([
    keras.layers.Dense(units=20, activation='relu'),
    keras.layers.Dense(units=8, activation='relu'),
    keras.layers.Dense(units=1)
])


In [10]:
model.compile(loss='mean_squared_error',
              optimizer=tf.keras.optimizers.Adam(0.02))


In [11]:
model.fit(X_train, y_train, epochs=300)


predictions = model.predict(X_test)[:, 0]


Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [12]:
print(predictions)
print(np.mean((y_test - predictions) ** 2))

[203.54877   20.219824 104.92247  117.740715  33.974667 206.2037
  59.76753  130.19366  200.98221   57.75394 ]
1294.3979079622677
