In [13]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Normalization,Dense
import numpy as np
import import_ipynb
from load_coffee_data import load_data

In [14]:
x_train, y_train = load_data()

### Normalize Data
- create a "Normalization Layer". Note, as applied here, this is not a layer in your model.

In [15]:
norm_layer = Normalization(axis=-1)

#'adapt' the data. This learns the mean and variance of the data set and saves the values internally.
norm_layer.adapt(x_train)  # learns mean, variance

x_norm = norm_layer(x_train)

copy our data to increase the training set size and reduce the number of training epochs.

In [17]:
x_tile = np.tile(x_norm, (100,1))
y_tile = np.tile(y_train, (100,1))

In [19]:
print(x_tile.shape, y_tile.shape)

(20000, 2) (20000, 1)


### Creating a model using tensorflow

In [24]:
tf.random.set_seed(1234)  # applied to achieve consistent results

model = Sequential(
        [
            tf.keras.Input(shape=(2,)),
            Dense(units=3, activation="sigmoid", name="layer1"),
            Dense(units=1, activation="sigmoid", name="layer2")          
        ]   
        )

The model.summary() provides a description of the network:

In [25]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 layer1 (Dense)              (None, 3)                 9         
                                                                 
 layer2 (Dense)              (None, 1)                 4         
                                                                 
Total params: 13
Trainable params: 13
Non-trainable params: 0
_________________________________________________________________


Instantiated weights and biases by Tensorflow

In [32]:
W1, b1 = model.get_layer("layer1").get_weights()
print(f"w: {W1} b: {b1}")

w: [[ 0.14331901 -0.3954503   0.5523132 ]
 [-0.5084111   0.53487957 -0.81892145]] b: [0. 0. 0.]


In [34]:
W2, b2 = model.get_layer("layer2").get_weights()
print(f"w: {W2} b: {b2}")

w: [[-1.1920437 ]
 [ 0.9825512 ]
 [ 0.42753232]] b: [0.]


The model.compile statement defines a loss function and specifies a compile optimization.

In [35]:
model.compile(
    loss = tf.keras.losses.BinaryCrossentropy(),
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.01),
)

The model.fit statement runs gradient descent and fits the weights to the data.

In [39]:
model.fit(x_tile, 
         y_tile,
         epochs=10   # number of iterations on complete dataset
         )

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x22654b5f8b0>

### Epochs and batches
The number of epochs was set to 10. This specifies that the entire data set should be applied during training 10 times. During training, you see output describing the progress of training that looks like this:

Epoch 1/10
6250/6250 [==============================] - 6s 910us/step - loss: 0.1782
The first line, Epoch 1/10, describes which epoch the model is currently running. For efficiency, the training data set is broken into 'batches'. The default size of a batch in Tensorflow is 32. There are 200000 examples in our expanded data set or 6250 batches. The notation on the 2nd line 6250/6250 [==== is describing which batch has been executed.

Updated weights and bias

In [40]:
w1, b1 = model.get_layer("layer1").get_weights()
print(f"w: {w1} b: {b1}")

w: [[ 14.214207   -10.640017     0.05994322]
 [ 11.904436    -0.24709323  10.101967  ]] b: [  1.838358 -11.545151  12.142668]


In [41]:
w2, b2 = model.get_layer("layer1").get_weights()
print(f"w: {w2} b: {b2}")

w: [[ 14.214207   -10.640017     0.05994322]
 [ 11.904436    -0.24709323  10.101967  ]] b: [  1.838358 -11.545151  12.142668]


Different training runs can produce somewhat different (w and b) hence different results
- so we will load some saved weights from a previous training run

In [45]:
W1 = np.array([
    [-8.94,  0.29, 12.89],
    [-0.17, -7.34, 10.79]] )
b1 = np.array([-9.87, -9.28,  1.01])
W2 = np.array([
    [-31.38],
    [-27.86],
    [-32.79]])
b2 = np.array([15.54])

#### to set weight and bias to the model

In [47]:
model.get_layer("layer1").set_weights([W1, b1])
model.get_layer("layer2").set_weights([W2, b2])

### predictions

In [48]:
# creating test data
X_test = np.array([
    [200,13.9],  # postive example
    [200,17]])   # negative example

we have normalized the input features so we must normalize our test data as well.
To make a prediction

In [49]:
X_test_norm = norm_layer(X_test)

In [55]:
# predicting 
predictions = model.predict(X_test_norm)
predictions



array([[9.6204257e-01],
       [3.0316290e-08]], dtype=float32)

applying a threshold To convert the probabilities to a decision

In [62]:
y_hat = np.zeros(len(predictions))
for i in range(len(predictions)):
    if predictions[i] >= 0.5:
        y_hat[i] = 1
    else :
        y_hat[i] = 0

In [63]:
y_hat

array([1., 0.])