In [None]:
import keras
import numpy as np

from sklearn. datasets import load_iris

In [None]:
# Load the Iris data as plain arrays: X holds the features, y the integer class labels.
X, y = load_iris(return_X_y=True)

In [None]:
# (number of samples, number of features)
X.shape

(150, 4)

In [None]:
# Peek at the first five samples.
X[:5]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

In [None]:
# One label per sample.
y.shape

(150,)

In [None]:
y[-10:] # the labels are ordered by class (the last ten all belong to class 2)

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

Dataset heeft 4 features en 3 classes.

5 neuronen in de verborgen laag.

In [None]:
def get_model(n_features=4, n_hidden=5, n_classes=3):
  """Build an untrained feed-forward classifier with a single hidden layer.

  The defaults reproduce the network used for the Iris data in this
  notebook: 4 input features, 5 hidden neurons and 3 output classes.

  Args:
    n_features: Number of input features per sample.
    n_hidden: Number of neurons in the hidden (ReLU) layer.
    n_classes: Number of classes, i.e. neurons in the softmax output layer.

  Returns:
    An uncompiled `keras.models.Sequential` model whose Dense layers are
    named "hidden" and "output".
  """
  model = keras.models.Sequential() # layers are applied one after the other (input, hidden, output)

  # Add the layers
  model.add(keras.layers.Input(shape=(n_features,), name="input_layer"))
  # Fully connected layer: n_features * n_hidden weights + n_hidden biases.
  model.add(keras.layers.Dense(units=n_hidden, activation="relu", name="hidden"))
  # Softmax turns the n_classes outputs into a probability distribution.
  model.add(keras.layers.Dense(units=n_classes, activation="softmax", name="output"))

  return model

Hoeveel parameters zal het model hebben?

- Eerste laag: verbindt 4 neuronen met 5 neuronen -> 25 parameters (20 verbindingen en 5 bias parameters)
- Tweede laag: 18 parameters (15 + 3)

Totaal: 43 parameters

In [None]:
# Fix the Python, NumPy and backend random seeds so the initial weights (and
# therefore the outputs of the following cells) are reproducible after
# Restart Kernel -> Run All.
keras.utils.set_random_seed(42)

model = get_model()
model.summary() # expected: (4*5 + 5) + (5*3 + 3) = 43 trainable parameters

In [None]:
# Take the first two samples as a small batch to push through the network.
X_batch = X[:2]
X_batch

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2]])

In [None]:
# Forward pass through the model before any training: one row of class probabilities per sample.
model(X_batch)

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0.10737818, 0.47041655, 0.42220527],
       [0.143916  , 0.49302828, 0.36305568]], dtype=float32)>

We beginnen bij 2 bij 4 en gaan naar 2 bij 5 en uiteindelijk 2 bij 3.

2 zal niet veranderen want dat is het aantal instances.

Hoe array interpreteren:
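- Voor instance 1 is de kans dat het class 1 is = 0.107, P class 2 = 0.470, P class 3 = 0.422 (eerste rij van de output hierboven; het model is nog niet getraind, dus deze kansen zijn nog willekeurig)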
- ...

Hoe werkt softmax?

Softmax zet "logits" (willekeurige reële getallen) om naar een waarschijnlijkheidsverdeling: $\text{softmax}(z)_i = \dfrac{e^{z_i}}{\sum_j e^{z_j}}$.

Waarschijnlijkheidsverdeling: alle getallen >= 0 en som = 1

In [None]:
# Without softmax you would get this raw output (the logits) for X1
logits = np.array([5.574026, -1.7056798, -1.7176703])
logits

array([ 5.574026 , -1.7056798, -1.7176703])

In [None]:
# Convert to a probability distribution: exponentiate every logit,
# then divide by the total so the values sum to 1.
exp_logits = np.exp(logits)
exp_logits / exp_logits.sum()

array([9.98631316e-01, 6.88444797e-04, 6.80239292e-04])

In [None]:
# Sanity check: the three probabilities printed above add up to 1 (up to rounding of the printed digits).
9.98631316e-01 + 6.88444797e-04 + 6.80239292e-04

1.000000000089

Model compileren

In [None]:
model.compile(
    optimizer="sgd", # stochastic gradient descent
    loss="sparse_categorical_crossentropy", # expects integer class labels (0, 1, 2) as in y, NOT one-hot encoded targets
    metrics=["accuracy"]
)

Model trainen

In [None]:
# Train on the complete dataset (no validation or test split is held out here).
model.fit(
    X, y,
    batch_size=8, # 150 samples / 8 per batch -> 19 weight updates per epoch
    epochs=20
)

Epoch 1/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.3939 - loss: 1.2598
Epoch 2/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.3204 - loss: 1.0102
Epoch 3/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.5104 - loss: 0.9180
Epoch 4/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6197 - loss: 0.8555
Epoch 5/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7177 - loss: 0.8021
Epoch 6/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8064 - loss: 0.7350
Epoch 7/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7803 - loss: 0.7043
Epoch 8/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8176 - loss: 0.6685
Epoch 9/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x7ae0cd39dbb0>

In [None]:
# Predict again for the same two samples, now after training.
y_batch_pred = model(X_batch)
y_batch_pred

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0.8475335 , 0.14727752, 0.00518903],
       [0.8336685 , 0.15998423, 0.00634726]], dtype=float32)>

In [None]:
keras.ops.sum(y_batch_pred, axis=1) # each sample's row sums to (approximately) 1 -> valid probability distribution

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([1.0000001 , 0.99999994], dtype=float32)>

In [None]:
# Pull the weight matrix and bias vector out of each Dense layer
# so the forward pass can be recomputed by hand below.
W1, b1 = model.get_layer("hidden").get_weights()
W2, b2 = model.get_layer("output").get_weights()

In [None]:
# (hidden neurons, output classes)
W2.shape

(5, 3)

In [None]:
# Hidden layer by hand: relu(XW + b)
z1 = keras.ops.matmul(X_batch, W1) + b1 # pre-activations
a1 = keras.ops.relu(z1) # activations

In [None]:
# Hidden-layer pre-activations (before ReLU).
z1

<tf.Tensor: shape=(2, 5), dtype=float64, numpy=
array([[ 2.21336404,  4.24552591, -0.46883522,  0.06886143, -0.43002751],
       [ 2.01481652,  4.01424143, -0.45144582,  0.04814237, -0.58448701]])>

In [None]:
# Hidden-layer activations (after ReLU).
a1

<tf.Tensor: shape=(2, 5), dtype=float64, numpy=
array([[2.21336404, 4.24552591, 0.        , 0.06886143, 0.        ],
       [2.01481652, 4.01424143, 0.        , 0.04814237, 0.        ]])>

Positieve getallen blijven gelijk na het uitvoeren van de ReLU-functie.
Negatieve getallen worden op 0 gezet.

In [None]:
# Output layer by hand: softmax(a1 W2 + b2)
z2 = keras.ops.matmul(a1, W2) + b2
a2 = keras.ops.softmax(z2)

In [None]:
# Manually computed class probabilities.
a2

<tf.Tensor: shape=(2, 3), dtype=float64, numpy=
array([[0.84753352, 0.14727745, 0.00518903],
       [0.83366851, 0.15998423, 0.00634726]])>

In [None]:
# Probabilities produced by the model itself, for comparison.
y_batch_pred

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0.8475335 , 0.14727752, 0.00518903],
       [0.8336685 , 0.15998423, 0.00634726]], dtype=float32)>

In [None]:
keras.ops.isclose(a2, y_batch_pred) # are the individual elements of the two arrays (nearly) equal?

<tf.Tensor: shape=(2, 3), dtype=bool, numpy=
array([[ True,  True,  True],
       [ True,  True,  True]])>

In [None]:
keras.ops.all(keras.ops.isclose(a2, y_batch_pred)) # are the arrays (nearly) equal as a whole?

<tf.Tensor: shape=(), dtype=bool, numpy=True>