In [1]:
import sys
import sklearn
import numpy as np
import os
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow import keras

mpl.rc("axes", labelsize=14)
mpl.rc("xtick", labelsize=12)
mpl.rc("ytick", labelsize=12)

PROJECT_ROOT_DIR = "."
CHAPTER_ID = "deep"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

# Problema de Gradientes de Fuga e Explosão de Gradientes

## Função de ativação de não saturação

In [2]:
(X_train_full, y_train_full), (X_test, y_test) = (
    keras.datasets.fashion_mnist.load_data()
)
X_train_full = X_train_full / 255.0
X_test = X_test / 255.0
X_valid, X_train = X_train_full[:5000], X_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

In [3]:
tf.random.set_seed(42)
np.random.seed(42)

In [4]:
model = keras.models.Sequential(
    [
        keras.layers.Input(shape=[28, 28]),
        keras.layers.Flatten(),
        keras.layers.Dense(300, kernel_initializer="he_normal"),
        keras.layers.LeakyReLU(negative_slope=0.2),
        keras.layers.Dense(100, kernel_initializer="he_normal"),
        keras.layers.LeakyReLU(negative_slope=0.2),
        keras.layers.Dense(10, activation="softmax"),
    ]
)

In [5]:
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.SGD(learning_rate=1e-3),
    metrics=["accuracy"],
)

In [6]:
history = model.fit(
    X_train, y_train, epochs=10, validation_data=(X_valid, y_valid)
)

Epoch 1/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.4715 - loss: 1.6400 - val_accuracy: 0.7224 - val_loss: 0.8766
Epoch 2/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7274 - loss: 0.8385 - val_accuracy: 0.7754 - val_loss: 0.7097
Epoch 3/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7707 - loss: 0.7029 - val_accuracy: 0.7974 - val_loss: 0.6347
Epoch 4/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.7909 - loss: 0.6357 - val_accuracy: 0.8196 - val_loss: 0.5891
Epoch 5/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8032 - loss: 0.5932 - val_accuracy: 0.8266 - val_loss: 0.5577
Epoch 6/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8110 - loss: 0.5634 - val_accuracy: 0.8312 - val_loss: 0.5346
Epoch 7/10
[1m1

## Implementando a normalização em batch com o Keras

In [7]:
model = keras.models.Sequential(
    [
        keras.layers.Input(shape=[28, 28]),
        keras.layers.Flatten(),
        keras.layers.BatchNormalization(),
        keras.layers.Dense(
            300, activation="elu", kernel_initializer="he_normal"
        ),
        keras.layers.BatchNormalization(),
        keras.layers.Dense(
            100, activation="elu", kernel_initializer="he_normal"
        ),
        keras.layers.BatchNormalization(),
        keras.layers.Dense(10, activation="softmax"),
    ]
)

In [8]:
model.summary()

In [9]:
[(var.name, var.trainable) for var in model.layers[1].variables]

[('gamma', True),
 ('beta', True),
 ('moving_mean', False),
 ('moving_variance', False)]

In [10]:
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.SGD(learning_rate=1e-3),
    metrics=["accuracy"],
)

In [11]:
history = model.fit(
    X_train, y_train, epochs=10, validation_data=(X_valid, y_valid)
)

Epoch 1/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.6142 - loss: 1.1367 - val_accuracy: 0.8056 - val_loss: 0.5760
Epoch 2/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.7917 - loss: 0.6055 - val_accuracy: 0.8288 - val_loss: 0.5043
Epoch 3/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8116 - loss: 0.5416 - val_accuracy: 0.8424 - val_loss: 0.4709
Epoch 4/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8230 - loss: 0.5075 - val_accuracy: 0.8474 - val_loss: 0.4502
Epoch 5/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8302 - loss: 0.4846 - val_accuracy: 0.8518 - val_loss: 0.4356
Epoch 6/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8363 - loss: 0.4674 - val_accuracy: 0.8534 - val_loss: 0.4244
Epoch 7/10
[1m1

In [12]:
model = keras.models.Sequential(
    [
        keras.layers.Input(shape=[28, 28]),
        keras.layers.Flatten(),
        keras.layers.BatchNormalization(),
        keras.layers.Dense(
            300, kernel_initializer="he_normal", use_bias=False
        ),
        keras.layers.BatchNormalization(),
        keras.layers.Activation("elu"),
        keras.layers.Dense(
            100, kernel_initializer="he_normal", use_bias=False
        ),
        keras.layers.BatchNormalization(),
        keras.layers.Activation("elu"),
        keras.layers.Dense(10, activation="softmax"),
    ]
)

In [13]:
model.summary()

In [14]:
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.SGD(learning_rate=1e-3),
    metrics=["accuracy"],
)

In [15]:
history = model.fit(
    X_train, y_train, epochs=10, validation_data=(X_valid, y_valid)
)

Epoch 1/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.5877 - loss: 1.2520 - val_accuracy: 0.7976 - val_loss: 0.6217
Epoch 2/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7853 - loss: 0.6546 - val_accuracy: 0.8234 - val_loss: 0.5364
Epoch 3/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8068 - loss: 0.5789 - val_accuracy: 0.8346 - val_loss: 0.4985
Epoch 4/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8168 - loss: 0.5408 - val_accuracy: 0.8414 - val_loss: 0.4757
Epoch 5/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8239 - loss: 0.5162 - val_accuracy: 0.8480 - val_loss: 0.4598
Epoch 6/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8287 - loss: 0.4981 - val_accuracy: 0.8512 - val_loss: 0.4478
Epoch 7/10
[1m1

In [16]:
def split_dataset(X, y):
    y_5_or_6 = (y == 5) | (y == 6)  # sandals or shirts
    y_A = y[~y_5_or_6]
    y_A[y_A > 6] -= 2  # class indices 7, 8, 9 should be moved to 5, 6, 7
    y_B = (y[y_5_or_6] == 6).astype(
        np.float32
    )  # binary classification task: is it a shirt (class 6)?
    return ((X[~y_5_or_6], y_A), (X[y_5_or_6], y_B))


(X_train_A, y_train_A), (X_train_B, y_train_B) = split_dataset(
    X_train, y_train
)
(X_valid_A, y_valid_A), (X_valid_B, y_valid_B) = split_dataset(
    X_valid, y_valid
)
(X_test_A, y_test_A), (X_test_B, y_test_B) = split_dataset(X_test, y_test)
X_train_B = X_train_B[:200]
y_train_B = y_train_B[:200]

In [17]:
tf.random.set_seed(42)
np.random.seed(42)

In [18]:
model_A = keras.models.Sequential()
model_A.add(keras.layers.Input(shape=[28, 28]))
model_A.add(keras.layers.Flatten())
for n_hidden in (300, 100, 50, 50, 50):
    model_A.add(keras.layers.Dense(n_hidden, activation="selu"))
model_A.add(keras.layers.Dense(8, activation="softmax"))

In [19]:
model_A.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.SGD(learning_rate=1e-3),
    metrics=["accuracy"],
)

In [20]:
history = model_A.fit(
    X_train_A, y_train_A, epochs=20, validation_data=(X_valid_A, y_valid_A)
)

Epoch 1/20
[1m1375/1375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.7141 - loss: 0.8713 - val_accuracy: 0.8545 - val_loss: 0.4038
Epoch 2/20
[1m1375/1375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8640 - loss: 0.3855 - val_accuracy: 0.8759 - val_loss: 0.3412
Epoch 3/20
[1m1375/1375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8813 - loss: 0.3338 - val_accuracy: 0.8874 - val_loss: 0.3154
Epoch 4/20
[1m1375/1375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8912 - loss: 0.3101 - val_accuracy: 0.8934 - val_loss: 0.3013
Epoch 5/20
[1m1375/1375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.8972 - loss: 0.2951 - val_accuracy: 0.8961 - val_loss: 0.2914
Epoch 6/20
[1m1375/1375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9019 - loss: 0.2841 - val_accuracy: 0.8986 - val_loss: 0.2836
Epoch 7/20
[1m1

In [21]:
model_A.summary()

In [22]:
model_B = keras.models.Sequential()
model_B.add(keras.layers.Input(shape=[28, 28]))
model_B.add(keras.layers.Flatten())
for n_hidden in (300, 100, 50, 50, 50):
    model_B.add(keras.layers.Dense(n_hidden, activation="selu"))
model_B.add(keras.layers.Dense(1, activation="sigmoid"))

In [23]:
model_B.compile(
    loss="binary_crossentropy",
    optimizer=keras.optimizers.SGD(learning_rate=1e-3),
    metrics=["accuracy"],
)

In [24]:
history = model_B.fit(
    X_train_B, y_train_B, epochs=20, validation_data=(X_valid_B, y_valid_B)
)

Epoch 1/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - accuracy: 0.6995 - loss: 0.5958 - val_accuracy: 0.7576 - val_loss: 0.5308
Epoch 2/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7723 - loss: 0.5057 - val_accuracy: 0.8043 - val_loss: 0.4648
Epoch 3/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8451 - loss: 0.4358 - val_accuracy: 0.8641 - val_loss: 0.4117
Epoch 4/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9010 - loss: 0.3799 - val_accuracy: 0.8915 - val_loss: 0.3682
Epoch 5/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9171 - loss: 0.3346 - val_accuracy: 0.9108 - val_loss: 0.3321
Epoch 6/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9266 - loss: 0.2979 - val_accuracy: 0.9219 - val_loss: 0.3019
Epoch 7/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0

In [25]:
model_B.summary()

In [26]:
model_B_on_A = keras.models.Sequential(model_A.layers[:-1])
model_B_on_A.add(keras.layers.Dense(1, activation="sigmoid"))

In [27]:
model_A_clone = keras.models.clone_model(model_A)
model_A_clone.set_weights(model_A.get_weights())
model_B_on_A = keras.models.Sequential(model_A_clone.layers[:-1])
model_B_on_A.add(keras.layers.Dense(1, activation="sigmoid"))

In [28]:
for layer in model_B_on_A.layers[:-1]:
    layer.trainable = False

model_B_on_A.compile(
    loss="binary_crossentropy",
    optimizer=keras.optimizers.SGD(learning_rate=1e-3),
    metrics=["accuracy"],
)

In [29]:
history = model_B_on_A.fit(
    X_train_B, y_train_B, epochs=4, validation_data=(X_valid_B, y_valid_B)
)

for layer in model_B_on_A.layers[:-1]:
    layer.trainable = True

model_B_on_A.compile(
    loss="binary_crossentropy",
    optimizer=keras.optimizers.SGD(learning_rate=1e-3),
    metrics=["accuracy"],
)
history = model_B_on_A.fit(
    X_train_B, y_train_B, epochs=16, validation_data=(X_valid_B, y_valid_B)
)

Epoch 1/4
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - accuracy: 0.8823 - loss: 0.4236 - val_accuracy: 0.8600 - val_loss: 0.4476
Epoch 2/4
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8940 - loss: 0.4018 - val_accuracy: 0.8702 - val_loss: 0.4283
Epoch 3/4
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9094 - loss: 0.3819 - val_accuracy: 0.8783 - val_loss: 0.4105
Epoch 4/4
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9213 - loss: 0.3637 - val_accuracy: 0.8874 - val_loss: 0.3942
Epoch 1/16
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - accuracy: 0.9449 - loss: 0.3265 - val_accuracy: 0.9260 - val_loss: 0.2989
Epoch 2/16
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9773 - loss: 0.2400 - val_accuracy: 0.9544 - val_loss: 0.2422
Epoch 3/16
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [30]:
model_B_on_A.summary()

In [31]:
model_B.evaluate(X_test_B, y_test_B)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 990us/step - accuracy: 0.9825 - loss: 0.1240


[0.12425917387008667, 0.9850000143051147]

In [32]:
model_B_on_A.evaluate(X_test_B, y_test_B)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 967us/step - accuracy: 0.9917 - loss: 0.0700


[0.07108497619628906, 0.9890000224113464]

In [33]:
(100 - 98.55) / (100 - 98.75)

1.1600000000000024

# Evitando o Sobreajuste na Regularização

## Dropout

In [34]:
model = keras.models.Sequential(
    [
        keras.layers.Input(shape=[28, 28]),
        keras.layers.Flatten(),
        keras.layers.Dropout(rate=0.2),
        keras.layers.Dense(
            300, activation="elu", kernel_initializer="he_normal"
        ),
        keras.layers.Dropout(rate=0.2),
        keras.layers.Dense(
            100, activation="elu", kernel_initializer="he_normal"
        ),
        keras.layers.Dropout(rate=0.2),
        keras.layers.Dense(10, activation="softmax"),
    ]
)

In [35]:
model.summary()

In [37]:
pixel_means = X_train.mean(axis=0, keepdims=True)
pixel_stds = X_train.std(axis=0, keepdims=True)
X_train_scaled = (X_train - pixel_means) / pixel_stds
X_valid_scaled = (X_valid - pixel_means) / pixel_stds
X_test_scaled = (X_test - pixel_means) / pixel_stds

In [38]:
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="nadam",
    metrics=["accuracy"],
)
n_epochs = 10
history = model.fit(
    X_train_scaled,
    y_train,
    epochs=n_epochs,
    validation_data=(X_valid_scaled, y_valid),
)

Epoch 1/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.7549 - loss: 0.7504 - val_accuracy: 0.8628 - val_loss: 0.3765
Epoch 2/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8403 - loss: 0.4320 - val_accuracy: 0.8742 - val_loss: 0.3440
Epoch 3/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8520 - loss: 0.3980 - val_accuracy: 0.8734 - val_loss: 0.3302
Epoch 4/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8610 - loss: 0.3798 - val_accuracy: 0.8788 - val_loss: 0.3220
Epoch 5/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8674 - loss: 0.3580 - val_accuracy: 0.8730 - val_loss: 0.3381
Epoch 6/10
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8670 - loss: 0.3526 - val_accuracy: 0.8802 - val_loss: 0.3281
Epoch 7/10
[1m1

In [39]:
model.evaluate(X_train_scaled, y_train)

[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 853us/step - accuracy: 0.9059 - loss: 0.2375


[0.2369641810655594, 0.906181812286377]

## Dropout de Monte Carlo (MC)

In [40]:
tf.random.set_seed(42)
np.random.seed(42)

In [41]:
y_probas = np.stack(
    [model(X_test_scaled, training=True) for sample in range(100)]
)
y_proba = y_probas.mean(axis=0)
y_std = y_probas.std(axis=0)

In [47]:
np.round(model.predict(X_test_scaled[:1]), 2)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step


array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.39, 0.  , 0.6 ]],
      dtype=float32)

In [49]:
np.round(y_probas[:, :1], 2)

array([[[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.03, 0.  , 0.97]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.34, 0.  , 0.49, 0.  , 0.17]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.58, 0.  , 0.19, 0.  , 0.23]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.02, 0.  , 0.68, 0.  , 0.29]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.03, 0.  , 0.72, 0.  , 0.26]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.69, 0.  , 0.31]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.74, 0.  , 0.25]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.32, 0.  , 0.68]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.18, 0.  , 0.82]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.22, 0.  , 0.39, 0.  , 0.39]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.56, 0.  , 0.43]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.06, 0.  , 0.92]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.02, 0.  , 0.69, 0.  , 0.29]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.16, 0.  , 0.75, 0.  , 0

In [50]:
np.round(y_proba[:1], 2)

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.07, 0.  , 0.41, 0.  , 0.52]],
      dtype=float32)

In [51]:
np.round(y_std[:1], 2)

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.14, 0.  , 0.28, 0.  , 0.3 ]],
      dtype=float32)

In [52]:
y_pred = np.argmax(y_proba, axis=1)
accuracy = np.sum(y_pred == y_test) / len(y_test)
accuracy

0.8764