In [7]:
import tensorflow as tf
from tensorflow.keras import models, layers
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

In [8]:
from tensorflow.keras.datasets import boston_housing

In [3]:
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()

# Carve out the first 100 rows as a validation set *before* scaling, so the
# scaler's mean/std are computed from the rows the model is trained on only.
# Fitting the scaler on all rows first would leak validation statistics into
# the preprocessing step.
valid_data, valid_targets = train_data[:100], train_targets[:100]
train_data, train_targets = train_data[100:], train_targets[100:]

# Fit on the remaining training rows; reuse the same statistics for the
# validation and test splits.
scalar = StandardScaler()
train_data = scalar.fit_transform(train_data)
valid_data = scalar.transform(valid_data)
test_data = scalar.transform(test_data)

type(train_data)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/boston_housing.npz
[1m57026/57026[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


numpy.ndarray

In [None]:
# Regression network: two 64-unit ReLU hidden layers feeding a single
# output unit with no activation.
model = models.Sequential()
model.add(layers.Dense(64, activation="relu"))
model.add(layers.Dense(64, activation="relu"))
model.add(layers.Dense(1))

In [None]:
# Optimise mean squared error with RMSprop; additionally track mean absolute error.
model.compile(
    optimizer="rmsprop",
    loss="mse",
    metrics=["mae"],
)

# Hold-out validation (a single train/validation split)

In [None]:
# Train for 20 epochs in mini-batches of 16, reporting metrics on the
# validation split after every epoch.
history = model.fit(
    train_data,
    train_targets,
    epochs=20,
    batch_size=16,
    validation_data=(valid_data, valid_targets),
)

# Score the trained model on the test split; the metrics dict is the cell output.
res = model.evaluate(test_data, test_targets, return_dict=True)
res

Epoch 1/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 4.1664 - mae: 1.4930 - val_loss: 7.9119 - val_mae: 2.1618
Epoch 2/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 5.9964 - mae: 1.6829 - val_loss: 7.2853 - val_mae: 2.0452
Epoch 3/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 4.2646 - mae: 1.4452 - val_loss: 7.4506 - val_mae: 2.1005
Epoch 4/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 4.2513 - mae: 1.5098 - val_loss: 8.3025 - val_mae: 2.2321
Epoch 5/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 5.1277 - mae: 1.6188 - val_loss: 7.6140 - val_mae: 2.1011
Epoch 6/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 4.8278 - mae: 1.5925 - val_loss: 7.2125 - val_mae: 1.9878
Epoch 7/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 3.9862 -

{'loss': 18.164674758911133, 'mae': 2.6117475032806396}

# k-fold validation

In [None]:
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()

scalar = StandardScaler()
train_data = scalar.fit_transform(train_data)
test_data = scalar.transform(test_data)

k = 8
s_len = len(train_data) // k
mae_list = []

# Shuffle ONCE, before the loop, so the k folds are disjoint slices of the
# same permutation. train_data / train_targets themselves are never
# overwritten, so every fold starts from the full training set.
indices = np.arange(train_data.shape[0])
np.random.shuffle(indices)
temp = train_data[indices]
temp_targets = train_targets[indices]

for i in range(k):
    start, stop = i * s_len, (i + 1) * s_len

    # Fold i is held out for validation ...
    valid_data = temp[start:stop]
    valid_targets = temp_targets[start:stop]
    # ... and everything else (including any remainder rows past k * s_len)
    # is used for training.
    partial_train_data = np.concatenate((temp[:start], temp[stop:]), axis=0)
    partial_train_targets = np.concatenate((temp_targets[:start], temp_targets[stop:]), axis=0)

    # Build a fresh, untrained model for every fold; reusing one model would
    # carry weights learned on rows that are now in the validation fold.
    model = models.Sequential([
        layers.Dense(64, activation="relu"),
        layers.Dense(64, activation="relu"),
        layers.Dense(1)
    ])
    model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])

    history = model.fit(
        partial_train_data,
        partial_train_targets,
        epochs=10,
        batch_size=16,
        validation_data=(valid_data, valid_targets),
    )
    # NOTE(review): as in the original cell, the recorded score is the MAE on
    # the test split, not on the held-out fold. Conventional k-fold validation
    # would average the per-fold validation MAE instead - confirm the intent.
    res = model.evaluate(test_data, test_targets, return_dict=True)

    mae_list.append(res["mae"])

avg_mae = np.mean(mae_list)
avg_mae

In [13]:
def build_model():
    """Return a freshly initialised, compiled regression network.

    The network stacks two 64-unit ReLU ``Dense`` layers and a single-unit
    ``Dense`` output layer without activation. It is compiled with the
    ``rmsprop`` optimizer, ``mse`` loss and ``mae`` as a tracked metric.
    """
    network = models.Sequential()
    for _ in range(2):
        network.add(layers.Dense(64, activation="relu"))
    network.add(layers.Dense(1))
    network.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])
    return network

In [24]:
def validate_k_fold(
    k: int,
    epochs: int,
    batch_size: int,
    train_data: np.ndarray,
    train_targets: np.ndarray,
    test_data: np.ndarray,
    test_targets: np.ndarray,
    model_builder
):
    """Train one model per fold and return the mean test-set MAE.

    The training rows are split, in their given order, into ``k`` contiguous
    folds. For each fold a new model is obtained from ``model_builder()``,
    fitted on the other ``k - 1`` folds with the current fold passed as
    ``validation_data``, and then evaluated on ``test_data``/``test_targets``.

    Every training row belongs to exactly one fold: when ``len(train_data)``
    is not divisible by ``k`` the leading folds are one row larger instead of
    the remainder being dropped.

    NOTE(review): the returned score is the MAE on the *test* set averaged
    over the fold models, not the MAE on the held-out folds. That matches the
    existing behaviour of this function; confirm it is the intended metric.

    Args:
        k: Number of folds; must satisfy ``2 <= k <= len(train_data)``.
        epochs: Passed through to ``model.fit``.
        batch_size: Passed through to ``model.fit``.
        train_data: Training samples, indexed along the first axis.
        train_targets: Targets aligned with ``train_data`` along the first axis.
        test_data: Samples passed to ``model.evaluate``.
        test_targets: Targets passed to ``model.evaluate``.
        model_builder: Zero-argument callable returning an object exposing
            ``fit(x, y, epochs=, batch_size=, validation_data=)`` and
            ``evaluate(x, y, verbose=)``; ``evaluate`` must return a pair
            whose second element is the MAE.

    Returns:
        The mean of the per-fold MAE values returned by ``evaluate``.

    Raises:
        ValueError: If a required argument is ``None``, if data and targets
            differ in length, or if ``k`` is not an integer in
            ``[2, len(train_data)]``.
    """
    if train_data is None or test_data is None or model_builder is None:
        raise ValueError("Missing arguments.")
    if train_targets is None or test_targets is None:
        raise ValueError("Missing arguments.")

    train_data = np.asarray(train_data)
    train_targets = np.asarray(train_targets)
    n_samples = len(train_data)

    if len(train_targets) != n_samples:
        raise ValueError("train_data and train_targets must have the same length.")
    # k < 2 leaves nothing to train on once a fold is held out; k > n_samples
    # would create empty folds.
    if not isinstance(k, (int, np.integer)) or not 2 <= k <= n_samples:
        raise ValueError(f"k must be an integer between 2 and {n_samples}, got {k!r}.")

    # Contiguous index blocks, one per fold. array_split tolerates sizes that
    # do not divide evenly, so no rows are discarded.
    fold_indices = np.array_split(np.arange(n_samples), k)

    mae_arr = np.zeros(k)

    for i, valid_idx in enumerate(fold_indices):
        # Use all folds except the i-th fold for training
        train_idx = np.concatenate(fold_indices[:i] + fold_indices[i + 1:])

        # Build and train a fresh model so no weights carry over between folds
        model = model_builder()
        model.fit(
            train_data[train_idx],
            train_targets[train_idx],
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(
                train_data[valid_idx],
                train_targets[valid_idx]
            )
        )

        # Evaluate the model on the test set
        _, mae = model.evaluate(test_data, test_targets, verbose=0)
        mae_arr[i] = mae

    return np.mean(mae_arr)


In [14]:
# Reload the raw dataset so this cell does not depend on earlier in-place edits.
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()

# Learn the scaling statistics from the training features, then apply the same
# transform to both splits.
scalar = StandardScaler().fit(train_data)
train_data = scalar.transform(train_data)
test_data = scalar.transform(test_data)

In [26]:
# 4 folds, 20 epochs per fold, batch size 1; the averaged MAE is the cell output.
avg_mae = validate_k_fold(
    k=4,
    epochs=20,
    batch_size=1,
    train_data=train_data,
    train_targets=train_targets,
    test_data=test_data,
    test_targets=test_targets,
    model_builder=build_model,
)
avg_mae

Epoch 1/20
[1m303/303[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - loss: 401.6525 - mae: 17.3260 - val_loss: 23.9113 - val_mae: 3.1571
Epoch 2/20
[1m303/303[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 27.1917 - mae: 3.7169 - val_loss: 16.7709 - val_mae: 2.4826
Epoch 3/20
[1m303/303[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 17.6754 - mae: 2.9707 - val_loss: 13.0398 - val_mae: 2.4404
Epoch 4/20
[1m303/303[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 15.0464 - mae: 2.5662 - val_loss: 12.7909 - val_mae: 2.3936
Epoch 5/20
[1m303/303[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 14.1707 - mae: 2.6744 - val_loss: 11.7562 - val_mae: 2.2507
Epoch 6/20
[1m303/303[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 13.6292 - mae: 2.3811 - val_loss: 10.4923 - val_mae: 2.1878
Epoch 7/20
[1m303/303[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[

2.810963809490204