In [3]:
# Importing libraries
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from sklearn import datasets
from sklearn.model_selection import KFold
from keras.layers import Dense
from keras.models import Sequential
from keras.utils import np_utils
from sklearn.metrics import mean_squared_error, r2_score

In [4]:
# Loading dataset diabetes
diabetes = datasets.load_diabetes()
x = diabetes.data
y = diabetes.target
features = diabetes.feature_names
n_features = len(features)

# Create output variables from original labels. This is required only in multiclass problems.
output_y = np_utils.to_categorical(y)   
print(output_y)

# Define MLP model
clf = Sequential()
clf.add(Dense(10, input_dim=x.shape[1], activation='relu'))
clf.add(Dense(10, activation='relu'))
clf.add(Dense(1, activation='linear')) # for 2-class problems, use clf.add(Dense(1, activation='sigmoid'))

# Compile model
clf.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
 
# Fit model
clf.fit(x, output_y, epochs=150, batch_size=5)

# Evaluate model
kf = KFold(n_splits=5, shuffle = True)

mse = 0

for train_index, test_index in kf.split(x):

    # Training phase
    x_train = x[train_index, :]
    y_train = y[train_index]

    clf_cv = Sequential()
    clf_cv.add(Dense(10, input_dim=n_features, activation='relu'))
    clf_cv.add(Dense(10, activation='relu'))
    clf_cv.add(Dense(1, activation='linear'))
    clf_cv.compile(loss='mean_squared_error', optimizer='adam') 
    clf_cv.fit(x_train, y_train, epochs=150, batch_size=5, verbose=0)    

    # Test phase
    x_test = x[test_index, :]
    y_test = y[test_index]
    y_pred = clf_cv.predict(x_test)

    mse += ((y_test - y_pred) ** 2).mean()

mse /= 5
print('MSE = ', mse)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66

In [5]:
# Definiendo diferentes tamaños de lote
# Define MLP model
clf = Sequential()
clf.add(Dense(10, input_dim=x.shape[1], activation='relu'))
clf.add(Dense(10, activation='relu'))
clf.add(Dense(1, activation='linear')) # for 2-class problems, use clf.add(Dense(1, activation='sigmoid'))

# Compile model
clf.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])

# Set the batch sizes to test
batch_sizes = list(range(10, 201, 10))

mse_scores = []

# Perform cross-validation for each batch size
kf = KFold(n_splits=5, shuffle=True)

for batch_size in batch_sizes:
    mse = 0

    for train_index, test_index in kf.split(x):
        # Training phase
        x_train = x[train_index, :]
        y_train = y[train_index]

        clf_cv = Sequential()
        clf_cv.add(Dense(10, input_dim=n_features, activation='relu'))
        clf_cv.add(Dense(10, activation='relu'))
        clf_cv.add(Dense(1, activation='linear'))
        clf_cv.compile(loss='mean_squared_error', optimizer='adam')
        clf_cv.fit(x_train, y_train, epochs=150, batch_size=batch_size, verbose=0)

        # Test phase
        x_test = x[test_index, :]
        y_test = y[test_index]
        y_pred = clf_cv.predict(x_test)

        mse += mean_squared_error(y_test, y_pred)

    mse /= 5
    mse_scores.append(mse)

# Print MSE scores for each batch size
for i, batch_size in enumerate(batch_sizes):
    print(f"MSE with batch size {batch_size}: {mse_scores[i]}")

MSE with batch size 10: 3062.62511431542
MSE with batch size 20: 3285.40459720405
MSE with batch size 30: 3415.4689369219473
MSE with batch size 40: 3642.231151669538
MSE with batch size 50: 3950.171627325975
MSE with batch size 60: 4746.949564171033
MSE with batch size 70: 5154.524091910501
MSE with batch size 80: 10963.793460108802
MSE with batch size 90: 12427.65420352184
MSE with batch size 100: 12260.572598494382
MSE with batch size 110: 9836.289122590413
MSE with batch size 120: 19723.78964505688
MSE with batch size 130: 18510.88796513753
MSE with batch size 140: 18760.903174367042
MSE with batch size 150: 17019.509795974394
MSE with batch size 160: 19915.15601035765
MSE with batch size 170: 21892.555503575863
MSE with batch size 180: 26133.88833721342
MSE with batch size 190: 26590.367216258637
MSE with batch size 200: 26037.31554352103
