In [1]:
import time
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Read the whole dataset from disk into a DataFrame.
pima_df = pd.read_csv("pima_dfdata.csv")

# Split the frame into the 'outcome' label column and the remaining
# columns, which are used as input features.
y = pima_df['outcome'].values
X = pima_df.drop(columns=['outcome']).values

# Standardise the feature columns.
# NOTE(review): the scaler is fitted on every row here, before any
# train/validation split is made further down, so validation rows
# contribute to the scaling statistics.
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

# Function to return the first `size` rows of the dataset, capped at the number of rows available (no row is ever repeated)
def generate_dataset(size):
    """Return the leading ``size`` rows of the module-level ``X`` and ``y``.

    When ``size`` exceeds the number of rows in ``X``, the request is
    capped at that number, so the result never contains repeated rows.

    Args:
        size: Requested number of samples.

    Returns:
        A ``(features, labels)`` tuple sliced from the start of ``X`` and ``y``.
    """
    available = len(X)
    # Cap the request at what the dataset can actually provide.
    limit = size if size < available else available
    return X[:limit], y[:limit]

# Function to build the model
def build_model(input_dim, hidden_layers_config):
    """Build and compile a fully connected classifier with one sigmoid output.

    The network consists of an explicit input of width ``input_dim``, one
    ReLU ``Dense`` layer per entry of ``hidden_layers_config`` (in order),
    and a single sigmoid output unit. It is compiled with binary
    cross-entropy loss, the Adam optimizer and the accuracy metric.

    Args:
        input_dim: Number of input features per sample.
        hidden_layers_config: Iterable of hidden-layer widths. May be empty,
            in which case the model is just the sigmoid output layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    # Declare the input shape with an explicit Input object instead of
    # passing ``input_dim=`` to the first Dense layer: recent Keras versions
    # emit a warning for the latter, and this also gives the model a defined
    # input shape when there are no hidden layers at all.
    model.add(tf.keras.Input(shape=(input_dim,)))
    for nodes in hidden_layers_config:
        model.add(Dense(nodes, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Function to train and evaluate model
def train_evaluate_model(X, y, hidden_layers_config):
    """Train a model on an 80/20 stratified split and report errors and timing.

    Args:
        X: Feature matrix; ``X.shape[1]`` is used as the model input width.
        y: Labels aligned with ``X``; also used to stratify the split.
        hidden_layers_config: Hidden-layer widths forwarded to ``build_model``.

    Returns:
        A tuple ``(train_error, val_error, elapsed_seconds)`` where each
        error is ``1 - accuracy`` as recorded by Keras for the final epoch
        and ``elapsed_seconds`` covers only the ``model.fit`` call.
    """
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y)
    model = build_model(X.shape[1], hidden_layers_config)

    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring intervals; unlike time.time() it is not affected by system
    # clock adjustments made while training is running.
    start_time = time.perf_counter()
    history = model.fit(X_train, y_train, epochs=10, batch_size=32,
                        validation_data=(X_val, y_val), verbose=0)
    elapsed_time = time.perf_counter() - start_time

    # Only the accuracies of the last epoch are reported.
    train_acc = history.history['accuracy'][-1]
    val_acc = history.history['val_accuracy'][-1]
    return 1 - train_acc, 1 - val_acc, elapsed_time

def main():
    """Run every (data size, architecture) combination and print a summary.

    For each requested data size a subset is obtained from
    ``generate_dataset``; each architecture is then trained and evaluated
    on it with ``train_evaluate_model``. One result row per run is
    collected and the rows are printed as a DataFrame at the end.
    """
    architectures = {
        '1_hidden_layer_4_nodes': [4],
        '2_hidden_layers_4_nodes_each': [4, 4]
    }
    column_names = ('Data size', 'Configuration', 'Training error',
                    'Validation error', 'Time of execution (s)')

    rows = []
    for requested_size in (1000, 10000, 100000):
        features, labels = generate_dataset(requested_size)
        # The subset may be smaller than requested, so report its real size.
        n_samples = len(features)
        for label, layer_widths in architectures.items():
            print(f"Training with data size {n_samples} and config {label}...")
            outcome = train_evaluate_model(features, labels, layer_widths)
            # outcome is (training error, validation error, elapsed seconds).
            rows.append(dict(zip(column_names, (n_samples, label, *outcome))))

    summary = pd.DataFrame(rows)
    print("\nResults summary:")
    print(summary)

# Run the experiment only when this module is executed directly,
# not when it is imported from another module.
if __name__ == '__main__':
    main()


Training with data size 1000 and config 1_hidden_layer_4_nodes...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training with data size 1000 and config 2_hidden_layers_4_nodes_each...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training with data size 10000 and config 1_hidden_layer_4_nodes...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training with data size 10000 and config 2_hidden_layers_4_nodes_each...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training with data size 100000 and config 1_hidden_layer_4_nodes...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training with data size 100000 and config 2_hidden_layers_4_nodes_each...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Results summary:
   Data size                 Configuration  Training error  Validation error  \
0       1000        1_hidden_layer_4_nodes        0.292500           0.29500   
1       1000  2_hidden_layers_4_nodes_each        0.132500           0.13000   
2      10000        1_hidden_layer_4_nodes        0.011250           0.01100   
3      10000  2_hidden_layers_4_nodes_each        0.004250           0.00400   
4     100000        1_hidden_layer_4_nodes        0.001713           0.00160   
5     100000  2_hidden_layers_4_nodes_each        0.001900           0.00245   

   Time of execution (s)  
0               3.402155  
1               2.728067  
2               7.643392  
3               7.546978  
4              62.041069  
5              65.032699  
