In [1]:
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Load and prepare data
df = pd.read_csv("dfdata.csv")
X = df.drop("outcome", axis=1)
y = df["outcome"]

# Normalize input features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Function to build models
def build_model(layers=1, nodes=4, input_dim=8):
    model = Sequential()
    model.add(Dense(nodes, activation='relu', input_dim=input_dim))
    for _ in range(layers - 1):
        model.add(Dense(nodes, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer=Adam(0.01), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Dataset sizes and configurations
sizes = [1000, 10000, 100000]
configs = [(1, 4), (2, 4)]

results = []

for size in sizes:
    # If size > data, replicate rows
    reps = int(np.ceil(size / len(X_scaled)))
    X_sampled = np.tile(X_scaled, (reps, 1))[:size]
    y_sampled = np.tile(y.values, reps)[:size]

    # Train/test split
    X_train, X_val, y_train, y_val = train_test_split(X_sampled, y_sampled, test_size=0.2, random_state=42)

    for layers, nodes in configs:
        model = build_model(layers=layers, nodes=nodes)
        start_time = time.time()
        history = model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0, validation_data=(X_val, y_val))
        end_time = time.time()

        train_error = history.history['loss'][-1]
        val_error = history.history['val_loss'][-1]
        exec_time = round(end_time - start_time, 2)

        results.append({
            "Data size": size,
            "Configuration": f"{layers} layer(s) x {nodes} nodes",
            "Training error": round(train_error, 4),
            "Validation error": round(val_error, 4),
            "Execution time (s)": exec_time
        })

# Display results
results_df = pd.DataFrame(results)
print(results_df)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


   Data size         Configuration  Training error  Validation error  \
0       1000  1 layer(s) x 4 nodes          0.0682            0.0639   
1       1000  2 layer(s) x 4 nodes          0.0323            0.0377   
2      10000  1 layer(s) x 4 nodes          0.0177            0.0141   
3      10000  2 layer(s) x 4 nodes          0.0168            0.0129   
4     100000  1 layer(s) x 4 nodes          0.0087            0.0077   
5     100000  2 layer(s) x 4 nodes          0.0105            0.0122   

   Execution time (s)  
0                3.11  
1                3.04  
2                8.91  
3                9.38  
4               64.67  
5               76.09  
