In [1]:
import pandas as pd
import numpy as np
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import time

In [2]:
data = pd.read_csv("Week_11_data.csv")
X = data.drop('outcome', axis=1)
y = data['outcome']

# Split the data BEFORE scaling so the validation rows stay unseen during
# preprocessing. With the same random_state the seeded shuffle depends only on
# the number of rows, so each split receives the same rows as when the
# pre-scaled array was split.
X_train_raw, X_val_raw, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale the features: learn mean/std from the training rows only, then apply
# that same transform to the validation rows (avoids train/validation leakage).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_val = scaler.transform(X_val_raw)

# Kept so that any later cell referring to `X_scaled` still works; note it is
# now scaled with the training-set statistics rather than full-data statistics.
X_scaled = scaler.transform(X)

In [3]:
def train_model(hidden_layers, nodes_per_layer, data_size=None):
    """Train a small dense binary classifier and report its errors and fit time.

    The network is a Keras ``Sequential`` model with ``hidden_layers`` ReLU
    ``Dense`` layers of ``nodes_per_layer`` units each, followed by a single
    sigmoid output unit. It is compiled with Adam / binary cross-entropy and
    trained for 10 epochs with batch size 32 on the module-level
    ``X_train`` / ``y_train``, validating on ``X_val`` / ``y_val``.

    Args:
        hidden_layers: Number of hidden Dense layers to stack.
        nodes_per_layer: Number of units in every hidden layer.
        data_size: If truthy, only the first ``data_size`` training rows are
            used; ``None`` (or 0) uses the whole training set. A value larger
            than the training set simply uses every available row.

    Returns:
        Tuple ``(train_error, val_error, execution_time)`` where the errors are
        ``1 - accuracy`` taken from the last epoch of the fit history and
        ``execution_time`` is the duration of ``model.fit`` in seconds.

    Raises:
        ValueError: If ``data_size`` is negative (a negative slice bound would
            silently drop rows from the end instead of limiting the size).
    """
    if data_size is not None and data_size < 0:
        raise ValueError(f"data_size must be non-negative, got {data_size}")

    if data_size:
        # If data_size is specified, use only that much data
        X_train_subset = X_train[:data_size]
        y_train_subset = y_train[:data_size]
    else:
        X_train_subset = X_train
        y_train_subset = y_train

    # Create model; the input width comes from the data actually being fed to
    # the network rather than from a separate module-level frame.
    model = keras.Sequential()
    model.add(keras.layers.Input(shape=(X_train_subset.shape[1],)))

    # Add hidden layers
    for _ in range(hidden_layers):
        model.add(keras.layers.Dense(nodes_per_layer, activation='relu'))

    # Add output layer
    model.add(keras.layers.Dense(1, activation='sigmoid'))

    # Compile model
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Train model with timing. perf_counter is monotonic, so the measured
    # interval cannot be distorted by system clock adjustments.
    start_time = time.perf_counter()
    history = model.fit(
        X_train_subset, y_train_subset,
        epochs=10,
        batch_size=32,
        validation_data=(X_val, y_val),
        verbose=0
    )
    execution_time = time.perf_counter() - start_time

    # Get final training and validation error from the last epoch.
    # NOTE(review): the logged training accuracy is presumably accumulated over
    # the batches of the epoch while weights are still changing, so it may not
    # be strictly comparable to val_accuracy - confirm whether model.evaluate
    # on the training subset is preferable here.
    train_error = 1 - history.history['accuracy'][-1]
    val_error = 1 - history.history['val_accuracy'][-1]

    return train_error, val_error, execution_time

# Experiment grid: every combination of network depth (1 or 2 hidden layers,
# 4 nodes each) and training-set size, ordered by depth first, then by size.
configurations = [
    {"data_size": n_rows, "hidden_layers": depth, "nodes": 4}
    for depth in (1, 2)
    for n_rows in (1000, 10000, 100000)
]

# Train one model per configuration, collecting one formatted row per run.
results = []
for config in configurations:
    print(f"Running: {config['data_size']} samples, {config['hidden_layers']} hidden layers with {config['nodes']} nodes each")

    train_err, val_err, exec_time = train_model(
        hidden_layers=config['hidden_layers'],
        nodes_per_layer=config['nodes'],
        data_size=config['data_size'],
    )

    # Values are stored pre-formatted as text, exactly as they are reported.
    row = {
        "Configuration": f"{config['hidden_layers']} hidden layers, {config['nodes']} nodes",
        "Data size": config['data_size'],
        "Training error": f"{train_err:.4f}",
        "Validation error": f"{val_err:.4f}",
        "Time of execution": f"{exec_time:.2f} seconds",
    }
    results.append(row)

# Tabulate all runs and show the summary under a heading.
results_df = pd.DataFrame(results)
print("\nResults:", results_df, sep="\n")

Running: 1000 samples, 1 hidden layers with 4 nodes each
Running: 10000 samples, 1 hidden layers with 4 nodes each
Running: 100000 samples, 1 hidden layers with 4 nodes each
Running: 1000 samples, 2 hidden layers with 4 nodes each
Running: 10000 samples, 2 hidden layers with 4 nodes each
Running: 100000 samples, 2 hidden layers with 4 nodes each

Results:
              Configuration  Data size Training error Validation error  \
0  1 hidden layers, 4 nodes       1000         0.2490           0.2583   
1  1 hidden layers, 4 nodes      10000         0.0042           0.0055   
2  1 hidden layers, 4 nodes     100000         0.0014           0.0019   
3  2 hidden layers, 4 nodes       1000         0.2160           0.2386   
4  2 hidden layers, 4 nodes      10000         0.0061           0.0081   
5  2 hidden layers, 4 nodes     100000         0.0017           0.0023   

  Time of execution  
0   1679.13 seconds  
1   1499.21 seconds  
2   1463.70 seconds  
3   1561.62 seconds  
4   1382.20 s