In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import time

In [2]:
# Load and preprocess the dataset
def load_and_preprocess_data(csv_file):
    """Read a CSV dataset and split it into a feature matrix and a label vector.

    The column named 'outcome' is used as the label; every remaining column
    is used as a feature. No scaling or normalization is applied here: the
    values are only cast to float32 NumPy arrays.

    Args:
        csv_file: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        A ``(features, labels)`` tuple of float32 NumPy arrays.
    """
    frame = pd.read_csv(csv_file)

    # Everything except the label column feeds the network.
    feature_frame = frame.drop(columns=['outcome'])
    label_series = frame['outcome']

    # TensorFlow consumes plain float32 arrays.
    features = np.array(feature_frame, dtype=np.float32)
    labels = np.array(label_series, dtype=np.float32)

    return features, labels

In [3]:
# Build and compile the model
def build_model(input_shape):
    """Build and compile a binary classifier with a single hidden layer.

    Architecture: input -> Dense(4, relu) -> Dense(1, sigmoid). The model is
    compiled with the Adam optimizer, binary cross-entropy loss, and the
    accuracy metric.

    Args:
        input_shape: Shape tuple of one sample, e.g. ``(n_features,)``.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    model = tf.keras.Sequential([
        # Declare the input with an explicit Input object instead of passing
        # `input_shape` to the first Dense layer, which Keras discourages
        # (and warns about) for Sequential models.
        tf.keras.Input(shape=input_shape),
        tf.keras.layers.Dense(4, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid'),  # Output layer
    ])

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [4]:
# Build and compile the model with 2 hidden layers of 4 nodes each
def build_model_2(input_shape):
    """Build and compile a binary classifier with two hidden layers.

    Architecture: input -> Dense(4, relu) -> Dense(4, relu) -> Dense(1, sigmoid).
    The model is compiled with the Adam optimizer, binary cross-entropy loss,
    and the accuracy metric.

    Args:
        input_shape: Shape tuple of one sample, e.g. ``(n_features,)``.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    model = tf.keras.Sequential([
        # Declare the input with an explicit Input object instead of passing
        # `input_shape` to the first Dense layer, which Keras discourages
        # (and warns about) for Sequential models.
        tf.keras.Input(shape=input_shape),
        tf.keras.layers.Dense(4, activation='relu'),  # First hidden layer
        tf.keras.layers.Dense(4, activation='relu'),  # Second hidden layer
        tf.keras.layers.Dense(1, activation='sigmoid'),  # Output layer
    ])

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [5]:
# Train the model
def train_ann_model(csv_file, model_num, epochs=50, batch_size=32, seed=None):
    """Train one of the two networks on a CSV dataset and report test accuracy.

    The data is loaded with ``load_and_preprocess_data``, split 80/20 into
    train and test sets (fixed ``random_state=42``), and the selected model is
    fit with a further 20% of the training data held out for validation.
    The test accuracy and the elapsed time for build + fit + evaluate are
    printed.

    Args:
        csv_file: Path of the CSV dataset; must contain an 'outcome' column.
        model_num: ``1`` selects ``build_model`` (one hidden layer),
            ``2`` selects ``build_model_2`` (two hidden layers).
        epochs: Number of training epochs.
        batch_size: Mini-batch size used during training.
        seed: Optional integer. When given, the Python, NumPy and TensorFlow
            random generators are seeded so that runs are repeatable. The
            default ``None`` leaves the global random state untouched.

    Raises:
        ValueError: If ``model_num`` is neither 1 nor 2.
    """
    # Fail fast with a clear message instead of hitting an unbound `model`
    # variable after the data has already been loaded.
    if model_num not in (1, 2):
        raise ValueError(f"model_num must be 1 or 2, got {model_num!r}")

    if seed is not None:
        tf.keras.utils.set_random_seed(seed)

    # Load and preprocess data
    X, y = load_and_preprocess_data(csv_file)

    # Split data into training and testing sets (80-20 split)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by wall-clock adjustments.
    start_time = time.perf_counter()

    # Build the model
    build = build_model if model_num == 1 else build_model_2
    model = build((X_train.shape[1],))

    # Train the model
    model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.2,
        verbose=0
    )

    # Evaluate the model on the test data
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)

    total_time = time.perf_counter() - start_time

    print(f"Test accuracy: {test_acc:.4f} and Total time taken is {total_time:.4f} seconds")

In [6]:
# Model 1 (selects build_model) on sample_1000.csv
train_ann_model("sample_1000.csv", 1)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


7/7 - 0s - 55ms/step - accuracy: 0.7800 - loss: 0.7418
Test accuracy: 0.7800 and Total time taken is 11.0356 seconds


In [7]:
# Model 1 (selects build_model) on sample_10000.csv
train_ann_model("sample_10000.csv", 1)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


63/63 - 1s - 9ms/step - accuracy: 0.8505 - loss: 0.3523
Test accuracy: 0.8505 and Total time taken is 34.3881 seconds


In [8]:
# Model 1 (selects build_model) on sample_100000.csv
train_ann_model("sample_100000.csv", 1)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


625/625 - 1s - 2ms/step - accuracy: 0.9870 - loss: 0.0339
Test accuracy: 0.9870 and Total time taken is 353.5016 seconds


In [9]:
# Model 2 (selects build_model_2) on sample_1000.csv
train_ann_model("sample_1000.csv", 2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


7/7 - 0s - 60ms/step - accuracy: 0.7850 - loss: 0.5594
Test accuracy: 0.7850 and Total time taken is 9.5472 seconds


In [10]:
# Model 2 (selects build_model_2) on sample_10000.csv
train_ann_model("sample_10000.csv", 2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


63/63 - 1s - 8ms/step - accuracy: 0.9165 - loss: 0.1986
Test accuracy: 0.9165 and Total time taken is 35.5023 seconds


In [11]:
# Model 2 (selects build_model_2) on sample_100000.csv
train_ann_model("sample_100000.csv", 2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


625/625 - 1s - 2ms/step - accuracy: 0.9966 - loss: 0.0297
Test accuracy: 0.9966 and Total time taken is 355.6824 seconds
