In [2]:
import io
import sys
import re

import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow import keras

In [3]:
df = pd.read_csv("insurance_data.csv")
df.head()

Unnamed: 0,age,affordibility,bought_insurance
0,22,1,0
1,25,0,0
2,47,1,1
3,52,0,0
4,46,1,1


In [4]:
X_train, X_test, y_train, y_test = train_test_split(df[['age', 'affordibility']], df.bought_insurance, test_size=0.2, random_state=25)

print(f"Length of X_train: {len(X_train)}, X_test: {len(X_test)}, y_train: {len(y_train)}, y_test: {len(y_test)}.")

Length of X_train: 22, X_test: 6, y_train: 22, y_test: 6.


In [5]:
X_train_scaled = X_train.copy()
X_train_scaled['age'] = X_train_scaled['age'] / 100

X_test_scaled = X_test.copy()
X_test_scaled['age'] = X_test_scaled['age'] / 100

In [6]:
# Redirect the standard output to capture the printed output
output_capture = io.StringIO()
sys.stdout = output_capture

model = keras.Sequential([
    keras.layers.Dense(100, input_shape=(2,), activation='relu', kernel_initializer="he_normal", bias_initializer="zeros"), # 2 input neurons & 100 neuron in hidden layer
    keras.layers.Dense(1, activation='sigmoid'), # 1 neuron in output layer
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(X_train_scaled, y_train, epochs=500)

# After training completes, reset stdout and capture the output
sys.stdout = sys.__stdout__  # Reset the standard output to its original state

# Get the captured output as a string
model_output = output_capture.getvalue()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
# Regular expression to match the pattern
pattern = r"Epoch (\d+)/\d+\s+.*\s+(\d+ms|s\s*\d+ms)/step - accuracy: ([\d.]+) - loss: ([\d.]+)"
matches = re.findall(pattern, model_output)

# Parse the matches into lists
epochs = []
times = []
accuracies = []
losses = []

for match in matches:
    epoch = int(match[0])
    time = match[1]
    accuracy = float(match[2])
    loss = float(match[3])
    
    # Handle time conversion
    if 'ms' in time:
        time_taken = float(time.replace('ms', '')) / 1000
    else:
        seconds, ms = time.split('s')
        time_taken = float(seconds) + float(ms.replace('ms', '')) / 1000

    epochs.append(epoch)
    times.append(time_taken)
    accuracies.append(accuracy)
    losses.append(loss)

# Create DataFrame
df_output = pd.DataFrame({
    'Epoch': epochs,
    'Time Taken (s)': times,
    'Accuracy': accuracies,
    'Loss': losses
})

In [8]:
df_output

Unnamed: 0,Epoch,Time Taken (s),Accuracy,Loss
0,2,0.048,0.6818,0.6995
1,3,0.042,0.6818,0.6959
2,4,0.051,0.6818,0.6926
3,5,0.062,0.6818,0.6895
4,6,0.052,0.6818,0.6866
...,...,...,...,...
494,496,0.032,0.8636,0.3402
495,497,0.027,0.8636,0.3401
496,498,0.036,0.8636,0.3400
497,499,0.032,0.8636,0.3400


In [9]:
df_output.describe()

Unnamed: 0,Epoch,Time Taken (s),Accuracy,Loss
count,499.0,499.0,499.0,499.0
mean,251.0,0.043501,0.83236,0.425733
std,144.193157,0.014477,0.067154,0.095939
min,2.0,0.026,0.6818,0.3399
25%,126.5,0.034,0.8636,0.35185
50%,251.0,0.038,0.8636,0.3831
75%,375.5,0.05,0.8636,0.4738
max,500.0,0.162,0.8636,0.6995
