In [21]:
import numpy as np
import pandas as pd
import tensorflow as tf
import sklearn as sk
from sklearn.preprocessing import StandardScaler
tf.config.run_functions_eagerly(True)

In [3]:
df = pd.read_csv('insurance (1).csv')
print(df.head())

   age     sex     bmi  children smoker     region      charges
0   19  female  27.900         0    yes  southwest  16884.92400
1   18    male  33.770         1     no  southeast   1725.55230
2   28    male  33.000         3     no  southeast   4449.46200
3   33    male  22.705         0     no  northwest  21984.47061
4   32    male  28.880         0     no  northwest   3866.85520


In [4]:
df = pd.get_dummies(df, columns=["sex", "smoker", "region"], drop_first=True)

In [5]:
target_col = 'charges'
y = df[target_col].astype(int)
X = df.drop(columns=[target_col])

In [6]:
print(X.head())
print(y.head())

   age     bmi  children  sex_male  smoker_yes  region_northwest  \
0   19  27.900         0     False        True             False   
1   18  33.770         1      True       False             False   
2   28  33.000         3      True       False             False   
3   33  22.705         0      True       False              True   
4   32  28.880         0      True       False              True   

   region_southeast  region_southwest  
0             False              True  
1              True             False  
2              True             False  
3             False             False  
4             False             False  
0    16884
1     1725
2     4449
3    21984
4     3866
Name: charges, dtype: int64


In [7]:
scaler = StandardScaler()
numeric_cols = ['age', 'bmi', 'children']
X_scaled_numeric = scaler.fit_transform(X[numeric_cols])
bool_cols = ['sex_male', 'smoker_yes', 'region_northwest', 'region_southeast', 'region_southwest']

region_map = {
    'region_northwest': 1,
    'region_southeast': 2,
    'region_southwest': 3
}
region_cols = ['region_northwest', 'region_southeast', 'region_southwest']

def get_region(row):
    for col, val in region_map.items():
        if row[col]:
            return val
    return 0

for col in bool_cols + region_cols:
    X[col] = X[col].astype(int)
X['region'] = X[region_cols].apply(get_region, axis=1)
bool_cols = ['sex_male', 'smoker_yes']
X_scaled = pd.DataFrame(X_scaled_numeric, columns=numeric_cols, index=X.index)
X_scaled[bool_cols] = X[bool_cols]
X_scaled['region'] = X['region']
print(X_scaled.head())

        age       bmi  children  sex_male  smoker_yes  region
0 -1.438764 -0.453320 -0.908614         0           1       3
1 -1.509965  0.509621 -0.078767         1           0       2
2 -0.797954  0.383307  1.580926         1           0       2
3 -0.441948 -1.305531 -0.908614         1           0       1
4 -0.513149 -0.292556 -0.908614         1           0       1


In [8]:
print(X_scaled.shape)
print(y.shape)

(1338, 6)
(1338,)


In [9]:
X_train = X_scaled
y_train = y
print(X_train.shape)
print(y_train.shape)

(1338, 6)
(1338,)


In [10]:
X_check = X_scaled.sample(frac=0.2, random_state=42)
# Split X_train and y_train into 60% train, 20% validation, 20% check
# First, split 20% for check set
X_check = X_train.sample(frac=0.2, random_state=42)
X_train_remain = X_train.drop(X_check.index)
y_check = y_train.loc[X_check.index]
y_train_remain = y_train.drop(X_check.index)

# Then, split 25% of the remaining (which is 20% of original) for validation
X_val = X_train_remain.sample(frac=0.25, random_state=42)
X_train_final = X_train_remain.drop(X_val.index)
y_val = y_train_remain.loc[X_val.index]
y_train_final = y_train_remain.drop(X_val.index)

# Now X_train_final (60%), X_val (20%), X_check (20%)
X_train = X_train_final
y_train = y_train_final
y_check = y.loc[X_check.index]

In [11]:
print(X_train.shape)
print(y_train.shape)

(802, 6)
(802,)


In [12]:
print(X_val.shape)
print(y_val.shape)

(268, 6)
(268,)


In [13]:
print(X_check.shape)
print(y_check.shape)

(268, 6)
(268,)


In [14]:
model1 = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(32, activation="relu"),
    tf.keras.layers.Dense(1)  # Regression output
])

I0000 00:00:1751018704.157436    3247 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5520 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


In [15]:
model2 = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(32, activation="relu"),
    tf.keras.layers.Dense(1)  # Regression output
])

In [16]:
model3 = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(256, activation="relu"),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(32, activation="relu"),
    tf.keras.layers.Dense(1)  # Regression output
])

In [26]:
model4 = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(512, activation="relu"),
    tf.keras.layers.Dense(256, activation="relu"),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(32, activation="relu"),
    tf.keras.layers.Dense(1)  # Regression output
])

In [28]:
optimizer1 = tf.keras.optimizers.Adam(learning_rate=0.001)
optimizer2 = tf.keras.optimizers.Adam(learning_rate=0.001)
optimizer3 = tf.keras.optimizers.Adam(learning_rate=0.001)
optimizer4 = tf.keras.optimizers.Adam(learning_rate=0.001)

model1.compile(optimizer=optimizer1, loss='mse')
model2.compile(optimizer=optimizer2, loss='mse')
model3.compile(optimizer=optimizer3, loss='mse')
model4.compile(optimizer=optimizer4, loss='mse')

In [None]:
model1.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_val, y_val), verbose=1)

Epoch 1/100


I0000 00:00:1751018711.183081    3656 service.cc:152] XLA service 0x7f5e88005960 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1751018711.183118    3656 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 4070 Laptop GPU, Compute Capability 8.9
2025-06-27 10:05:11.206320: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1751018711.352244    3656 cuda_dnn.cc:529] Loaded cuDNN version 90300




[1m22/26[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 2ms/step - loss: 356745440.0000 

I0000 00:00:1751018713.500407    3656 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 122ms/step - loss: 351497440.0000 - val_loss: 312786656.0000
Epoch 2/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 331075680.0000 - val_loss: 312054784.0000
Epoch 3/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 299819904.0000 - val_loss: 309155872.0000
Epoch 4/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 318372416.0000 - val_loss: 300301920.0000
Epoch 5/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 298248384.0000 - val_loss: 279194816.0000
Epoch 6/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 279672448.0000 - val_loss: 240491760.0000
Epoch 7/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 227083840.0000 - val_loss: 187098656.0000
Epoch 8/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

<keras.src.callbacks.history.History at 0x7f5fb1456c60>

In [24]:
model2.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_val, y_val), verbose=1)

Epoch 1/100




[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - loss: 332955264.0000 - val_loss: 312875968.0000
Epoch 2/100
[1m 1/26[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 56ms/step - loss: 210751664.0000



[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - loss: 344559456.0000 - val_loss: 312777440.0000
Epoch 3/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - loss: 313785088.0000 - val_loss: 312584032.0000
Epoch 4/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 315784480.0000 - val_loss: 312223360.0000
Epoch 5/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - loss: 324994080.0000 - val_loss: 311605376.0000
Epoch 6/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step - loss: 325394560.0000 - val_loss: 310585344.0000
Epoch 7/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - loss: 324939520.0000 - val_loss: 309072288.0000
Epoch 8/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step - loss: 310368640.0000 - val_loss: 307002208.0000
Epoch 9/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

<keras.src.callbacks.history.History at 0x7f5fa814ff50>

In [25]:
model3.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_val, y_val), verbose=1)

Epoch 1/100
[1m 1/26[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 108ms/step - loss: 344819040.0000



[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step - loss: 306118208.0000 - val_loss: 312566976.0000
Epoch 2/100
[1m 1/26[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 52ms/step - loss: 420097696.0000



[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - loss: 349931328.0000 - val_loss: 307993088.0000
Epoch 3/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step - loss: 324408928.0000 - val_loss: 275181184.0000
Epoch 4/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step - loss: 269328640.0000 - val_loss: 167378000.0000
Epoch 5/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step - loss: 176762304.0000 - val_loss: 122403872.0000
Epoch 6/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step - loss: 134201872.0000 - val_loss: 107497776.0000
Epoch 7/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step - loss: 115955240.0000 - val_loss: 98601784.0000
Epoch 8/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 43ms/step - loss: 95463896.0000 - val_loss: 89857328.0000
Epoch 9/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

<keras.src.callbacks.history.History at 0x7f5fa9107950>

In [29]:
model4.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_val, y_val), verbose=1)

Epoch 1/100
[1m 1/26[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 113ms/step - loss: 370739008.0000



[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 56ms/step - loss: 324628000.0000 - val_loss: 309679840.0000
Epoch 2/100




[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 52ms/step - loss: 285760352.0000 - val_loss: 208854288.0000
Epoch 3/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 53ms/step - loss: 160131488.0000 - val_loss: 111418880.0000
Epoch 4/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 52ms/step - loss: 103627808.0000 - val_loss: 97211488.0000
Epoch 5/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 53ms/step - loss: 88053112.0000 - val_loss: 65426416.0000
Epoch 6/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 53ms/step - loss: 64005984.0000 - val_loss: 49874248.0000
Epoch 7/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step - loss: 50860016.0000 - val_loss: 38306100.0000
Epoch 8/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step - loss: 40812584.0000 - val_loss: 33780360.0000
Epoch 9/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

<keras.src.callbacks.history.History at 0x7f5f3de57a70>

In [31]:
val_loss = model3.evaluate(X_check, y_check, verbose=1)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 18371638.0000


