In [2]:
import os
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from scikeras.wrappers import KerasRegressor
from keras.layers import Dense, LSTM, Dropout
from keras import optimizers
import tensorflow.keras.layers as layers

In [4]:
#Load data
data = pd.read_csv('original_kaggle_healthinsurance.csv')

data.head()

#check if data contains missing values or nan
print(data.isnull().sum())

#drop rows with missing values
data = data.dropna()

print(data.isnull().sum())



# Now apply get_dummies
data = pd.get_dummies(data)

data.replace(False, 0, inplace=True)
data.replace(True, 1, inplace=True)

#put claim column to the end
def move_column_to_end(data, col):
    data[col] = data.pop(col)

# Usage
move_column_to_end(data, 'claim')

#transform pandas back into csv
data.to_csv('one_hot_encoded.csv', index=False)












age                    396
sex                      0
weight                   0
bmi                    956
hereditary_diseases      0
no_of_dependents         0
smoker                   0
city                     0
bloodpressure            0
diabetes                 0
regular_ex               0
job_title                0
claim                    0
dtype: int64
age                    0
sex                    0
weight                 0
bmi                    0
hereditary_diseases    0
no_of_dependents       0
smoker                 0
city                   0
bloodpressure          0
diabetes               0
regular_ex             0
job_title              0
claim                  0
dtype: int64


  data.replace(True, 1, inplace=True)
  data[col] = data.pop(col)


In [70]:
data.head(20)

Unnamed: 0,age,weight,bmi,no_of_dependents,smoker,bloodpressure,diabetes,regular_ex,claim,sex_female,...,job_title_Journalist,job_title_Labourer,job_title_Lawyer,job_title_Manager,job_title_Photographer,job_title_Police,job_title_Politician,job_title_Singer,job_title_Student,job_title_Technician
0,60.0,64,24.3,1,0,72,0,0,13112.6,0,...,0,0,0,0,0,0,0,0,0,0
1,49.0,75,22.6,1,0,78,1,1,9567.0,1,...,0,0,0,0,0,0,0,0,0,0
2,32.0,64,17.8,2,1,88,1,1,32734.2,1,...,0,0,0,0,0,0,0,0,0,0
3,61.0,53,36.4,1,1,72,1,0,48517.6,1,...,0,0,0,0,0,0,0,0,0,0
4,19.0,50,20.6,0,0,82,1,0,1731.7,1,...,0,0,0,0,0,0,0,0,0,0
5,42.0,89,37.9,0,0,78,0,0,6474.0,1,...,0,0,0,0,0,0,0,0,0,0
6,18.0,59,23.8,0,0,64,0,0,1705.6,0,...,0,0,0,0,0,0,0,1,0,0
7,21.0,52,26.8,0,0,74,1,0,1534.3,0,...,0,0,0,0,0,0,0,0,0,0
9,40.0,69,29.6,0,0,64,1,1,5910.9,1,...,0,0,0,0,0,0,0,0,0,0
10,51.0,50,33.0,0,1,0,1,0,44400.4,1,...,0,0,0,0,0,1,0,0,0,0


In [5]:
#Preprocess data
X = data.drop('claim', axis=1)
num_columns = X.shape[1]
print("num of features: " + str(num_columns))
y = data['claim']
X.head()



num of features: 146


Unnamed: 0,age,weight,bmi,no_of_dependents,smoker,bloodpressure,diabetes,regular_ex,sex_female,sex_male,...,job_title_Journalist,job_title_Labourer,job_title_Lawyer,job_title_Manager,job_title_Photographer,job_title_Police,job_title_Politician,job_title_Singer,job_title_Student,job_title_Technician
0,60.0,64,24.3,1,0,72,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1,49.0,75,22.6,1,0,78,1,1,1,0,...,0,0,0,0,0,0,0,0,0,0
2,32.0,64,17.8,2,1,88,1,1,1,0,...,0,0,0,0,0,0,0,0,0,0
3,61.0,53,36.4,1,1,72,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4,19.0,50,20.6,0,0,82,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
import joblib
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=69)
# Second split: Split the 40% temporary set into 25% test and 15% evaluation
X_eval, X_test, y_eval, y_test = train_test_split(X_temp, y_temp, test_size=(0.25/0.4), random_state=69)
scaler = StandardScaler()
scaler.fit(X_train)
joblib.dump(scaler, 'scaler.pkl')
#load scaler 
scaler = joblib.load('scaler.pkl')
X_train_scaled = scaler.transform(X_train)
X_eval_scaled = scaler.transform(X_eval)
X_test_scaled = scaler.transform(X_test)
print(X_train_scaled)

[[-0.89546079 -0.3409248  -0.1684676  ... -0.23252521 -0.2985288
  -0.13845047]
 [-0.39847008  0.91004049 -0.15224112 ... -0.23252521 -0.2985288
  -0.13845047]
 [ 0.87950603  1.13079907  2.83343038 ... -0.23252521 -0.2985288
  -0.13845047]
 ...
 [-0.6114661  -1.5183039  -0.72016776 ... -0.23252521 -0.2985288
  -0.13845047]
 [ 0.45351399 -0.12016622  0.5617238  ... -0.23252521 -0.2985288
  -0.13845047]
 [ 1.37649674 -0.92961435 -0.2333735  ... -0.23252521 -0.2985288
  -0.13845047]]


In [12]:

import tensorflow as tf
import numpy as np
from sklearn.model_selection import ParameterSampler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Assume X and y are defined somewhere above this code
input_dim = X.shape[1]

# Define your TensorFlow ANN model
def create_model(hidden_layers, neurons, optimizer, learning_rate, regularization):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(neurons, input_shape=(input_dim,)), kernel_regularizer=tf.keras.regularizers.l2(regularization))
    model.add(tf.keras.layers.LeakyReLU(neurons), kernel_regularizer=tf.keras.regularizers.l2(regularization))
    for _ in range(hidden_layers - 1):
        model.add(tf.keras.layers.Dense(neurons), kernel_regularizer=tf.keras.regularizers.l2(regularization))
        model.add(tf.keras.layers.LeakyReLU(), kernel_regularizer=tf.keras.regularizers.l2(regularization))
    model.add(tf.keras.layers.Dense(1, activation='linear'), kernel_regularizer=tf.keras.regularizers.l2(regularization))
    
    if optimizer == 'adamW':
        optimizer = tf.keras.optimizers.AdamW(learning_rate=learning_rate)
    elif optimizer == 'sgd':
        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    elif optimizer == 'Adadelta':
        optimizer = tf.keras.optimizers.Adadelta(learning_rate=learning_rate)
    elif optimizer == 'Adagrad':
        optimizer = tf.keras.optimizers.Adagrad(learning_rate=learning_rate)
    elif optimizer == 'Adam':
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    elif optimizer == 'Adamax':
        optimizer = tf.keras.optimizers.Adamax(learning_rate=learning_rate)
    elif optimizer == 'Nadam':
        optimizer = tf.keras.optimizers.Nadam(learning_rate=learning_rate)
    elif optimizer == 'RMSprop':
        optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)
    
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae', tf.keras.metrics.RootMeanSquaredError(), tf.keras.metrics.MeanAbsolutePercentageError()])
    return model

# Define the parameter grid for grid search
param_grid = {
    'hidden_layers': [4,5, 6, 7, 8, 9, 10 ,11, 12,13,14,15,16], 
    'neurons': [3, 5, 6, 7, 8, 9, 10,11, 12,13,14,15,16], 
    'optimizer': ['adamW', 'Adadelta', 'Adagrad', 'Adam', 'Adamax', 'Nadam', 'RMSprop'],
    'learning_rate': [0.001, 0.01,0.1],
    'regularization': [0.01, 0.01,0.1]
}

# Create a ParameterSampler
param_list = list(ParameterSampler(param_grid, n_iter=300, random_state=69))
best_score = np.inf
best_scores = None
best_params = None

# For each set of parameters
for params in param_list:

    # Create a model
    model = create_model(**params)
    
    # Train the model
    model.fit(X_train_scaled, y_train, epochs=150, batch_size=64, verbose=0)
    
    # Evaluate the model
    score = model.evaluate(X_eval_scaled, y_eval, verbose=0)
    print(score)
    print(params)

    
    if score[0] < best_score:
        best_score = score[0]
        best_params = params
        best_scores = score

print("Best Parameters: ", best_params)
print("Best Score: ", best_score)
print(best_scores)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


TypeError: Sequential.add() got an unexpected keyword argument 'kernel_regularizer'

In [84]:
print("Best Parameters: ", best_params)
print("Best Score: ", best_score)
print(best_scores)

Best Parameters:  {'regularization': 0.01, 'optimizer': 'Adamax', 'neurons': 16, 'learning_rate': 0.1, 'hidden_layers': 16}
Best Score:  12743031.0
[12743031.0, 1836.1156005859375, 3569.73828125, 19.719820022583008]


In [86]:
#hidden_layers, neurons, optimizer, learning_rate, regularization    
#def create_model(hidden_layers, neurons, optimizer, learning_rate, regularization):
model = create_model(**best_params)


# Train the model
model.fit(X_train_scaled, y_train, epochs=300, batch_size=64)
score1= model.evaluate(X_train_scaled, y_train)
score2 = model.evaluate(X_eval_scaled, y_eval)
# Evaluate the model
score3 = model.evaluate(X_test_scaled, y_test)

print("train score: ", score1)
print("eval score: ", score2)
print("test score: ", score3)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/300
[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - loss: 657823680.0000 - mae: 15564.2500 - mean_absolute_percentage_error: 140.1449 - root_mean_squared_error: 24682.1250
Epoch 2/300
[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 272541952.0000 - mae: 11819.7256 - mean_absolute_percentage_error: 99.6629 - root_mean_squared_error: 16388.6816
Epoch 3/300
[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 37517260.0000 - mae: 4264.3623 - mean_absolute_percentage_error: 52.8963 - root_mean_squared_error: 6123.3789
Epoch 4/300
[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 32191430.0000 - mae: 3819.3311 - mean_absolute_percentage_error: 46.0942 - root_mean_squared_error: 5670.8257
Epoch 5/300
[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 30832932.0000 - mae: 3697.6697 - mean_absolute_percentage_error: 43.2836 - ro

In [10]:
#hidden_layers, neurons, optimizer, learning_rate, regularization    
model = create_model(35, 110, 'adamW', 0.001, 0.3)


# Train the model and also display the error on the evaluation set
model.fit(X_train_scaled, y_train, epochs=150, batch_size=64)
score1 = model.evaluate(X_train_scaled, y_train)
score2 = model.evaluate(X_eval_scaled, y_eval)
# Evaluate the model
score3 = model.evaluate(X_test_scaled, y_test)

print("train score: ", score1)
print("eval score: ", score2)
print("test score: ", score3)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


TypeError: Sequential.add() got an unexpected keyword argument 'kernel_regularizer'

In [1]:
model.save('model_ANN_2.h5')

#load model
model = tf.keras.models.load_model('model_ANN_2".h5')


NameError: name 'model' is not defined