# Informations:
-   the dataframa i using is avilable in https://www.kaggle.com/datasets/mirichoi0218/insurance

Columns

    age: age of primary beneficiary

    sex: insurance contractor gender, female, male

    bmi: Body mass index, providing an understanding of body, weights that are relatively high or low relative to height,
    objective index of body weight (kg / m ^ 2) using the ratio of height to weight, ideally 18.5 to 24.9

    children: Number of children covered by health insurance / Number of dependents

    smoker: Smoking

    region: the beneficiary's residential area in the US, northeast, southeast, southwest, northwest.

    charges: Individual medical costs billed by health insurance


In [5]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import warnings

warnings.filterwarnings('ignore')

In [6]:
df = pd.read_csv("https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv")

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1338 entries, 0 to 1337
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1338 non-null   int64  
 1   sex       1338 non-null   object 
 2   bmi       1338 non-null   float64
 3   children  1338 non-null   int64  
 4   smoker    1338 non-null   object 
 5   region    1338 non-null   object 
 6   charges   1338 non-null   float64
dtypes: float64(2), int64(2), object(3)
memory usage: 73.3+ KB


In [8]:
df.describe()

Unnamed: 0,age,bmi,children,charges
count,1338.0,1338.0,1338.0,1338.0
mean,39.207025,30.663397,1.094918,13270.422265
std,14.04996,6.098187,1.205493,12110.011237
min,18.0,15.96,0.0,1121.8739
25%,27.0,26.29625,0.0,4740.28715
50%,39.0,30.4,1.0,9382.033
75%,51.0,34.69375,2.0,16639.912515
max,64.0,53.13,5.0,63770.42801


In [9]:
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


### applying one-hot to categorical variables

In [10]:
df1 = pd.get_dummies(df,drop_first=True)

In [11]:
def zero_or_one(x):
    """Returns 1 if x is True, 0 otherwise."""
    return 1 if x else 0
    

def bool_to_numeric(dataframe: pd.DataFrame, col_names: list) -> pd.DataFrame:
  """
  Transforms boolean columns in a DataFrame to numeric (1 or 0).

  Args:
      dataframe: The DataFrame containing boolean columns.
      col_names: A list of column names to convert.

  Returns:
      A new DataFrame with the specified columns converted to numeric.
  """

  dataframe[col_names] = dataframe[col_names].astype(int)
  return dataframe

df1 = bool_to_numeric(df1,df1.columns[4:])

In [12]:
df1

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes,region_northwest,region_southeast,region_southwest
0,19,27.900,0,16884.92400,0,1,0,0,1
1,18,33.770,1,1725.55230,1,0,0,1,0
2,28,33.000,3,4449.46200,1,0,0,1,0
3,33,22.705,0,21984.47061,1,0,1,0,0
4,32,28.880,0,3866.85520,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,1,0,1,0,0
1334,18,31.920,0,2205.98080,0,0,0,0,0
1335,18,36.850,0,1629.83350,0,0,0,1,0
1336,21,25.800,0,2007.94500,0,0,0,0,1


In [9]:
X = df1.drop(columns='charges')
y = df1.charges

In [10]:
X.astype(float).info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1338 entries, 0 to 1337
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   age               1338 non-null   float64
 1   bmi               1338 non-null   float64
 2   children          1338 non-null   float64
 3   sex_male          1338 non-null   float64
 4   smoker_yes        1338 non-null   float64
 5   region_northwest  1338 non-null   float64
 6   region_southeast  1338 non-null   float64
 7   region_southwest  1338 non-null   float64
dtypes: float64(8)
memory usage: 83.8 KB


In [12]:
X_train,X_test, y_train, y_test = train_test_split(X.astype(float),
                                                   y.astype(float),
                                                   test_size=0.2)

In [13]:
class Hidden_layers(tf.keras.layers.Layer):
    def __init__(self,units,
                      activation,
                      num_hidden_states,
                      drop_rate,
                      drop_out):
        
        super(Hidden_layers,self).__init__()
        self.unit = units
        self.activation = activation
        self.num_hidden_states = num_hidden_states
        self.drop_out = drop_out
        self.drop_rate = drop_rate

        self.input_layer = tf.keras.layers.Dense(units=self.unit, activation=self.activation, input_shape = (8,)) 
        self.dense = tf.keras.layers.Dense(units=self.unit, activation=self.activation)
        self.dropout = tf.keras.layers.Dropout(self.drop_rate)
    
    def call(self,inputs):
        if self.drop_out:
            input_layer = self.input_layer(inputs)
            drop = self.dropout(input_layer)
            for i in range(self.num_hidden_states):
                dense = self.dense(drop)
                drop = self.dropout(dense)
            return drop
        else:
            dense = self.input_layer(inputs)
            for i in range(self.num_hidden_states):
                dense = self.dense(dense)
            return dense
    

class Regression_model(tf.keras.Model):
    def __init__(self,units,
                      activation,
                      num_hidden_states,
                      drop_rate = 0.2,
                      drop_out = False):
        
        super().__init__()
        self.units = units
        self.activation = activation
        self.num_hidden_states = num_hidden_states
        self.drop_rate = drop_rate
        self.drop_out = drop_out

        self.hiddens_layers = Hidden_layers(self.units,
                                            self.activation,
                                            self.num_hidden_states,
                                            self.drop_rate,
                                            self.drop_out)
        
        self.main_output = tf.keras.layers.Dense(1,name='Output')

    def call(self,inputs):
        hidden_sate = self.hiddens_layers(inputs)
        output = self.main_output(hidden_sate)

        return output

In [14]:
X_train.shape

(1070, 8)

In [15]:
model = Regression_model(units=20,
                         activation='relu',
                         num_hidden_states=3,
                         drop_rate=0.2,
                         drop_out=True)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-05-27 10:09:50.542334: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-05-27 10:09:50.567893: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-05-27 10:09:50.572610: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.co

In [16]:
model.summary()

In [17]:
model.compile(loss=tf.keras.losses.mae,
              optimizer= tf.keras.optimizers.SGD(),
              metrics = ['mae'])

model.fit(X_train,y_train,epochs=10)

Epoch 1/10


I0000 00:00:1716815449.706640   17058 service.cc:145] XLA service 0x792d280058c0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1716815449.706665   17058 service.cc:153]   StreamExecutor device (0): NVIDIA GeForce RTX 4050 Laptop GPU, Compute Capability 8.9
2024-05-27 10:10:49.716451: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-05-27 10:10:49.763840: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907


[1m 1/34[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m22s[0m 672ms/step - loss: 11798.1914 - mae: 11798.1914

I0000 00:00:1716815450.112220   17058 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 2783859.2500 - mae: 2783859.2500
Epoch 2/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 525us/step - loss: 13193.8818 - mae: 13193.8818
Epoch 3/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 651us/step - loss: 13673.7217 - mae: 13673.7217
Epoch 4/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 654us/step - loss: 13226.6816 - mae: 13226.6816
Epoch 5/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 759us/step - loss: 13794.5547 - mae: 13794.5547
Epoch 6/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 702us/step - loss: 13388.2920 - mae: 13388.2920
Epoch 7/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 610us/step - loss: 13961.4346 - mae: 13961.4346
Epoch 8/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 13372.1758 - mae: 13372.1758 
Epoch 9/10
[1m34/34[0m 

<keras.src.callbacks.history.History at 0x792e4c13e530>