In [30]:
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Load the raw insurance table from disk (path is relative to the working
# directory the notebook is run from).
insurance_data = pd.read_csv("./Insurance_Data.csv")

# read_csv already returns a DataFrame, so no extra pd.DataFrame(...) wrap is
# needed.  Work on an explicit copy so that later in-place edits of
# data_frame cannot leak back into the raw insurance_data table.
data_frame = insurance_data.copy()



# Drop the columns that are not categorical features (and the status column,
# for now).  The columns are picked by their position in the loaded table,
# so this list must be kept in sync with the CSV layout.
cols = [1, 2, 3, 10, 11]
labels_to_drop = data_frame.columns[cols]
data_frame.drop(columns=labels_to_drop, inplace=True)

# Frequency table per remaining column: column name -> Series holding how
# often each distinct value occurs in that column.
class_count = {
    column: data_frame[column].value_counts() for column in data_frame
}

# Show which columns were counted.
print(class_count.keys())


#encode y
y = data_frame["Conclusion"]
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)
y_cat = to_categorical(y_encoded)


# Feature encoding: every remaining column except the target, one-hot encoded.
X = data_frame.drop(columns=["Conclusion"])

# Ask for float32 dummies explicitly.  The default dtype of get_dummies
# depends on the pandas version (bool in 2.x, uint8 before), which leaves the
# conversion to a float tensor up to Keras; float32 is what the network
# consumes, so produce it directly.
X = pd.get_dummies(X, dtype="float32")

# Split the data: hold out 20% as the test set, then take 20% of the
# remaining rows as the validation set (64% / 16% / 20% overall).
# A fixed seed makes both splits reproducible, so that metrics from
# different runs are measured on the same rows and can be compared.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y_cat, test_size=0.2, random_state=SPLIT_SEED
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=SPLIT_SEED
)



# Single-hidden-layer classifier: one input per one-hot feature column,
# a 512-unit ReLU layer, and a softmax output with one unit per target class.
model = keras.Sequential(
    [
        layers.Input(shape=(X_train.shape[1],)),
        layers.Dense(512, activation="relu"),
        layers.Dense(y_cat.shape[1], activation="softmax"),
    ]
)

# The targets are one-hot rows and the output is a softmax, so name the
# categorical cross-entropy loss explicitly rather than relying on the
# generic "crossentropy" alias, whose resolution is left to Keras and is not
# available in every Keras version.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Stop training once the validation loss has not improved by at least 0.001
# for 10 epochs in a row, and roll the model back to its best weights.
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    min_delta=0.001,
    patience=10,
    restore_best_weights=True,
)

# The epoch count is only an upper bound; early stopping is expected to end
# the run long before it is reached.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=256,
    epochs=1000,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)

# Print the layer-by-layer overview of the trained model.
model.summary()


dict_keys(['Company', 'Coverage', 'SubCoverage', 'Reason', 'SubReason', 'Disposition', 'Conclusion'])
Epoch 1/1000
96/96 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - accuracy: 0.3974 - loss: 2.9643 - val_accuracy: 0.5409 - val_loss: 1.7068
Epoch 2/1000
96/96 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - accuracy: 0.5649 - loss: 1.5567 - val_accuracy: 0.5987 - val_loss: 1.3655
Epoch 3/1000
96/96 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - accuracy: 0.6235 - loss: 1.2733 - val_accuracy: 0.6242 - val_loss: 1.2467
Epoch 4/1000
96/96 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - accuracy: 0.6485 - loss: 1.1468 - val_accuracy: 0.6353 - val_loss: 1.1868
Epoch 5/1000
96/96 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - accuracy: 0.6721 - loss: 1.0526 - val_accuracy: 0.6415 - val_loss: 1.1491
Epoch 6/1000
96/96 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - 

KeyboardInterrupt: 