In [2]:
import numpy as np
from functools import reduce
from sklearn.model_selection import train_test_split
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Load the raw insurance records; read_csv already returns a DataFrame.
insurance_data = pd.read_csv("./Insurance_Data.csv")

# Positional indices of the columns left out of the model for now. Per the
# original note these are the non-categorical features and the status column.
# NOTE(review): positions depend on the CSV's column order -- re-check them if
# the file layout ever changes.
cols = [1, 2, 3, 10, 11]

# Build the reduced frame without mutating `insurance_data`, so the full set of
# columns stays available under that name.
data_frame = insurance_data.drop(columns=insurance_data.columns[cols])

# Count how often each class occurs in every remaining column.
# value_counts() skips missing values, so each Series covers non-null rows only.
class_count = {}
for feature in data_frame.columns:
    class_count[feature] = data_frame[feature].value_counts()

print(class_count.keys())

# Sum of the class frequencies of the LAST column, i.e. how many non-null
# entries that column has. NOTE: despite the name this is not a number of
# classes. The lookup is spelled out here instead of depending on whatever the
# counting loop happened to leave behind in a loop variable.
last_feature = data_frame.columns[-1]
classes = class_count[last_feature]
classes_df = pd.DataFrame(classes)
classes_num = classes_df.sum()
print(classes_num.iloc[0])


# Encode the target: labels -> integer ids -> one-hot rows (one column per
# class), which is the layout categorical cross-entropy expects.
y = data_frame["Disposition"]
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)
y_cat = to_categorical(y_encoded)


# One-hot encode the inputs. float32 is requested explicitly because pandas
# >= 2.0 produces bool dummy columns by default; the network wants numbers.
X = data_frame.drop(columns=["Disposition"])
X = pd.get_dummies(X, dtype="float32")

# Split into train / validation / test (64% / 16% / 20% of the rows).
# A fixed seed makes both shuffles, and therefore the reported metrics,
# repeatable from run to run.
RANDOM_STATE = 42
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_cat,
    test_size=0.2,
    train_size=0.8,
    shuffle=True,
    random_state=RANDOM_STATE,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    shuffle=True,
    random_state=RANDOM_STATE,
)

# Classifier: a single 512-unit ReLU hidden layer followed by a softmax with
# one output per one-hot target column.
n_features = X_train.shape[1]
n_classes = y_cat.shape[1]

model = keras.Sequential(
    [
        layers.Input(shape=(n_features,)),
        layers.Dense(512, activation="relu"),
        layers.Dense(n_classes, activation="softmax"),
    ]
)

model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Stop once the monitored quantity (Keras' default, the validation loss) has
# not improved by at least 0.001 for 10 epochs, then roll back to the best
# weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    min_delta=0.001,
    patience=10,
    restore_best_weights=True,
)

# 500 epochs is only an upper bound; early stopping normally ends training
# well before that.
fit_options = {
    "validation_data": (X_val, y_val),
    "batch_size": 256,
    "epochs": 500,
    "callbacks": [early_stopping],
}
history = model.fit(X_train, y_train, **fit_options)

model.summary()

# Final score on the held-out test split: prints loss first, then accuracy.
loss, acc = model.evaluate(X_test, y_test)
print(loss, acc)

dict_keys(['Company', 'Coverage', 'SubCoverage', 'Reason', 'SubReason', 'Disposition', 'Conclusion'])
21173
Epoch 1/500
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.4707 - loss: 1.8826 - val_accuracy: 0.6570 - val_loss: 1.0769
Epoch 2/500
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.6762 - loss: 0.9942 - val_accuracy: 0.6993 - val_loss: 0.9124
Epoch 3/500
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.7135 - loss: 0.8511 - val_accuracy: 0.7139 - val_loss: 0.8576
Epoch 4/500
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.7352 - loss: 0.7657 - val_accuracy: 0.7171 - val_loss: 0.8260
Epoch 5/500
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.7520 - loss: 0.7185 - val_accuracy: 0.7253 - val_loss: 0.8003
Epoch 6/500
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/s

[1m240/240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7470 - loss: 0.7655
0.7495641112327576 0.7524170279502869
