In [None]:
import numpy as np
from functools import reduce
from sklearn.model_selection import train_test_split
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Train a small dense softmax classifier that predicts the "Disposition"
# column of the insurance data set from one-hot encoded features.

# Fixed seed shared by both splits and by Keras so that repeated runs of this
# cell produce the same partitions and the same initial weights.
SEED = 42
keras.utils.set_random_seed(SEED)

# Read the raw CSV. `insurance_data` is kept unmodified; all cleaning is done
# on `data_frame`.
insurance_data = pd.read_csv("./Insurance_Data.csv")

# Removing non categorical features and status for now.
# The columns are selected by position, so this depends on the CSV layout.
cols = [1, 9, 10, 11]
data_frame = insurance_data.drop(columns=insurance_data.columns[cols])

# Drop every row that has a missing value in ANY remaining column; this also
# removes the rows whose Disposition is missing.
# NOTE(review): the stated intent was to drop only rows with a missing
# Disposition. If that is what is wanted, switch to
# dropna(subset=["Disposition"]) and decide how gaps in the feature columns
# should be handled before they reach the network.
data_frame = data_frame.dropna()
print(data_frame["Disposition"])

# Encode y: class labels -> integer ids -> one-hot rows.
y = data_frame["Disposition"]
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)
y_cat = to_categorical(y_encoded)

# Encode X: one-hot encode the remaining feature columns and cast to float32
# explicitly, because get_dummies may return boolean columns.
X = data_frame.drop(columns=["Disposition"])
X = pd.get_dummies(X).astype("float32")

# Split the data: 20% held out for testing, then 20% of the remainder for
# validation (64% train / 16% validation / 20% test overall).
X_train, X_test, y_train, y_test = train_test_split(
    X, y_cat, test_size=0.2, shuffle=True, random_state=SEED
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, shuffle=True, random_state=SEED
)

# One hidden layer followed by a softmax over the encoded classes.
model = keras.Sequential()
model.add(layers.Input(shape=(X_train.shape[1],)))
model.add(layers.Dense(512, activation="relu"))
model.add(layers.Dense(y_cat.shape[1], activation="softmax"))

model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=['accuracy'],
)

# Stop once the validation loss has not improved by at least 0.001 for 10
# consecutive epochs, and roll back to the best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=10,
    min_delta=0.001,
    restore_best_weights=True,
)

# 500 epochs is only an upper bound; early stopping normally ends training.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    batch_size=256,
    epochs=500,
    callbacks=[early_stopping],
)

# Report the architecture and the score on the held-out test split.
model.summary()
loss, acc = model.evaluate(X_test, y_test)
print(loss, acc)

0           Company Position Substantiated
1                            Claim Settled
2                            Claim Settled
3                            Claim Settled
4        Compromised Settlement/Resolution
                       ...                
38256       Company Position Substantiated
38257       Company Position Substantiated
38259                        Claim Settled
38263       Company Position Substantiated
38264                        Claim Settled
Name: Disposition, Length: 18537, dtype: object
Epoch 1/500
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - accuracy: 0.4318 - loss: 2.0735 - val_accuracy: 0.5391 - val_loss: 1.2945
Epoch 2/500
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.6299 - loss: 1.1018 - val_accuracy: 0.6389 - val_loss: 1.0420
Epoch 3/500
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.7581 - loss: 0.7752 - val_accuracy: 0.6679 - val_loss: 

[1m116/116[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7139 - loss: 0.8616
0.8626819849014282 0.7073894143104553
