In [1]:
import numpy as np
from functools import reduce
from sklearn.model_selection import train_test_split
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.utils import class_weight
from tensorflow.keras.callbacks import EarlyStopping


# Load the raw insurance records. read_csv already returns a DataFrame, so the
# working frame is taken as an explicit copy; the untouched original stays
# available as `insurance_data`.
insurance_data = pd.read_csv("./Insurance_Data.csv")
data_frame = insurance_data.copy()

# Drop the columns at positions 1, 9, 10 and 11 (per the original note: the
# non-categorical features and the status column, for now).
# NOTE(review): positional indices silently select different columns if the
# CSV layout changes -- consider dropping by column name instead.
cols = [1, 9, 10, 11]
data_frame.drop(data_frame.columns[cols], axis=1, inplace=True)

# Remove every row that has a null in ANY remaining column.
# NOTE(review): the stated intent was to drop rows where "Disposition" is
# missing; dropna() without `subset` is broader than that -- confirm that
# discarding rows with nulls in other columns is intended.
data_frame.dropna(inplace=True)
print(data_frame["Disposition"])

# Encode the target: map each Disposition label to an integer id, then one-hot
# encode the ids (the model below ends in a softmax trained with
# categorical_crossentropy).
y = data_frame["Disposition"]
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)
y_cat = to_categorical(y_encoded)


# Encode the features: one-hot encode the categorical feature columns.
X = data_frame.drop(columns=["Disposition"])
X = pd.get_dummies(X)

# Split the data: 20% held out for testing, then 20% of the remainder held out
# for validation (64% train / 16% validation / 20% test overall).
# A fixed seed keeps the partitions -- and therefore the reported test
# metrics -- identical from run to run.
SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y_cat, test_size=0.2, train_size=0.8, shuffle=True, random_state=SEED
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, shuffle=True, random_state=SEED
)


# Feed-forward classifier: two ReLU hidden layers (512 and 256 units), each
# followed by 50% dropout, ending in a softmax with one unit per one-hot class.
n_features = X_train.shape[1]
n_classes = y_cat.shape[1]

model = keras.Sequential(
    [
        layers.Input(shape=(n_features,)),
        layers.Dense(512, activation="relu"),
        layers.Dropout(0.5),
        layers.Dense(256, activation="relu"),
        layers.Dropout(0.5),
        layers.Dense(n_classes, activation="softmax"),
    ]
)

model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)


# Recover integer class ids for the training rows from their one-hot labels.
y_int = np.argmax(y_train, axis=1)

# Compute "balanced" weights for the classes that actually occur in the
# training split, so that rarer classes are weighted more heavily.
present_classes = np.unique(y_int)
weights = class_weight.compute_class_weight(
    class_weight="balanced",
    classes=present_classes,
    y=y_int,
)

# Map class id -> weight. The weights are ordered like `present_classes`, so
# they must be keyed by the real class id rather than by position: if the
# random split leaves a class out of y_train, positional keys would shift every
# later class onto the wrong weight. Classes absent from the training split get
# a neutral 1.0 (never applied, since no training row carries them), which
# keeps the keys contiguous from 0 to n_classes - 1.
class_weights = {class_id: 1.0 for class_id in range(y_train.shape[1])}
class_weights.update(
    {int(class_id): float(w) for class_id, w in zip(present_classes, weights)}
)

# Stop training once validation loss has failed to improve by at least 0.001
# for 10 epochs, and roll the model back to its best weights.
early_stopping = EarlyStopping(
    restore_best_weights=True,
    min_delta=0.001,
    patience=10,
    monitor="val_loss",
)

# Train with per-class loss weighting, validating on the held-out split; the
# 500-epoch budget is an upper bound that early stopping may cut short.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=500,
    batch_size=256,
    class_weight=class_weights,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)

# Report the architecture, then loss and accuracy on the untouched test split.
model.summary()
loss, acc = model.evaluate(x=X_test, y=y_test)
print(loss, acc)

0           Company Position Substantiated
1                            Claim Settled
2                            Claim Settled
3                            Claim Settled
4        Compromised Settlement/Resolution
                       ...                
38256       Company Position Substantiated
38257       Company Position Substantiated
38259                        Claim Settled
38263       Company Position Substantiated
38264                        Claim Settled
Name: Disposition, Length: 18537, dtype: object
Epoch 1/500
47/47 ━━━━━━━━━━━━━━━━━━━━ 2s 31ms/step - accuracy: 0.1854 - loss: 2.9499 - val_accuracy: 0.3722 - val_loss: 2.0425
Epoch 2/500
47/47 ━━━━━━━━━━━━━━━━━━━━ 1s 24ms/step - accuracy: 0.3547 - loss: 3.0353 - val_accuracy: 0.4690 - val_loss: 1.6482
Epoch 3/500
47/47 ━━━━━━━━━━━━━━━━━━━━ 1s 26ms/step - accuracy: 0.5015 - loss: 1.4189 - val_accuracy: 0.5131 - val_loss: 

116/116 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - accuracy: 0.6530 - loss: 1.0891
1.0417137145996094 0.6712513566017151
