In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# STEP 2: Load Dataset
# =========================================
# Only prompt for an upload when the CSV is not already in the working
# directory. This keeps "Restart & Run All" from blocking on the interactive
# widget and lets the notebook run outside Colab once the file is present.
from pathlib import Path

uploaded = {}  # stays empty when the upload step is skipped
if not Path("synthetic_insurance_50k.csv").exists():
    # Colab-only helper, imported lazily so other environments do not fail here.
    from google.colab import files
    uploaded = files.upload()

Saving synthetic_insurance_50k.csv to synthetic_insurance_50k.csv


In [None]:
# Point this at the CSV you want to analyse (replace with your file name).
csv_path = "synthetic_insurance_50k.csv"
df = pd.read_csv(csv_path)

# Quick sanity check: dimensions first, then the first few rows.
print("Data Shape:", df.shape)
print(df.head())

Data Shape: (50000, 11)
   Age State Insurance Type  Annual Premium (AUD)  Claim Amount (AUD)  \
0   54   VIC           Life               3226.72             8762.05   
1   38   TAS         Health               4255.91             5699.27   
2   56   QLD         Health               6318.77            12724.00   
3   78   ACT            Car               3404.39             6785.40   
4   23   NSW           Life               1816.43              602.24   

  Claim Status Policy Start Date Policy End Date Product Tier  \
0     Approved        21-02-2023      21-02-2024      Premium   
1     Approved        10-07-2023      09-07-2024      Premium   
2     Approved        23-09-2023      22-09-2024      Premium   
3     Approved        12-08-2024      12-08-2025         Gold   
4     Approved        30-06-2023      29-06-2024        Basic   

  Payment Frequency  Risk Score  
0          Annually        0.72  
1          Annually        0.49  
2          Annually        0.75  
3         

In [None]:
# STEP 3: Define Features & Target
# =========================================
target_col = "Product Tier"
start_col = "Policy Start Date"
end_col = "Policy End Date"

# df.drop returns a new frame, so the raw `df` is left untouched below.
features = df.drop(columns=[target_col])

# Encode target (classes → numbers)
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df[target_col])

# The policy dates are stored as "dd-mm-yyyy" strings. One-hot encoding them
# directly creates one column per distinct date (well over a thousand sparse
# columns), so turn them into a few numeric features instead. An explicit
# format makes unexpected date strings fail loudly instead of being misparsed.
policy_start = pd.to_datetime(features[start_col], format="%d-%m-%Y")
policy_end = pd.to_datetime(features[end_col], format="%d-%m-%Y")
features = features.drop(columns=[start_col, end_col])
features["Policy Start Year"] = policy_start.dt.year
features["Policy Start Month"] = policy_start.dt.month
features["Policy Duration (days)"] = (policy_end - policy_start).dt.days

# One-hot encode the remaining categorical features
X_encoded = pd.get_dummies(features, dtype=float)

# Split train/test BEFORE scaling so that test rows never influence the
# scaler statistics (avoids train/test leakage).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_encoded, y, test_size=0.2, random_state=42, stratify=y
)

# Scale features: fit on the training split only, then apply to both splits
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full scaled matrix (train-derived statistics), kept so that `X` remains
# available under its original name.
X = scaler.transform(X_encoded)

print("Train shape:", X_train.shape, "Test shape:", X_test.shape)

Train shape: (40000, 1483) Test shape: (10000, 1483)


In [None]:
# STEP 4: Handle Class Imbalance (if any)
# =========================================
# Derive the weights from the training labels only, so the held-out test
# labels play no part in anything that feeds into training.
train_classes = np.unique(y_train)
balanced_weights = compute_class_weight(
    class_weight="balanced", classes=train_classes, y=y_train
)

# Key each weight by its actual class id (not its position) and store plain
# Python numbers, giving the {class_index: weight} mapping Keras expects.
class_weights = {
    int(cls): float(weight)
    for cls, weight in zip(train_classes, balanced_weights)
}
print("Class Weights:", class_weights)

Class Weights: {0: np.float64(1.0), 1: np.float64(1.0), 2: np.float64(1.0), 3: np.float64(1.0)}


In [None]:
# STEP 5: Build Deep ANN Model
# =========================================
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks
# and batch shuffling are repeatable across runs.
tf.keras.utils.set_random_seed(42)

n_features = X_train.shape[1]
n_classes = len(np.unique(y))

# Four Dense → BatchNorm → Dropout stages of decreasing width, followed by a
# softmax layer with one unit per product tier.
model = tf.keras.Sequential([
    # Declare the input with an explicit Input layer; passing `input_shape`
    # to the first Dense layer triggers a Keras warning in Sequential models.
    tf.keras.Input(shape=(n_features,)),

    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.4),

    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),

    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.25),

    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Dense(n_classes, activation='softmax')
])

# Sparse categorical cross-entropy matches the integer labels produced by
# LabelEncoder (no one-hot encoding of the target is needed).
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Stop once validation loss has not improved for 10 epochs and roll back to
# the best weights seen; training for a fixed 100 epochs lets the network
# overfit the training split.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True,
)

history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,  # upper bound; early stopping usually ends training sooner
    batch_size=64,
    class_weight=class_weights,  # apply the weights computed in STEP 4
    callbacks=[early_stopping],
    verbose=1
)

Epoch 1/100
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 19ms/step - accuracy: 0.3995 - loss: 1.2796 - val_accuracy: 0.5980 - val_loss: 0.7314
Epoch 2/100
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 18ms/step - accuracy: 0.5852 - loss: 0.7761 - val_accuracy: 0.6109 - val_loss: 0.6882
Epoch 3/100
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 17ms/step - accuracy: 0.6081 - loss: 0.7224 - val_accuracy: 0.6284 - val_loss: 0.6524
Epoch 4/100
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 18ms/step - accuracy: 0.6103 - loss: 0.6984 - val_accuracy: 0.6382 - val_loss: 0.6318
Epoch 5/100
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 20ms/step - accuracy: 0.6290 - loss: 0.6585 - val_accuracy: 0.6338 - val_loss: 0.6130
Epoch 6/100
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 17ms/step - accuracy: 0.6318 - loss: 0.6519 - val_accuracy: 0.6339 - val_loss: 0.6170
Epoch 7/100
[

In [None]:
# STEP 7: Evaluate ANN
# =========================================
# Score the trained network on the held-out test split. With a single
# compiled metric, evaluate() yields the loss followed by the accuracy.
evaluation = model.evaluate(X_test, y_test)
loss, acc = evaluation
print(f"✅ Test Accuracy: {acc:.2f}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6286 - loss: 1.0493
✅ Test Accuracy: 0.64


In [None]:
# Predict the product tier for the first test row as a quick smoke test.
sample = X_test[:1]
pred = model.predict(sample)

# argmax along the class axis gives one class index per row (a bare
# .argmax() flattens the array and is only correct for a single row);
# inverse_transform then maps the indices back to the tier names.
recommended_tier = label_encoder.inverse_transform(pred.argmax(axis=1))
recommended_tire = recommended_tier  # old misspelled name, kept as an alias
print("🔮 Recommended Tier:", recommended_tier[0])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 281ms/step
🔮 Recommended Tire: Standard
