In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, LSTM, Conv1D, MaxPooling1D, Dropout, Attention
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping

# Load and preprocess data (assuming this part is done as previously described)
df=pd.read_csv("C:\\Users\\AksharaVenkatesh\\OneDrive - ConceptVines\\High Peak\\insurance_data.csv")
# Preprocess the data
policyholder_ids = df['Policyholder_ID'].unique()
n_policyholders = len(policyholder_ids)

# Create a dictionary to map policyholder IDs to integers
policyholder_id_map = {policyholder_id: i for i, policyholder_id in enumerate(policyholder_ids)}

# Create a list to store the preprocessed data
X = []
y = []

# Iterate over the policyholders
for policyholder_id in policyholder_ids:
    # Get the policyholder's data
    policyholder_data = df[df['Policyholder_ID'] == policyholder_id]
    
    # Create a list to store the policyholder's features
    features = []
    
    # Iterate over the policyholder's policies
    for i, policy in policyholder_data.iterrows():
        # Convert dates to timestamps and extract policy features
        policy['Effective_Date'] = pd.to_datetime(policy['Effective_Date'])
        policy['Expiration_Date'] = pd.to_datetime(policy['Expiration_Date'])
        policy_features = [
            policy['Premium'],
            policy['Coverage_Amount'],
            policy['Effective_Date'].timestamp(),
            policy['Expiration_Date'].timestamp()
        ]
        
        # Append the policy features to the list
        features.append(policy_features)
    
    # Convert the list of features to a numpy array and normalize
    features = np.array(features)
    features = (features - features.mean(axis=0)) / features.std(axis=0)
    
    # Append the preprocessed data to the lists
    X.append(features)
    y.append(policyholder_data['Longevity'].iloc[0])

# Convert the lists to numpy arrays
X = np.array(X)
y = np.array(y)
# Define the model architecture
input_shape = (X.shape[1], X.shape[2])
input_layer = Input(shape=input_shape)

# Temporal Convolutional Layer with 'same' padding to handle short sequences
conv_layer = Conv1D(filters=64, kernel_size=3, activation='relu', padding='same')(input_layer)
conv_layer = MaxPooling1D(pool_size=2, padding='same')(conv_layer)

# Long-Short Term Memory Layer with return_sequences=True
lstm_layer1 = LSTM(units=128, return_sequences=True)(conv_layer)
lstm_layer2 = LSTM(units=64, return_sequences=True)(lstm_layer1)  # Ensuring LSTM outputs sequences

# Attention Mechanism
attention_layer = Attention()([lstm_layer2, lstm_layer2])  # Attention applied here

# Dense Layer
dense_layer = Dense(units=64, activation='relu')(attention_layer)
dense_layer = Dropout(rate=0.2)(dense_layer)

# Output Layer
output_layer = Dense(units=1)(dense_layer)

# Define the model
model = Model(inputs=input_layer, outputs=output_layer)

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Define the early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=5, min_delta=0.001)

# Train the model
model.fit(X, y, epochs=50, batch_size=32, validation_split=0.2, callbacks=[early_stopping])

# Evaluate the model
mse = model.evaluate(X, y)
print(f'MSE: {mse:.2f}')


  features = (features - features.mean(axis=0)) / features.std(axis=0)
  policy['Effective_Date'] = pd.to_datetime(policy['Effective_Date'])
  policy['Expiration_Date'] = pd.to_datetime(policy['Expiration_Date'])


Epoch 1/50




[1m249/250[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - loss: 3445.4199



[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step - loss: 3432.2346 - val_loss: 96.9242
Epoch 2/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 162.9027 - val_loss: 95.5894
Epoch 3/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 154.9041 - val_loss: 96.0665
Epoch 4/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 159.7021 - val_loss: 95.7748
Epoch 5/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 153.7381 - val_loss: 95.8563
Epoch 6/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 157.2836 - val_loss: 95.8217
Epoch 7/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 157.2221 - val_loss: 96.4444
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 101.2740
MSE: 99.21
