In [1]:
import pandas as pd
import numpy as np
from datetime import timedelta

# Load all tables.
# The path uses a forward slash: the previous "Dataset\Healthcare..." literal
# contained "\H", which is not a valid escape sequence (Python warns about it),
# and a backslash separator only resolves on Windows. "/" works on both
# Windows and POSIX systems.
path = "Dataset/Healthcare Insurance Claims Management Dataset.xlsx"

# Open the workbook once and read every sheet from the same handle instead of
# re-opening the file for each sheet.
with pd.ExcelFile(path) as workbook:
    claims = pd.read_excel(
        workbook,
        sheet_name='Claims_Fact_raw',
        # Parse the claim date columns as datetimes so the later cells can
        # subtract them.
        parse_dates=[
            'ClaimSubmissionDate', 'AdmissionDate', 'DischargeDate',
            'QueryRaiseDate', 'SettlementDate',
        ],
    )
    procedures = pd.read_excel(workbook, sheet_name='Procedure_Master')
    policies = pd.read_excel(workbook, sheet_name='Policy_Master_raw')
    providers = pd.read_excel(workbook, sheet_name='Provider_Master_raw')
    diagnosis = pd.read_excel(workbook, sheet_name='Diagnosis_Master_ICD')

# Normalize numeric: values that cannot be parsed as numbers become NaN
# (errors='coerce') rather than raising.
claims['ClaimedAmount'] = pd.to_numeric(claims['ClaimedAmount'], errors='coerce')
claims['ApprovedAmount'] = pd.to_numeric(claims['ApprovedAmount'], errors='coerce')

## Compute LOS (Length of Stay)

In [2]:
# Length of stay: the days component of (DischargeDate - AdmissionDate).
claims['LOS_days'] = claims['DischargeDate'].sub(claims['AdmissionDate']).dt.days

## Claim Cycle Time

In [3]:
# Claim cycle time: the days component of (SettlementDate - ClaimSubmissionDate).
claims['ClaimCycleTime_days'] = (
    claims['SettlementDate'].sub(claims['ClaimSubmissionDate']).dt.days
)

## Amount Variance & Variance %

In [4]:
# Amount variance: claimed amount minus approved amount, per claim row.
claims['AmountVariance'] = claims['ClaimedAmount'].sub(claims['ApprovedAmount'])

In [5]:
# Variance as a percentage of the claimed amount:
#     (ClaimedAmount - ApprovedAmount) / ClaimedAmount * 100
# The earlier formula divided ApprovedAmount by ClaimedAmount, which yields the
# approved share of the claim rather than the variance (claimed minus approved)
# that the column name describes.
# Rows whose ClaimedAmount is missing or zero get NaN instead of a division
# result.
claims['VariancePercent'] = np.where(
    claims['ClaimedAmount'].notna() & (claims['ClaimedAmount'] != 0),
    (claims['ClaimedAmount'] - claims['ApprovedAmount']) / claims['ClaimedAmount'] * 100,
    np.nan,
)