# Claims Analysis Project - Everwell Assurance Group
### Author: Rithesh R | Data Analyst

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load Data
# Read the raw claims export from the working directory into `df`, the
# DataFrame every later cell operates on, then preview the first rows.
df = pd.read_csv(filepath_or_buffer='claims_data.csv')
print('Data loaded successfully ✅')
df.head(5)

In [None]:
# Explore Data
# Quick structural overview: column dtypes / non-null counts, summary
# statistics, and the number of missing values per column.
df.info()  # writes its report straight to stdout

# A notebook cell only renders the value of its *last* expression, so a bare
# `df.describe()` in the middle of the cell is computed and then thrown away.
# Print it explicitly so the summary statistics are actually shown.
print(df.describe())

# Last expression of the cell: rendered by the notebook as the cell output.
df.isnull().sum()

In [None]:
# Data Cleaning
# Missing claim amounts are imputed with the column median; claims with no
# recorded status are labelled 'Pending'.
median_claim = df['ClaimAmount'].median()
df['ClaimAmount'] = df['ClaimAmount'].fillna(value=median_claim)
df['Status'] = df['Status'].fillna(value='Pending')

In [None]:
# Feature Engineering
# ProcessingTime: whole days elapsed between submission and settlement.
settled_on = pd.to_datetime(df['SettlementDate'])
submitted_on = pd.to_datetime(df['SubmissionDate'])
elapsed = settled_on - submitted_on
df['ProcessingTime'] = elapsed.dt.days

# FraudFlag: 1 when the claim amount is strictly above the 95th percentile of
# all claim amounts, otherwise 0. NOTE(review): this is an amount-based
# heuristic computed from this dataset alone, not a confirmed fraud label.
amount_cutoff = df['ClaimAmount'].quantile(0.95)
is_high_value = df['ClaimAmount'] > amount_cutoff
df['FraudFlag'] = np.where(is_high_value, 1, 0)

In [None]:
# Exploratory Data Analysis

# Figure 1: histogram (30 bins) of the engineered ProcessingTime column.
processing_days = df['ProcessingTime']
plt.figure(figsize=(8, 5))
sns.histplot(processing_days, bins=30)
plt.title('Distribution of Claim Processing Time')
plt.show()

# Figure 2: one box per claim type showing the spread of claim amounts.
plt.figure(figsize=(6, 4))
sns.boxplot(data=df, x='ClaimType', y='ClaimAmount')
plt.title('Claim Amount by Type')
plt.show()

In [None]:
# Fraud Analysis
# Share of flagged claims within each claim type (mean of the 0/1 FraudFlag),
# listed from the highest share to the lowest.
flag_share_by_type = df.groupby('ClaimType')['FraudFlag'].mean()
fraud_summary = flag_share_by_type.sort_values(ascending=False)
print(fraud_summary)

In [None]:
# KPI Summary
# Headline figures: mean processing time in days, and the percentage of
# claims carrying FraudFlag == 1.

avg_processing = df['ProcessingTime'].mean()
print(f'Average Processing Time: {avg_processing:.2f} days')

# Mean of a 0/1 column is the flagged fraction; scale it to a percentage.
fraud_rate = 100 * df['FraudFlag'].mean()
print(f'Fraud Rate: {fraud_rate:.2f}%')

In [None]:
# Save Cleaned Data
# Persist the cleaned and feature-enriched frame next to the input file,
# leaving the row index out of the CSV.
df.to_csv(path_or_buf='claims_data_cleaned.csv', index=False)
print('Cleaned data saved ✅')