# Exploratory Data Analysis

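This notebook is a template for exploratory data analysis (EDA) of the insurance risk analytics dataset: it loads the data, assesses data quality, summarizes the numerical columns, computes loss ratios, and generates the standard visualizations.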


In [None]:
# Import necessary libraries
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Add the project root to sys.path so the `src` package can be imported.
# Path() is the current working directory, so project_root is the parent of
# wherever the kernel was started.
# NOTE(review): assumes the kernel runs from a direct subfolder of the
# project root — TODO confirm.
project_root = Path().resolve().parent
# Guard the append so re-running this cell does not stack duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

from src.data.load_data import load_insurance_data, get_data_info
from src.eda.exploratory_analysis import InsuranceEDA

# Set visualization style
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 6)
%matplotlib inline


## 1. Load Data


In [None]:
# Load the insurance data through the project loader imported from
# src.data.load_data; it is called here with no arguments.
df = load_insurance_data()
# assumes the loader returns a pandas DataFrame, so .shape is (rows, columns) — TODO confirm
print(f"Data shape: {df.shape}")
# Bare last expression: shown as the cell's output (a preview of the frame).
df.head()


## 2. Data Information


In [None]:
# Get basic information
# The result is indexed below with the keys 'shape', 'memory_usage_mb' and
# 'columns' — presumably a dict; verify against src.data.load_data.
info = get_data_info(df)
print(f"Shape: {info['shape']}")
# The memory figure is printed with two decimal places.
print(f"Memory Usage: {info['memory_usage_mb']:.2f} MB")
# Only the number of columns is printed, not the column names themselves.
print(f"\nColumns: {len(info['columns'])}")


## 3. Data Quality Assessment


In [None]:
# Missing-value audit: null count per column and its share of all rows (in %).
missing = df.isnull().sum()
missing_pct = missing.div(len(df)).mul(100).round(2)
missing_df = pd.DataFrame({'Missing Count': missing, 'Missing Percentage': missing_pct})

# Show only the columns that have at least one null, worst offenders first.
missing_df.loc[missing_df['Missing Count'].gt(0)].sort_values(by='Missing Count', ascending=False)


## 4. Descriptive Statistics


In [None]:
# Summary statistics restricted to the numeric-dtype columns of df.
numerical_cols = df.select_dtypes(include=[np.number]).columns
df.loc[:, numerical_cols].describe()


## 5. Loss Ratio Analysis


In [None]:
# Build the EDA helper.
# NOTE(review): InsuranceEDA() is constructed without `df`, so it presumably
# loads its own copy of the data — confirm both refer to the same dataset.
eda = InsuranceEDA()

# Portfolio-wide figure: the 'LossRatio' value of the first row returned
# when no grouping columns are passed.
overall_loss = eda.calculate_loss_ratio()
print(f"Overall Loss Ratio: {overall_loss['LossRatio'].iloc[0]:.4f}")

# Per-province breakdown, highest loss ratio first; skipped when the loaded
# frame has no 'Province' column.
if 'Province' in df:
    loss_by_province = eda.calculate_loss_ratio(['Province'])
    print("\nLoss Ratio by Province:")
    print(loss_by_province.sort_values(by='LossRatio', ascending=False))


## 6. Visualizations


In [None]:
# Run each plotting routine of the EDA helper, one after another, in this order.
# NOTE(review): where the figures end up is decided inside InsuranceEDA, which
# is not visible here — confirm it matches the message printed below.
for step_name in (
    'plot_univariate_distributions',
    'plot_bivariate_analysis',
    'plot_geographic_trends',
    'plot_outliers',
    'plot_temporal_trends',
    'create_creative_visualizations',
):
    getattr(eda, step_name)()

print("All visualizations saved to reports/ directory")


## 7. Custom Analysis

Add your own custom analysis here...
