# Task 1 – Exploratory Data Analysis (EDA)

This notebook performs initial EDA on the insurance claims dataset using the
modular code in the `src` package. It focuses on:

- Understanding the basic structure of the data.
- Computing overall and grouped loss ratios.
- Preparing for more detailed visual analysis in later notebooks.


In [None]:
# Set up imports and configuration
import sys
from pathlib import Path

# Determine the project root by walking upwards from the working directory
# until a directory that contains the `src` package is found.
def find_project_root(start: Path, marker: str = "src") -> Path:
    """Return the nearest directory at or above *start* that contains *marker*.

    Args:
        start: Directory where the search begins.
        marker: Name of the sub-directory that identifies the project root.

    Returns:
        The first of *start* and its ancestors (closest first) that holds a
        *marker* directory, or *start* itself when none of them does.
    """
    # Checking every ancestor (not just the immediate parent) lets the
    # notebook be launched from any depth inside the repository.
    for candidate in (start, *start.parents):
        if (candidate / marker).is_dir():
            return candidate
    # Nothing found: fall back to the starting directory so that a later
    # `import src` still fails with the usual ModuleNotFoundError.
    return start


project_root = find_project_root(Path.cwd().resolve())

# Put the project root first on sys.path (once) so `src` is importable.
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

from src.data_loader import DataLoader
from src.eda_summary import compute_loss_ratio_overall, compute_loss_ratio_by_group

# Build the loader via its `from_config` constructor
# (presumably reads the project's configuration; see src.data_loader to confirm).
loader = DataLoader.from_config()
# Load the dataset used by every later cell; `df` is a notebook-wide name.
df = loader.load_machine_learning_rating()
# Last expression of the cell: the notebook displays the result of `head()`.
df.head()

ModuleNotFoundError: No module named 'src'

In [None]:
# Basic structural overview of the loaded data via `df.info()`.
# NOTE(review): the original heading also promised "summary statistics";
# assuming `df` is a pandas DataFrame, `info()` does not report those, so a
# `describe()` call may have been intended. Confirm.
df.info()


In [None]:
# Overall loss ratio for the portfolio, computed by the project helper
# `compute_loss_ratio_overall` over the full `df`.
overall_lr = compute_loss_ratio_overall(df)
# Bare name as the cell's last expression so the notebook displays the value.
overall_lr


In [None]:
# Loss ratio grouped by Province; show the first rows after sorting by
# `loss_ratio` in descending order.
lr_by_province = compute_loss_ratio_by_group(df, ["Province"])
(
    lr_by_province
    .sort_values('loss_ratio', ascending=False)
    .head()
)


In [None]:
# Loss ratio grouped by VehicleType; show the first rows after sorting by
# `loss_ratio` in descending order.
lr_by_vehicle_type = compute_loss_ratio_by_group(df, ["VehicleType"])
(
    lr_by_vehicle_type
    .sort_values('loss_ratio', ascending=False)
    .head()
)
