# In [None]:
"""
In this step, we connect the Jupyter Notebook to the **FARS Traffic Accident dataset** and explore the structure of its files.
Data Loading and Initial Exploration
-----------------------------------
This cell:
1. Loads accident, person, and vehicle datasets into pandas DataFrames.
2. Prints shapes and initial columns for overview.
3. Summarizes selected variables relevant to the research question:
   - FATALS (target variable)
   - WEATHER
   - LGT_COND (light condition)
   - ROAD_FNC (road function)
4. Displays the first 50 rows of the accident dataset for inspection (and for Data Wrangler in VS Code).
"""

import pandas as pd
from pathlib import Path

# Folder with the CSV files, resolved relative to the current working directory
DATA_DIR = Path("..", "data")

# Read the three tables, one DataFrame per CSV file
accident = pd.read_csv(DATA_DIR.joinpath("accident.csv"))
person = pd.read_csv(DATA_DIR.joinpath("person.csv"))
vehicle = pd.read_csv(DATA_DIR.joinpath("vehicle.csv"))

# Overview: (rows, columns) of every table on a single line
shape_report = (
    "Shapes -> accident:", accident.shape,
    "| person:", person.shape,
    "| vehicle:", vehicle.shape,
)
print(*shape_report)

# Only the first ten accident column names are listed to keep the output short
leading_columns = accident.columns[:10].tolist()
print("\nColumns in accident table:", leading_columns, "...")

# Frequency tables: the full distribution for the target (FATALS) and, for
# each predictor, only the rows returned by head() on its value counts
summary = {
    "Fatalities": accident["FATALS"].value_counts(),
    **{
        label: accident[column].value_counts().head()
        for label, column in (
            ("Weather", "WEATHER"),
            ("Light Condition", "LGT_COND"),
            ("Road Function", "ROAD_FNC"),
        )
    },
}

# One titled section per summarized variable
for key, val in summary.items():
    print(f"\n--- {key} ---", val, sep="\n")

# Kept as the last expression so the notebook renders the sample rows
# (triggers Data Wrangler in VS Code)
accident.head(50)

