# Quick EDA for Tech-Triathlon 2025 Datathon

This notebook provides a quick exploratory data analysis of the training datasets and the test-input files for both tasks.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys

# Make the project's ``src`` directory importable so the local helper modules
# imported below (``io_safe`` and ``paths``) can be found.
# '../src' is relative to the current working directory; resolving it to an
# absolute path keeps the entry valid if the working directory changes later,
# and the membership check stops re-runs of this cell from piling up duplicate
# ``sys.path`` entries.
_src_dir = str(Path('../src').resolve())
if _src_dir not in sys.path:
    sys.path.append(_src_dir)

from io_safe import read_csv_safe
from paths import RAW_DATA_DIR

# Set plot style: matplotlib's 'default' style sheet with seaborn's "husl" palette
plt.style.use('default')
sns.set_palette("husl")

# Confirm the setup and show where the raw data is expected to live
print("Libraries imported successfully!")
print(f"Data directory: {RAW_DATA_DIR}")

In [None]:
# Load every competition CSV into ``datasets``, keyed by a short label.
# A file that cannot be loaded is reported and stored as None, so the rest of
# the notebook can skip it instead of stopping.
data_files = {
    'bookings': 'bookings_train.csv',
    'tasks': 'tasks.csv',
    'staffing': 'staffing_train.csv',
    'task1_test': 'task1_test_inputs.csv',
    'task2_test': 'task2_test_inputs.csv'
}

datasets = {}
for label, csv_name in data_files.items():
    csv_path = RAW_DATA_DIR / csv_name
    try:
        # The success message stays inside the try block: if reporting the
        # size fails, the entry is reset to None like any other load failure.
        table = read_csv_safe(csv_path)
        datasets[label] = table
        print(f"✓ Loaded {label}: {len(table)} rows, {len(table.columns)} columns")
    except Exception as err:
        print(f"✗ Could not load {label}: {err}")
        datasets[label] = None

# Summary: how many of the requested files are actually available
available = [table for table in datasets.values() if table is not None]
print(f"\nLoaded {len(available)}/{len(datasets)} datasets")

In [None]:
# Print a short profile of each dataset: shape, column names, dtypes, the
# first three rows, and the per-column count of missing values.
for label, frame in datasets.items():
    # Entries that failed to load are stored as None; note them and move on.
    if frame is None:
        print(f"\n{label.upper()}: Not available")
        continue

    print(f"\n{'='*50}")
    print(f"Dataset: {label.upper()}")
    print(f"Shape: {frame.shape}")
    print(f"Columns: {list(frame.columns)}")

    # Column data types
    print("\nData types:")
    print(frame.dtypes)

    # Leading rows, rendered through the notebook's display helper
    print("\nFirst 3 rows:")
    display(frame.head(3))

    # Null counts per column, keeping only the columns that have any
    null_counts = frame.isnull().sum()
    columns_with_nulls = null_counts[null_counts > 0]
    if columns_with_nulls.empty:
        print("\n✓ No missing values")
    else:
        print("\nMissing values:")
        print(columns_with_nulls)