# Exploratory Data Analysis (EDA)

This notebook performs exploratory data analysis on the image classification dataset.

## Objectives
1. Load and inspect the dataset
2. Analyze data distribution
3. Visualize sample images
4. Identify preprocessing requirements

In [None]:
# Imports
import sys
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter

# Make the notebook's parent directory importable so the `src` package
# (imported just below) can be found.
# The path is resolved to an absolute location so that a later change of the
# working directory cannot break imports, and it is only appended when absent
# so that re-running this cell does not accumulate duplicate sys.path entries.
project_root = str(Path('..').resolve())
if project_root not in sys.path:
    sys.path.append(project_root)

from src.data.data_loader import DataLoader

# Set style: matplotlib's bundled seaborn-look stylesheet plus the "husl"
# seaborn colour palette for all subsequent plots.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

In [None]:
# Load data
# The loader is constructed from the data directory path (as a string).
data_dir = Path('../data')
loader = DataLoader(str(data_dir))

# Load training data.
# VALIDATION_SPLIT is the value passed as `validation_split`; it is named so
# the split used by this notebook is visible and tunable in one place.
VALIDATION_SPLIT = 0.15
X_train, X_val, y_train, y_val = loader.load_train_data(validation_split=VALIDATION_SPLIT)

# Get class names
class_names = loader.get_class_names()
print(f"Number of classes: {len(class_names)}")

# Preview at most MAX_CLASSES_SHOWN names. The trailing "..." is only printed
# when the list was actually truncated, so a short class list is not shown as
# if more entries were hidden.
MAX_CLASSES_SHOWN = 10
truncation_marker = "..." if len(class_names) > MAX_CLASSES_SHOWN else ""
print(f"Class names: {class_names[:MAX_CLASSES_SHOWN]}{truncation_marker}")