#### Steps to Building a Custom Classifier

1. **Data Collection**  
   Gather labeled images and split them into training and validation sets.

2. **Data Pipeline Creation**  
   Read the data, preprocess it, and group it into batches where necessary.

3. **Data Augmentation** *(Optional)*  
   Apply if the dataset is small; otherwise, skip.

4. **Define the Model** *(If using pretrained models)*  
   - Remove the last task-specific layers  
   - Append your new classifier head  
   - Freeze the weights of the original layers  
   - Select an optimization algorithm  
   - Define performance evaluation metric(s)

5. **Train and Test**  
   Train the model and evaluate on the validation/test set.

#### Data Organization

In [None]:
from pathlib import Path
import shutil
import random

# Locations used by the split: the original images (one sub-folder per class)
# and the dataset root under which the new split folders are created.
source_path = Path("/home/vdv/Computer_Vision/dataset/training_set/training_set")
base_path = Path("/home/vdv/Computer_Vision/dataset")  # Base path up to dataset

# Destination folders for the two splits, directly under the dataset root.
training_path = base_path.joinpath("training")
validation_path = base_path.joinpath("validation")

# exist_ok=True makes re-running harmless; parents are NOT created, so
# base_path itself must already exist.
for split_dir in (training_path, validation_path):
    split_dir.mkdir(exist_ok=True)

# Per-class source folders; whether they actually exist is checked when
# each category is processed.
cats_source = source_path.joinpath("cats")
dogs_source = source_path.joinpath("dogs")

# Split every class folder 80/20 into training/validation by MOVING its files.
# Status symbols are written as \u escapes so they print correctly regardless
# of how this source file is encoded or re-saved.
for category_name, category_source in [("cats", cats_source), ("dogs", dogs_source)]:
    # Guard clause: report a missing class folder and continue with the next one.
    # is_dir() is already False for a non-existent path.
    if not category_source.is_dir():
        print(f"  \u26a0 {category_name} folder not found at {category_source}")
        continue

    print(f"Processing {category_name}...")

    # Create category folders in training and validation
    train_category = training_path / category_name
    val_category = validation_path / category_name
    train_category.mkdir(exist_ok=True)
    val_category.mkdir(exist_ok=True)

    # Only regular files are split; sub-directories stay in the source folder.
    files = [f for f in category_source.iterdir() if f.is_file()]

    # Shuffle in place so the split is random. The global RNG is not seeded
    # here, so the assignment differs between runs unless random.seed() was
    # called beforehand.
    random.shuffle(files)

    # First 80% (rounded down) go to training, the remainder to validation.
    split_point = int(len(files) * 0.8)
    train_files = files[:split_point]
    val_files = files[split_point:]

    # NOTE: shutil.move may overwrite a same-named file already present at the
    # destination, depending on the platform's os.rename() semantics.
    for file in train_files:
        shutil.move(str(file), str(train_category / file.name))

    for file in val_files:
        shutil.move(str(file), str(val_category / file.name))

    print(f"  \u2713 {category_name}: {len(train_files)} files to training, {len(val_files)} files to validation")

# Print the resulting folder tree with a per-class file count.
# Icons are written as \U / \u escapes so they print correctly regardless of
# how this source file is encoded or re-saved.
print("\nFolder structure created:")
for split_dir in (training_path, validation_path):
    print(f"\U0001F4C1 {split_dir}")
    for class_name in ("cats", "dogs"):
        class_dir = split_dir / class_name
        # Count regular files only, so the number matches the "files" label;
        # a class folder that was never created is reported as 0.
        if class_dir.is_dir():
            file_count = sum(1 for entry in class_dir.iterdir() if entry.is_file())
        else:
            file_count = 0
        print(f"   \U0001F4C1 {class_name} ({file_count} files)")

# Reminder that the split is destructive with respect to the source folder.
print("\n\u26a0\ufe0f  Note: Files have been MOVED from the original location!")