# Data Preprocessing
## Endangered Species Image Classifier

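This notebook prepares the inputs for the model: it defines the image transformations used for training and validation, sanity-checks the conservation-status and geographic-region label encoders, and builds the train/validation/test data loaders.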

In [None]:
# Import required libraries
import sys
sys.path.append('..')

import numpy as np
import pandas as pd
from PIL import Image
import torch
from torchvision import transforms
import matplotlib.pyplot as plt

from src.preprocessing import *
from src.data_loader import *
from config.model_config import MODEL_CONFIG

## Image Transformations

In [None]:
# Define image transformations
#
# Per-channel normalization statistics, declared once so the training and
# validation pipelines cannot drift apart. These are the ImageNet mean/std
# values conventionally paired with torchvision's pretrained backbones.
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

# transforms.Resize treats a bare int as "scale the shorter edge to this
# length and keep the aspect ratio", so images with different aspect ratios
# would come out with different shapes and could not be stacked into a batch.
# Expanding an int into (height, width) forces a fixed square output; a
# sequence coming from the config is passed through untouched.
_input_size = MODEL_CONFIG['input_size']
RESIZE_TO = (_input_size, _input_size) if isinstance(_input_size, int) else _input_size

# Training pipeline: resize, random augmentation, then tensor conversion and
# normalization.
train_transform = transforms.Compose([
    transforms.Resize(RESIZE_TO),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),  # angle sampled from [-15, +15] degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD)
])

# Validation pipeline: same resize and normalization, but no random
# augmentation, so evaluation is deterministic.
val_transform = transforms.Compose([
    transforms.Resize(RESIZE_TO),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD)
])

print("Transformations defined successfully!")

## Label Encoding

In [None]:
# Round-trip each conservation status code through the encoder and decoder
# and print all three stages so a mismatch is visible at a glance.
test_labels = ['CR', 'EN', 'VU', 'NT', 'LC']

print("Conservation Label Encoding:")
for status_code in test_labels:
    encoded_status = encode_conservation_label(status_code)
    decoded_status = decode_conservation_label(encoded_status)
    print(f"  {status_code} -> {encoded_status} -> {decoded_status}")

In [None]:
# Round-trip a few region lists (single- and multi-region) through the
# geographic encoder/decoder. Only the input and the decoded result are
# printed; the intermediate encoding is not shown.
test_regions = [['Africa'], ['Asia', 'Europe'], ['North America', 'Marine']]

print("\nGeographic Region Encoding:")
for region_group in test_regions:
    round_tripped = decode_geographic_regions(encode_geographic_regions(region_group))
    print(f"  {region_group} -> {round_tripped}")

## Create Data Loaders

In [None]:
# Load and prepare dataloaders
print("Creating data loaders...")

# Bind every name this cell is responsible for *before* attempting the load.
# The except branch below deliberately keeps the notebook running, but on its
# own it would leave these names undefined on a fresh kernel (NameError in
# later cells) or, worse, silently pointing at loaders from an earlier run.
# With the explicit None defaults, downstream cells can reliably check
# `train_loader is None` to detect that loading failed.
dataset = None
train_loader = val_loader = test_loader = None

try:
    dataset = load_species_data()
    train_loader, val_loader, test_loader = create_dataloaders(dataset)

    # len() of each loader is reported as its number of batches.
    print(f"Train batches: {len(train_loader)}")
    print(f"Validation batches: {len(val_loader)}")
    print(f"Test batches: {len(test_loader)}")
except Exception as e:
    # Best-effort by design: report the failure and carry on so the remaining
    # cells can still run. NOTE(review): no placeholder data is created in
    # this cell — the loaders simply stay None; confirm a later cell supplies it.
    print(f"Error creating dataloaders: {e}")
    print("Will use placeholder data for demonstration")

## Augmentation Summary

In [None]:
# Print a human-readable list of the augmentation/normalization steps.
# This cell is text-only; it does not render any images.
augmentation_summary = (
    "Data augmentation examples:",
    "- Random horizontal flip",
    "- Random rotation (±15°)",
    "- Color jitter (brightness, contrast)",
    "- Normalization",
)
for summary_line in augmentation_summary:
    print(summary_line)

## Preprocessing Configuration Summary

In [None]:
# Collect the preprocessing settings into one dictionary and echo them.
# Nothing is written to disk here; the dictionary only lives in the kernel.
preprocessing_info = dict(
    input_size=MODEL_CONFIG['input_size'],
    normalization_mean=[0.485, 0.456, 0.406],
    normalization_std=[0.229, 0.224, 0.225],
    augmentation=['horizontal_flip', 'rotation', 'color_jitter'],
)

print("\nPreprocessing Configuration:")
for setting_name in preprocessing_info:
    print(f"  {setting_name}: {preprocessing_info[setting_name]}")

print("\nData preprocessing complete!")