In [16]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore', category=UserWarning)
import os
import shutil
import glob

In [6]:
# Show the notebook's working directory; relative paths resolve against it.
working_dir = os.getcwd()
print(working_dir)

C:\Users\user\anaconda3\envs\mlp\Animal classification


In [14]:
# Resolve the dataset folder against the notebook's working directory instead of
# hard-coding one machine's absolute path (the notebook runs from the folder that
# contains 'Dataset').
dataset_path = os.path.join(os.getcwd(), 'Dataset')

# Only sub-directories are classes: stray files (Thumbs.db, .DS_Store, ...) would
# otherwise be counted as a class and crash the per-class listing below.
# os.listdir returns entries in arbitrary order, so sort for a stable display.
classes = sorted(
    entry for entry in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, entry))
)
print("Number of classes:", len(classes))
print("Class names:", classes)
for cls in classes:
    n_images = len(os.listdir(os.path.join(dataset_path, cls)))
    print(f"{cls}: {n_images} images")

Number of classes: 3
Class names: ['cats', 'dogs', 'snakes']
cats: 1000 images
dogs: 1000 images
snakes: 1000 images


In [None]:
# splitting the dataset
def load_and_split_dataset(data_dir, train_size=0.7, val_size=0.15, output_dir="split_dataset", random_state=42):
    """
    Split a folder-per-class image dataset into training, validation, and test sets.

    Every visible sub-directory of ``data_dir`` is treated as one class (for
    example 'cats', 'dogs', 'snakes'). Each class is split independently, and
    the selected files are copied to ``output_dir/<split>/<class>/``.

    Parameters:
    data_dir: path to the dataset directory containing one sub-folder per class
    train_size: proportion of each class used for training (float in (0, 1))
    val_size: proportion of each class used for validation (float in (0, 1));
        the remainder after train and validation becomes the test set
    output_dir: directory to save split datasets; deleted and recreated on
        every call
    random_state: seed for reproducibility

    Returns:
    Dictionary keyed by 'train', 'val' and 'test'. Each value is a dictionary
    with 'files' (source file paths) and 'labels' (class folder name per file).

    Raises:
    FileNotFoundError: data_dir does not exist.
    ValueError: the proportions are invalid, output_dir is (or contains)
        data_dir, no class folders are found, or a class has too few images
        to put at least one in every split.
    """
    image_extensions = ('.jpg', '.png', '.jpeg')
    splits = ('train', 'val', 'test')

    # ---- Validate everything before touching the file system ----
    if not (0 < train_size < 1 and 0 < val_size < 1 and train_size + val_size < 1):
        raise ValueError(
            "train_size and val_size must each be in (0, 1) and sum to less than 1 "
            f"(got train_size={train_size}, val_size={val_size})"
        )
    if not os.path.isdir(data_dir):
        raise FileNotFoundError(f"Directory {data_dir} not found")

    source_root = os.path.normcase(os.path.realpath(data_dir))
    output_root = os.path.normcase(os.path.realpath(output_dir))
    # output_dir is wiped with rmtree below; refuse to run if that would also
    # wipe the source images.
    if source_root == output_root or source_root.startswith(os.path.join(output_root, '')):
        raise ValueError(
            f"output_dir {output_dir} must not be the same as, or contain, data_dir {data_dir}"
        )

    # Discover classes from the folder names instead of hard-coding them.
    # Hidden folders (e.g. .ipynb_checkpoints) and the output folder itself
    # (when it lives inside data_dir) are not classes.
    classes = sorted(
        name for name in os.listdir(data_dir)
        if not name.startswith('.')
        and os.path.isdir(os.path.join(data_dir, name))
        and os.path.normcase(os.path.realpath(os.path.join(data_dir, name))) != output_root
    )
    if not classes:
        raise ValueError(f"No class folders found in {data_dir}")

    # ---- Plan the split for every class ----
    planned = {}
    for cls in classes:
        class_dir = os.path.join(data_dir, cls)

        # Case-insensitive extension match; sorted so that the same
        # random_state gives the same split regardless of listing order.
        files = sorted(
            os.path.join(class_dir, f)
            for f in os.listdir(class_dir)
            if f.lower().endswith(image_extensions)
            and os.path.isfile(os.path.join(class_dir, f))
        )
        if len(files) != 1000:
            print(f"Warning: Found {len(files)} images in {class_dir}, expected 1000")

        # Use integer counts: a float fraction such as 0.15 / (1 - 0.7) is not
        # exactly 0.5 in floating point and would make val/test sizes uneven.
        n_train = round(len(files) * train_size)
        n_val = round(len(files) * val_size)
        n_test = len(files) - n_train - n_val
        if min(n_train, n_val, n_test) < 1:
            raise ValueError(
                f"Class '{cls}' has {len(files)} images, too few for non-empty "
                f"train/val/test splits with train_size={train_size}, val_size={val_size}"
            )

        # First split: train vs (val + test); second split: val vs test.
        train_files, holdout_files = train_test_split(
            files, train_size=n_train, random_state=random_state
        )
        val_files, test_files = train_test_split(
            holdout_files, train_size=n_val, random_state=random_state
        )
        planned[cls] = {'train': train_files, 'val': val_files, 'test': test_files}

    # ---- Write the output tree (only reached when all inputs are valid) ----
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)

    split_data = {split: {'files': [], 'labels': []} for split in splits}
    for cls in classes:
        for split in splits:
            dest_dir = os.path.join(output_dir, split, cls)
            os.makedirs(dest_dir, exist_ok=True)
            for file_path in planned[cls][split]:
                shutil.copy(file_path, os.path.join(dest_dir, os.path.basename(file_path)))
            split_data[split]['files'].extend(planned[cls][split])
            split_data[split]['labels'].extend([cls] * len(planned[cls][split]))

    # Print summary
    for split in splits:
        print(f"{split.capitalize()} set: {len(split_data[split]['files'])} images")
        for cls in classes:
            cls_count = split_data[split]['labels'].count(cls)
            print(f"  {cls}: {cls_count} images")

    return split_data

# Example usage
# data_dir = "path/to/your/dataset"  # Directory with 'cat', 'dog', 'snake' folders
# split_data = load_and_split_dataset(data_dir, output_dir="split_animal_dataset")