In [None]:
import os
import numpy as np
import tensorflow as tf
from pathlib import Path
from tensorflow import keras
from sklearn.model_selection import train_test_split

#### Load datasets

In [None]:


# Root folder of the image collection; each class lives in its own subfolder
data_dir = Path("Datasets")

# Images and labels are read straight from files on disk.
# The loader defined below expects one subdirectory per class, so adapt
# the loading logic if your files are organized differently.

def load_data_from_directory(directory, img_size=(299, 299)):
    """
    Load images and integer labels from a class-per-subdirectory layout.

    Expected structure:
    - Datasets/
        - class_1/
            - image1.jpg
            - image2.jpg
        - class_2/
            - image1.jpg

    Args:
        directory: Dataset root (str or path-like) whose subdirectories are
            the classes.
        img_size: Target size handed to ``load_img``; every image is resized
            to it while loading.

    Returns:
        A tuple ``(images, labels, class_names)``: ``images`` is the array
        built from every successfully loaded image, ``labels`` holds for each
        image the index of its class in ``class_names``, and ``class_names``
        lists the class subdirectory names in sorted order.

    Raises:
        OSError: If ``directory`` (or a class subdirectory) cannot be listed,
            e.g. because it does not exist.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    images = []
    labels = []
    class_names = []

    for class_dir in sorted(os.listdir(directory)):
        class_path = os.path.join(directory, class_dir)
        # Skip plain files and hidden folders (e.g. .ipynb_checkpoints, which
        # Jupyter creates on its own) so they never become a class.
        if class_dir.startswith('.') or not os.path.isdir(class_path):
            continue

        # Derive the label from the number of classes accepted so far instead
        # of the position in the raw listing: skipped entries would otherwise
        # leave gaps and labels would stop indexing into class_names.
        class_idx = len(class_names)
        class_names.append(class_dir)

        # Sorted so the sample order is reproducible across runs/filesystems.
        for img_file in sorted(os.listdir(class_path)):
            # Case-insensitive match so files such as IMG_001.JPG are kept.
            if not img_file.lower().endswith(image_extensions):
                continue
            img_path = os.path.join(class_path, img_file)
            try:
                img = keras.preprocessing.image.load_img(img_path, target_size=img_size)
                img_array = keras.preprocessing.image.img_to_array(img)
            except Exception as e:
                # Best effort: report the unreadable file and keep going.
                print(f"Error loading {img_path}: {e}")
                continue
            images.append(img_array)
            labels.append(class_idx)

    return np.array(images), np.array(labels), class_names


# Load all data from the Datasets directory
try:
    all_images, all_labels, class_names = load_data_from_directory(data_dir)
    if len(all_images) == 0:
        # An existing-but-empty directory loads "successfully"; treat it as a
        # failure here instead of hitting an obscure error in train_test_split.
        raise ValueError(f"no images found under {data_dir}")
    print(f"✓ Loaded all data: {all_images.shape}")
    print(f"Classes: {class_names}\n")
except Exception as e:
    print(f"✗ Error loading data: {e}")
    # Best-effort fallback so the remaining cells can still be exercised.
    # Kept deliberately small and float32: 10,000 float64 images of
    # 299x299x3 would need roughly 21 GB of RAM before the split even runs.
    print("⚠ Falling back to SYNTHETIC random data; any results are meaningless")
    n_synthetic = 320
    fallback_rng = np.random.default_rng(42)  # seeded for reproducibility
    all_images = fallback_rng.random((n_synthetic, 299, 299, 3), dtype=np.float32) * 255
    all_labels = fallback_rng.integers(0, 2, n_synthetic)
    class_names = ['Class_0', 'Class_1']

# Split data: 70% train, 15% validation, 15% test
# First carve off 30% as a temporary hold-out, stratified by label ...
train_images, temp_images, train_labels, temp_labels = train_test_split(
    all_images, all_labels, test_size=0.3, random_state=42, stratify=all_labels
)

# ... then halve the hold-out into validation and test, again stratified.
validation_images, test_images, validation_labels, test_labels = train_test_split(
    temp_images, temp_labels, test_size=0.5, random_state=42, stratify=temp_labels
)

# Sanity check: every sample ended up in exactly one split.
assert len(train_images) + len(validation_images) + len(test_images) == len(all_images)

print(f"Train images shape: {train_images.shape}")
print(f"Validation images shape: {validation_images.shape}")
print(f"Test images shape: {test_images.shape}")

### Split into Training, Validation, and Test Sets

All images are loaded from a single dataset directory and divided with a stratified split:
- **Training set**: 70% of data (used for model training)
- **Validation set**: 15% of data (used for model validation during training)
- **Test set**: 15% of data (used for final model evaluation)

The stratified split ensures each class is represented proportionally in all three sets.

In [None]:
# Report the size of each pre-split array before wrapping it in tf.data
print("Creating TensorFlow datasets...")
print(f"Training samples: {len(train_images)}")
print(f"Validation samples: {len(validation_images)}")
print(f"Test samples: {len(test_images)}\n")

# One Dataset per split; each element is a single (image, label) pair
train_ds, validation_ds, test_ds = (
    tf.data.Dataset.from_tensor_slices(split_arrays)
    for split_arrays in (
        (train_images, train_labels),
        (validation_images, validation_labels),
        (test_images, test_labels),
    )
)

In [None]:
def process_images(image, label):
    """Standardize a single image and resize it to 299x299.

    The image is first rescaled per-image to zero mean and unit standard
    deviation, then resized; the label is passed through unchanged.

    Returns:
        The ``(processed_image, label)`` pair.
    """
    standardized = tf.image.per_image_standardization(image)
    resized = tf.image.resize(standardized, (299, 299))
    return resized, label

# Samples per batch for all three pipelines
BATCH_SIZE = 32

# Number of samples in each split (the datasets are still unbatched here)
train_ds_size = tf.data.experimental.cardinality(train_ds).numpy()
test_ds_size = tf.data.experimental.cardinality(test_ds).numpy()
validation_ds_size = tf.data.experimental.cardinality(validation_ds).numpy()

# NOTE: this cell rebinds train_ds/test_ds/validation_ds to their processed,
# batched versions, so re-run the dataset-creation cell before re-running it.

# Training: shuffle across the whole split every epoch; a trailing partial
# batch is dropped so every training step sees a full batch.
train_ds = (train_ds
                  .map(process_images)
                  .shuffle(buffer_size=train_ds_size)
                  .batch(batch_size=BATCH_SIZE, drop_remainder=True))

# Evaluation splits: no shuffling, so batches stay aligned with
# test_labels / validation_labels, and no dropped remainder, so every sample
# is evaluated (a split smaller than one batch would otherwise become empty).
test_ds = (test_ds
                  .map(process_images)
                  .batch(batch_size=BATCH_SIZE, drop_remainder=False))
validation_ds = (validation_ds
                  .map(process_images)
                  .batch(batch_size=BATCH_SIZE, drop_remainder=False))

In [None]:
# Building the Inception V3 Model Architecture
# The constructor is reached through the already-imported `keras` namespace
# (a bare `InceptionV3` name is never imported in this notebook). The network
# is created with untrained weights and an output layer sized to the number
# of dataset classes instead of the default 1000-class ImageNet head.
model = keras.applications.InceptionV3(
    weights=None,
    input_shape=(299, 299, 3),
    classes=len(class_names),
)

# Compiling the Model
# Labels are integer class indices, hence the sparse categorical loss.
model.compile(optimizer='adam', loss=keras.losses.sparse_categorical_crossentropy, metrics=['accuracy'])

# Checking Model Summary
model.summary()