In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, RandomFlip, RandomRotation, RandomZoom
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, CSVLogger
from tensorflow.keras.regularizers import l2
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input as resnet50_preprocess
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.utils import compute_class_weight
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Use the Keras 'mixed_float16' global policy for every layer created after this point.
# NOTE: this must run before any model/layer is constructed for the policy to apply.
from tensorflow.keras.mixed_precision import set_global_policy
set_global_policy('mixed_float16')

# Turn on XLA just-in-time compilation for TensorFlow graphs
tf.config.optimizer.set_jit(True)

# Seed TensorFlow's and NumPy's global random generators so repeated runs start from the same state
tf.random.set_seed(42)
np.random.seed(42)

# Paths and hyperparameters shared by the functions below
CSV_PATH = "../Dataset/image_labels.csv"  # CSV written by regenerate_csv and read back by the loaders
IMG_DIR = "../Dataset/interior"           # Directory scanned for image files
IMG_HEIGHT, IMG_WIDTH = 224, 224          # Model input height/width (used for ResNet50 input_shape and model.build)
BATCH_SIZE = 32                           # Number of samples per batch in the tf.data pipelines
EPOCHS = 50                               # Upper bound on training epochs (early stopping may end sooner)
MODEL_NAME = 'resnet50'                   # Suffix used in the names of saved models, logs, and plots

# Generate a CSV file mapping image paths to their labels
def regenerate_csv(image_dir, output_file):
    """
    Scans the image directory, matches filenames to class labels based on predefined variations,
    and saves the mapping to a CSV file.

    A file is labelled with the first class (in the order listed below) for which any keyword
    occurs as a case-insensitive substring of the filename. Files with an unsupported extension
    or with no matching keyword are skipped.

    Args:
        image_dir (str): Directory containing the images.
        output_file (str): Path to save the generated CSV file.

    Returns:
        pd.DataFrame: DataFrame with columns 'image_path' and 'label'. The columns are present
        even when no image matched, so the written CSV always has a header row.
    """
    # Insertion order is the matching priority: earlier classes win when several keywords match.
    class_variations = {
        'bath': ['bath', 'bathroom'], 'bed': ['bed', 'bedroom'],
        'dining room': ['dining', 'dining_room', 'diningroom', 'din'],
        'kitchen': ['kitchen'], 'living room': ['living', 'living_room', 'livingroom']
    }
    image_extensions = ('.jpg', '.jpeg', '.png')

    data = []
    # os.listdir returns entries in arbitrary order; sorting keeps the CSV row order (and thus
    # any seeded split performed on it later) identical from run to run.
    for filename in sorted(os.listdir(image_dir)):
        lowered = filename.lower()
        if not lowered.endswith(image_extensions):
            continue
        label = next(
            (cls for cls, variations in class_variations.items()
             if any(variation.lower() in lowered for variation in variations)),
            None,
        )
        if label is not None:
            data.append({'image_path': os.path.join(image_dir, filename), 'label': label})

    # Explicit columns so an empty result still round-trips through to_csv/read_csv.
    df = pd.DataFrame(data, columns=['image_path', 'label'])
    df.to_csv(output_file, index=False)
    print(f"Regenerated CSV with {len(df)} images")
    return df

# Verify that all image paths in the CSV exist
def verify_data(csv_path, img_dir):
    """
    Checks that every path in the CSV's 'image_path' column exists on disk.

    Also prints the total row count and the per-label counts as a quick summary.

    Args:
        csv_path (str): Path to the CSV file with 'image_path' and 'label' columns.
        img_dir (str): Directory containing the images (accepted for interface
            compatibility; the paths stored in the CSV are checked as-is).

    Raises:
        ValueError: If at least one listed path does not exist; the message lists them all.
    """
    frame = pd.read_csv(csv_path)
    label_counts = frame['label'].value_counts()
    print(f"Total images: {len(frame)}, Classes: {label_counts}")

    # Collect every path that is absent so the error reports all of them at once.
    missing = []
    for path in frame['image_path']:
        if os.path.exists(path):
            continue
        missing.append(path)

    if not missing:
        return
    raise ValueError(f"Missing files: {missing}")

# Load and preprocess the dataset for training with ResNet50-specific preprocessing
def load_and_preprocess_data(csv_path, img_dir):
    """
    Loads the dataset from a CSV file, encodes labels, splits into training and validation sets,
    and creates TensorFlow datasets with ResNet50-specific preprocessing.

    Every image is resized to (IMG_HEIGHT, IMG_WIDTH) so that images of differing sizes can be
    batched together and match the model's input shape.

    Args:
        csv_path (str): Path to the CSV file with 'image_path' and 'label' columns.
        img_dir (str): Directory containing the images (accepted for interface compatibility;
            images are read from the paths stored in the CSV).

    Returns:
        tuple: (train_dataset, val_dataset, num_classes, label_encoder, val_df, class_weight_dict)
            - train_dataset / val_dataset: batched tf.data.Dataset objects yielding
              (image, one-hot label) pairs; only the training dataset is shuffled.
            - num_classes: number of distinct labels in the CSV.
            - label_encoder: fitted LabelEncoder mapping label strings to integer ids.
            - val_df: the validation rows of the DataFrame.
            - class_weight_dict: {class id: weight}, balanced over the training split.
    """
    # Load the CSV file into a DataFrame
    df = pd.read_csv(csv_path)

    # Encode the string labels (e.g., 'bath') into integers (e.g., 0, 1, ...)
    label_encoder = LabelEncoder()
    df['label_encoded'] = label_encoder.fit_transform(df['label'])
    num_classes = len(label_encoder.classes_)

    # Split the data into training (80%) and validation (20%) sets with stratification
    train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=42)

    # Class weights only affect the training loss, so derive them from the training split
    # alone rather than from data that includes the validation rows.
    train_classes = np.unique(train_df['label_encoded'])
    class_weights = compute_class_weight('balanced', classes=train_classes, y=train_df['label_encoded'])
    class_weight_dict = {int(cls): float(weight) for cls, weight in zip(train_classes, class_weights)}

    # Helper that turns one (path, label) pair into a model-ready (image, label) pair
    def load_image(image_path, label):
        img = tf.io.read_file(image_path)
        # Per the TensorFlow docs, decode_jpeg also decodes PNG files, which the CSV may contain
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.cast(img, tf.float32)
        # Bring every image to the model's input size; without this, images of different
        # sizes cannot be stacked into one batch.
        img = tf.image.resize(img, [IMG_HEIGHT, IMG_WIDTH])
        # ResNet50 preprocessing: RGB -> BGR and per-channel ImageNet mean subtraction (no scaling)
        img = resnet50_preprocess(img)
        return img, label

    # Shared pipeline: decode -> cache decoded tensors -> (shuffle) -> batch -> prefetch.
    # The cache sits before shuffle so that each epoch gets a fresh shuffle of cached items.
    def make_dataset(frame, shuffle):
        paths = frame['image_path'].to_numpy()
        one_hot = tf.keras.utils.to_categorical(frame['label_encoded'], num_classes)
        dataset = tf.data.Dataset.from_tensor_slices((paths, one_hot))
        dataset = dataset.map(load_image, num_parallel_calls=tf.data.AUTOTUNE).cache()
        if shuffle:
            dataset = dataset.shuffle(1000)
        return dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

    train_dataset = make_dataset(train_df, shuffle=True)
    # Validation order is left untouched so predictions can be aligned with labels later
    val_dataset = make_dataset(val_df, shuffle=False)

    return train_dataset, val_dataset, num_classes, label_encoder, val_df, class_weight_dict

# Build the pretrained ResNet50 model
def build_pretrained_model(num_classes, fine_tune_layers=20):
    """
    Builds a ResNet50-based model with data augmentation, a mostly frozen base, and a custom head.

    The returned model is NOT compiled; the caller is expected to call model.compile().

    Args:
        num_classes (int): Number of output classes.
        fine_tune_layers (int): How many of the last base-model layers are left trainable.
            Defaults to 20. Values <= 0 freeze the whole base. BatchNormalization layers are
            always kept frozen, even inside the fine-tuned range.

    Returns:
        tf.keras.Model: Uncompiled Sequential model (augmentation -> ResNet50 -> pooling -> head).
    """
    # Load ResNet50 with ImageNet weights, excluding the top layers
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))

    # Freeze everything except the last `fine_tune_layers` layers. An explicit index is used
    # instead of layers[:-n] because that slice is empty for n == 0 and would freeze nothing.
    base_model.trainable = True
    freeze_until = max(len(base_model.layers) - fine_tune_layers, 0)
    for index, layer in enumerate(base_model.layers):
        # Keras' fine-tuning guidance is to keep BatchNormalization layers in inference mode
        # when unfreezing a pretrained base, so their pretrained statistics are not overwritten.
        if index < freeze_until or isinstance(layer, tf.keras.layers.BatchNormalization):
            layer.trainable = False

    # Build the model with augmentation layers and a custom head
    model = Sequential([
        # Data augmentation to improve generalization
        RandomFlip("horizontal", seed=42),
        RandomRotation(0.1, seed=42),
        RandomZoom(0.1, seed=42),
        # Base ResNet50 model
        base_model,
        # Pooling to collapse the spatial dimensions into one feature vector per image
        GlobalAveragePooling2D(),
        # Dense layer with L2 weight regularization, followed by dropout
        Dense(1024, activation='relu', kernel_regularizer=l2(0.01)),
        Dropout(0.5),
        # Output layer; dtype is forced to float32 so the softmax output stays in full
        # precision regardless of the global dtype policy
        Dense(num_classes, activation='softmax', dtype='float32')
    ])
    return model

# Plot the training history
def plot_training_history(history):
    """
    Draws accuracy and loss curves (training vs. validation) side by side and writes the
    figure to 'training_history_<MODEL_NAME>.png'.

    Args:
        history: Object whose `.history` dict holds the 'accuracy', 'loss', 'val_accuracy'
            and 'val_loss' series (as returned by model.fit()).
    """
    plt.figure(figsize=(12, 4))
    # Panel 1 shows accuracy, panel 2 shows loss
    for panel, metric in enumerate(('accuracy', 'loss'), start=1):
        pretty = metric.capitalize()
        plt.subplot(1, 2, panel)
        training_series = history.history[metric]
        plt.plot(training_series, label=f'Training {pretty}')
        validation_series = history.history[f'val_{metric}']
        plt.plot(validation_series, label=f'Validation {pretty}')
        plt.title(f'Training and Validation {pretty}')
        plt.legend()
    output_name = f'training_history_{MODEL_NAME}.png'
    plt.savefig(output_name)
    plt.close()

# Plot the confusion matrix
def plot_confusion_matrix(y_true, y_pred, classes):
    """
    Renders the confusion matrix of the given labels as an annotated heatmap and writes it
    to 'confusion_matrix_<MODEL_NAME>.png'.

    Args:
        y_true: True labels.
        y_pred: Predicted labels.
        classes: Class names used as tick labels on both axes.
    """
    matrix = confusion_matrix(y_true, y_pred)

    plt.figure(figsize=(10, 8))
    heatmap_options = {
        'annot': True,        # write the count inside each cell
        'fmt': 'd',           # counts are integers
        'cmap': 'Blues',
        'xticklabels': classes,
        'yticklabels': classes,
    }
    sns.heatmap(matrix, **heatmap_options)

    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')

    output_name = f'confusion_matrix_{MODEL_NAME}.png'
    plt.savefig(output_name)
    plt.close()

# Main function to execute the pipeline
def main():
    """
    Executes the full pipeline: data preparation, model training, evaluation, and visualization.

    Side effects: (re)writes CSV_PATH, and writes the trained model, the label-encoder classes,
    a training log, a classification report, and two plots into the working directory.
    Returns early, without training, when no labelled image is found in IMG_DIR.
    """
    # Generate the CSV file with image paths and labels
    df = regenerate_csv(IMG_DIR, CSV_PATH)
    # regenerate_csv returns an empty DataFrame (not None) when nothing matched, so check both
    if df is None or df.empty:
        print(f"No labelled images found in {IMG_DIR}; nothing to train on.")
        return

    # Verify that all image paths exist
    verify_data(CSV_PATH, IMG_DIR)

    # Load and preprocess the dataset
    train_dataset, val_dataset, num_classes, label_encoder, val_df, class_weight_dict = load_and_preprocess_data(CSV_PATH, IMG_DIR)

    # Build the ResNet50 model
    model = build_pretrained_model(num_classes)

    # Compile with a learning rate below Adam's default, since pretrained layers are fine-tuned
    model.compile(optimizer=Adam(learning_rate=3e-4), loss='categorical_crossentropy', metrics=['accuracy'])
    model.build((None, IMG_HEIGHT, IMG_WIDTH, 3))
    model.summary()

    # Define callbacks for training
    callbacks = [
        # Stop training if validation accuracy doesn't improve for 15 epochs
        EarlyStopping(monitor='val_accuracy', patience=15, restore_best_weights=True),
        # Halve the learning rate if validation accuracy plateaus for 5 epochs
        ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=5, min_lr=1e-6),
        # Log training metrics to a CSV file (appends to an existing log from earlier runs)
        CSVLogger(f'training_log_{MODEL_NAME}.csv', append=True)
    ]

    # Train the model
    history = model.fit(
        train_dataset, epochs=EPOCHS, validation_data=val_dataset,
        callbacks=callbacks, class_weight=class_weight_dict, verbose=1
    )

    # Evaluate the model on the validation set
    val_loss, val_accuracy = model.evaluate(val_dataset)
    print(f"Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")

    # Save the trained model and label encoder
    model.save(f'model_{MODEL_NAME}.keras')
    np.save('label_encoder_classes.npy', label_encoder.classes_)

    # Plot training and validation accuracy/loss
    plot_training_history(history)

    # Predict the whole validation set once; the sample printout and the report both reuse it.
    # The validation dataset is not shuffled, so predictions and labels line up by position.
    val_predictions = model.predict(val_dataset)
    val_one_hot = np.concatenate([y for _, y in val_dataset])
    val_pred_labels = np.argmax(val_predictions, axis=1)
    val_true_labels = np.argmax(val_one_hot, axis=1)

    # Show the first few (true, predicted) pairs to inspect model performance
    sample_true = label_encoder.inverse_transform(val_true_labels[:10])
    sample_pred = label_encoder.inverse_transform(val_pred_labels[:10])
    print("Sample Predictions:", *zip(sample_true, sample_pred), sep='\n')

    # Generate a classification report; explicit labels keep target_names aligned with the
    # encoded class ids even if some class never appears in the predictions
    report = classification_report(
        val_true_labels, val_pred_labels,
        labels=np.arange(num_classes), target_names=label_encoder.classes_
    )
    print("\nClassification Report:\n", report)

    # Save the classification report
    with open(f'classification_report_{MODEL_NAME}.txt', 'w') as f:
        f.write(report)

    # Plot and save the confusion matrix
    plot_confusion_matrix(val_true_labels, val_pred_labels, label_encoder.classes_)

# Run the pipeline only when this module is the entry point, not when it is imported
if __name__ == "__main__":
    main()

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3060 Laptop GPU, compute capability 8.6
Num GPUs Available:  1
Regenerated CSV with 12335 images
Total images: 12335, Classes: label
living room    2621
dining room    2605
bed            2445
bath           2430
kitchen        2234
Name: count, dtype: int64
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 random_flip (RandomFlip)    (None, 224, 224, 3)       0         
                                                                 
 random_rotation (RandomRota  (None, 224, 224, 3)      0         
 tion)                                                           
                                                                 
 random_zoom (RandomZoom)    (None, 224, 224, 3