In [1]:
# Loading and Set-up Python Data Repositories
import os
import json # For loading the config file
import pandas as pd
import numpy as np
import cv2 # OpenCV for image processing
from sklearn.metrics import f1_score, classification_report
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import matplotlib.pyplot as plt
import seaborn as sns

# Setting white grid plot style
sns.set_style("whitegrid")

# --- Configuration ---
# Passed to load_img as target_size and used for the first two dimensions
# of the MobileNetV2 input_shape.
IMG_SIZE = (224, 224) # Target image size for the model
# Number of epochs passed to model.fit.
EPOCHS = 5
# Batch size passed to model.fit.
BATCH_SIZE = 32

# --- 1. Loading Path for Configuration File ---
# Reads 'config.json' from the current working directory and extracts
# 'data_folder_path', the parent folder that holds the 'training' and
# 'testing' sub-folders. Every failure prints a message and stops the script
# with a non-zero status via SystemExit (the site-provided exit() helper is
# meant for interactive shells and reports success to the caller).
print("--- Step 1: Loading Configuration ---")
config_path = 'config.json'
try:
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)
    base_data_folder = config['data_folder_path'] # Parent folder with 'training' and 'testing'
    print("✅ Configuration loaded successfully.")
except FileNotFoundError:
    print(f"❌ Error: The configuration file '{config_path}' was not found.")
    raise SystemExit(1) from None
except json.JSONDecodeError as e:
    # Previously a malformed file surfaced as a raw traceback.
    print(f"❌ Error: The configuration file '{config_path}' is not valid JSON: {e}")
    raise SystemExit(1) from None
except KeyError:
    print(f"❌ Error: 'data_folder_path' not found in '{config_path}'.")
    raise SystemExit(1) from None

if not os.path.exists(base_data_folder):
    print(f"❌ Error: Base data folder does not exist: {base_data_folder}")
    raise SystemExit(1)
else:
    print(f"✅ Using base data folder: {base_data_folder}")

# --- 2. Load the Image Paths and Labels from the Directory Structure ---
print("\n--- Step 2: Loading Image Paths and Labels ---")


def load_data_from_folders(folder_path):
    """Collect image paths and binary labels from class sub-folders.

    Looks for the sub-folders 'flip' (label 1) and 'notflip' (label 0)
    directly under ``folder_path``. A missing sub-folder is reported with a
    warning and skipped rather than treated as an error.

    Args:
        folder_path: Directory expected to contain the class sub-folders.

    Returns:
        A tuple ``(image_paths, labels)`` of two parallel lists: the full
        path of every regular file ending in .png/.jpg/.jpeg
        (case-insensitive), and the integer label of the sub-folder it was
        found in. Entries are ordered 'flip' first, then 'notflip', each
        sorted by file name.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    # Specify subfolders and their corresponding labels
    subfolders = {'flip': 1, 'notflip': 0}

    image_paths = []
    labels = []

    for label_name, label_value in subfolders.items():
        subfolder_path = os.path.join(folder_path, label_name)
        # isdir (not exists): a stray file with the class name must not
        # reach os.listdir, which would raise NotADirectoryError.
        if not os.path.isdir(subfolder_path):
            print(f"Warning: Subfolder not found: {subfolder_path}")
            continue

        print(f"Loading images from: {subfolder_path} (Label: {label_value})")
        # os.listdir returns entries in arbitrary order; sort so the sample
        # order is reproducible across runs and machines.
        for filename in sorted(os.listdir(subfolder_path)):
            file_path = os.path.join(subfolder_path, filename)
            is_image = filename.lower().endswith(image_extensions)
            # Require a regular file so a directory named like 'x.jpg' is
            # not collected as an image.
            if is_image and os.path.isfile(file_path):
                image_paths.append(file_path)
                labels.append(label_value)
            else:
                print(f"Skipping non-image file: {filename}")

    return image_paths, labels

# Collect (path, label) lists for both splits. The 'training' and 'testing'
# folders are expected directly under base_data_folder; the 'testing' split
# is used as the validation set for the rest of the script.

# Load training data
train_folder = os.path.join(base_data_folder, 'training')
train_image_paths, train_labels = load_data_from_folders(train_folder)

# Load testing/validation data
test_folder = os.path.join(base_data_folder, 'testing')
val_image_paths, val_labels = load_data_from_folders(test_folder)

# Both splits are required; stop with a non-zero status if either is empty.
if not train_image_paths or not val_image_paths:
    print("❌ Error: No images found in training or testing directories. Please check folder structure and contents.")
    raise SystemExit(1)

print("\n✅ Data loading complete.")
print(f"Found {len(train_image_paths)} training images and {len(val_image_paths)} validation images.")

# --- 3. Data Preprocessing ---
print("\n--- Step 3: Preprocessing Data ---")

def preprocess_image(image_path):
    """Read one image from disk and return it as a scaled pixel array.

    The image is loaded at IMG_SIZE, converted to an array, and divided by
    255.0. Any failure is reported with a warning and signalled by returning
    None so the caller can drop the sample.

    NOTE(review): the Keras MobileNetV2 ImageNet weights are documented as
    expecting inputs scaled to [-1, 1] (mobilenet_v2.preprocess_input);
    the [0, 1] scaling here is kept unchanged -- confirm it is intentional.
    """
    try:
        pixels = img_to_array(load_img(image_path, target_size=IMG_SIZE))
        return pixels / 255.0 # Normalize pixel values
    except Exception as exc:
        # Best effort by design: one unreadable file must not abort the run.
        print(f"Warning: Error processing image {os.path.basename(image_path)}: {exc}")
        return None

# Run preprocess_image over both splits, drop the samples it could not load
# (returned as None) together with their labels, and stack the rest into
# NumPy arrays for training.
print("Preprocessing training images...")
X_train_processed = [preprocess_image(path) for path in train_image_paths]
print("Preprocessing validation images...")
X_val_processed = [preprocess_image(path) for path in val_image_paths]

# Filter out None values (failed preprocessing) and corresponding labels.
# Indices are kept so images and labels stay aligned after filtering.
train_valid_indices = [i for i, img in enumerate(X_train_processed) if img is not None]
val_valid_indices = [i for i, img in enumerate(X_val_processed) if img is not None]

X_train = np.array([X_train_processed[i] for i in train_valid_indices])
y_train = np.array([train_labels[i] for i in train_valid_indices])
X_val = np.array([X_val_processed[i] for i in val_valid_indices])
y_val = np.array([val_labels[i] for i in val_valid_indices])

# len() rather than truthiness: the truth value of a NumPy array is ambiguous.
if len(X_train) == 0 or len(X_val) == 0:
    print("❌ Error: No valid images could be processed after preprocessing. Cannot continue.")
    # Non-zero status; the site-provided exit() would report success.
    raise SystemExit(1)

print(f"✅ Preprocessing complete. Using {len(X_train)} training images and {len(X_val)} validation images.")

# --- 4. Build the Model (Transfer Learning) ---
print("\n--- Step 4: Building the Model ---")

# Backbone: MobileNetV2 without its classifier top, loaded with the
# 'imagenet' weights and frozen so only the new head is trained.
base_model = MobileNetV2(
    include_top=False,
    weights='imagenet',
    input_shape=IMG_SIZE + (3,),
)
base_model.trainable = False

# Head: pooled backbone features -> 128-unit ReLU layer -> single sigmoid
# unit producing the probability of label 1.
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(128, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Binary cross-entropy matches the single sigmoid output; accuracy is
# tracked during training. Other metrics such as tf.keras.metrics.AUC()
# could be added to the list.
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

print("✅ Model built successfully.")
# model.summary() # Optional: See model structure

# --- 5. Train the Model ---
print("\n--- Step 5: Training the Model ---")

# Fit on the training arrays and evaluate against the validation arrays
# after every epoch; verbose=1 prints per-epoch progress. The returned
# History object is kept in `history`.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=1,
)
print("✅ Model training complete.")

# --- 6. Evaluate Performance (F1 Score) ---
print("\n--- Step 6: Evaluating Performance ---")

# Sigmoid outputs for the validation set, then hard 0/1 predictions using
# a 0.5 cut-off.
y_pred_proba = model.predict(X_val)
y_pred_binary = (y_pred_proba > 0.5).astype(int)

# F1 on the validation set is the primary success metric.
f1 = f1_score(y_val, y_pred_binary)
print(f"\nValidation F1 Score: {f1:.4f}")

# Per-class precision / recall / F1 breakdown.
print("\nClassification Report:")
report_text = classification_report(
    y_val,
    y_pred_binary,
    target_names=['notflip (0)', 'flip (1)'],
)
print(report_text)

# --- Optional: Visualize Training History ---
# plt.figure(figsize=(12, 4))
# plt.subplot(1, 2, 1)
# plt.plot(history.history['loss'], label='Train Loss')
# plt.plot(history.history['val_loss'], label='Validation Loss')
# plt.legend()
# plt.title('Loss Over Epochs')

# plt.subplot(1, 2, 2)
# plt.plot(history.history['accuracy'], label='Train Accuracy')
# plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
# plt.legend()
# plt.title('Accuracy Over Epochs')
# plt.show()

--- Step 1: Loading Configuration ---
✅ Configuration loaded successfully.
✅ Using base data folder: D:\Apziva\MonReader Project\images\images

--- Step 2: Loading Image Paths and Labels ---
Loading images from: D:\Apziva\MonReader Project\images\images\training\flip (Label: 1)
Loading images from: D:\Apziva\MonReader Project\images\images\training\notflip (Label: 0)
Loading images from: D:\Apziva\MonReader Project\images\images\testing\flip (Label: 1)
Loading images from: D:\Apziva\MonReader Project\images\images\testing\notflip (Label: 0)

✅ Data loading complete.
Found 2392 training images and 597 validation images.

--- Step 3: Preprocessing Data ---
Preprocessing training images...
Preprocessing validation images...
✅ Preprocessing complete. Using 2392 training images and 597 validation images.

--- Step 4: Building the Model ---
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_n