# Split data into 5 folds

In [None]:
import os
import shutil
import random
from collections import defaultdict

# Paths: per-class input folders of images and the root under which the
# Fold<k>/Good and Fold<k>/Bad output directories are created
input_good_dir = '/Users/suzetteschulenburg/Desktop/Nuutse/Good'
input_bad_dir = '/Users/suzetteschulenburg/Desktop/Nuutse/Bad'
output_base_dir = '/Users/suzetteschulenburg/Desktop/Main2'

# Number of folds the base IDs are split into
num_folds = 5

# Build the Fold<k>/<category> directory tree for every fold
def create_directories(base_dir, num_folds):
    """Create ``Fold1`` .. ``Fold<num_folds>`` under *base_dir*.

    Every fold directory gets a ``Good`` and a ``Bad`` subdirectory.
    Directories that already exist are left as they are.
    """
    for fold_number in range(1, num_folds + 1):
        fold_root = os.path.join(base_dir, f'Fold{fold_number}')
        for label in ('Good', 'Bad'):
            os.makedirs(os.path.join(fold_root, label), exist_ok=True)

# Create the output tree up front so the copy step can write into existing Fold<k>/<category> directories
create_directories(output_base_dir, num_folds)

# Group image paths by the base ID encoded at the start of each filename
def collect_images_by_base_id(directory):
    """Map each base ID to the ``.jpg`` paths in *directory* that carry it.

    The base ID is the part of the filename before the first underscore
    (e.g. ``E2025`` for ``E2025_IMG_8469_aug1.jpg``). The extension check is
    case-insensitive. Returns a ``defaultdict(list)``.
    """
    grouped = defaultdict(list)
    jpg_names = (name for name in os.listdir(directory) if name.lower().endswith('.jpg'))
    for name in jpg_names:
        prefix, _, _ = name.partition('_')
        grouped[prefix].append(os.path.join(directory, name))
    return grouped

# Collect images by base ID
good_images_by_base_id = collect_images_by_base_id(input_good_dir)
bad_images_by_base_id = collect_images_by_base_id(input_bad_dir)

# Build the set of distinct base IDs across both classes. A base ID that
# occurs in both Good and Bad must appear only once here, otherwise it could
# be assigned to two different folds. Sort before shuffling: os.listdir()
# order is arbitrary, so without sorting the fixed seed would not make the
# split reproducible.
all_base_ids = sorted(set(good_images_by_base_id) | set(bad_images_by_base_id))
random.seed(42)
random.shuffle(all_base_ids)

# Split base IDs into folds (round-robin over the shuffled list)
folds = [all_base_ids[start::num_folds] for start in range(num_folds)]

# Distribute images into folds
images_by_category = {'Good': good_images_by_base_id, 'Bad': bad_images_by_base_id}
for fold_idx, fold_base_ids in enumerate(folds, 1):
    fold_dir = os.path.join(output_base_dir, f'Fold{fold_idx}')

    for base_id in fold_base_ids:
        # Copy the images of this base ID from every class it occurs in, so
        # all of its images end up in the same fold.
        for category, images_by_base_id in images_by_category.items():
            # .get() avoids inserting empty entries into the defaultdict
            for img_path in images_by_base_id.get(base_id, ()):
                shutil.copy(img_path, os.path.join(fold_dir, category, os.path.basename(img_path)))

print("Data successfully split into folds with no overlapping base IDs.")

Check that no base IDs overlap between folds or with the test set

In [None]:
import os
from collections import defaultdict

# Paths to the fold root (expected to contain Fold1..Fold5) and to the held-out test set.
# NOTE(review): the test set lives under a different root ('Main') than the folds ('MainUse2') — confirm this is intended.
fold_base_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
test_base_dir = '/Users/suzetteschulenburg/Desktop/Main/Test'

# Function to collect base IDs from a directory
def collect_base_ids(directory, categories=('Good', 'Bad')):
    """Map each base ID found under *directory* to its image paths.

    Args:
        directory: Folder expected to contain one subfolder per category.
        categories: Subfolder names to scan. The default is an immutable
            tuple so it cannot be modified between calls.

    Returns:
        A ``defaultdict(list)`` keyed by base ID (the filename text before
        the first underscore) holding the paths of all ``.jpg`` files
        (case-insensitive) that share that ID. Category folders that are
        missing are skipped.
    """
    base_ids = defaultdict(list)
    for category in categories:
        category_dir = os.path.join(directory, category)
        # isdir (not exists): a stray file with the category's name must not reach os.listdir
        if not os.path.isdir(category_dir):
            continue
        for fname in os.listdir(category_dir):
            if fname.lower().endswith('.jpg'):
                base_id = fname.split('_')[0]  # Extract base ID
                base_ids[base_id].append(os.path.join(category_dir, fname))
    return base_ids

# Collect base IDs from folds
fold_base_ids = {}
for fold in range(1, 6):  # Assuming 5 folds
    fold_dir = os.path.join(fold_base_dir, f'Fold{fold}')
    fold_base_ids[f'Fold{fold}'] = set(collect_base_ids(fold_dir).keys())

# Collect base IDs from test set
test_base_ids = set(collect_base_ids(test_base_dir).keys())

# Check for overlaps between folds. Each unordered pair is compared once, so
# an overlap between FoldA and FoldB is not reported a second time as
# FoldB vs FoldA.
overlap_found = False
print("Checking for overlaps between folds:")
fold_names = list(fold_base_ids)
for position, fold1 in enumerate(fold_names):
    for fold2 in fold_names[position + 1:]:
        overlap = fold_base_ids[fold1] & fold_base_ids[fold2]
        if overlap:
            overlap_found = True
            print(f"Overlap found between {fold1} and {fold2}: {overlap}")

if not overlap_found:
    print("No overlap detected between folds!")

# Check for overlaps between test set and folds
test_overlap_found = False
print("\nChecking for overlaps between test set and folds:")
for fold, fold_ids in fold_base_ids.items():
    overlap = fold_ids & test_base_ids
    if overlap:
        test_overlap_found = True
        print(f"Overlap found between test set and {fold}: {overlap}")

if not test_overlap_found:
    print("No overlap detected between test set and any folds!")

# Example: Print a few base IDs from each fold and the test set
print("\nExample Base IDs from Each Fold and Test Set:")
for fold, base_ids in fold_base_ids.items():
    print(f"{fold}: {list(base_ids)[:5]}")  # Print first 5 base IDs for each fold
print(f"Test Set: {list(test_base_ids)[:5]}")

Count base ids per fold

In [None]:
import os
from collections import defaultdict

# Base directory containing folds
base_directory = '/Users/suzetteschulenburg/Desktop/MainUse2'

# Function to count unique base IDs in each class per fold
def count_unique_base_ids_per_fold(base_directory, num_folds=5, class_names=('Good', 'Bad')):
    """Count distinct base IDs per class in every fold directory.

    Args:
        base_directory: Root containing ``Fold1`` .. ``Fold<num_folds>``.
        num_folds: Number of fold directories to inspect (default 5).
        class_names: Class subfolder names to inspect.

    Returns:
        ``{'Fold<k>': {class_name: count}}``. A missing class directory is
        reported as 0. The base ID is the filename text before the first
        underscore; only ``.jpg`` files (case-insensitive) are considered.
    """
    folds_data = {}
    for fold in range(1, num_folds + 1):
        fold_dir = os.path.join(base_directory, f'Fold{fold}')
        class_base_id_counts = {}
        for class_name in class_names:
            class_dir = os.path.join(fold_dir, class_name)
            if os.path.isdir(class_dir):
                # A set comprehension deduplicates the IDs of augmented copies
                base_ids = {
                    filename.split('_')[0]
                    for filename in os.listdir(class_dir)
                    if filename.lower().endswith('.jpg')
                }
                class_base_id_counts[class_name] = len(base_ids)
            else:
                class_base_id_counts[class_name] = 0
        folds_data[f'Fold{fold}'] = class_base_id_counts
    return folds_data

# Get unique base ID counts per fold
folds_data = count_unique_base_ids_per_fold(base_directory)

# Print the results
for fold, counts in folds_data.items():
    print(f"{fold}: {counts}")

Count total number of images per fold

In [None]:
import os

# Base directory containing folds
base_directory = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'

# Function to count images in each class per fold
def count_images_per_fold(base_directory, num_folds=5, class_names=('Good', 'Bad')):
    """Count ``.jpg`` files per class in every fold directory.

    Args:
        base_directory: Root containing ``Fold1`` .. ``Fold<num_folds>``.
        num_folds: Number of fold directories to inspect (default 5).
        class_names: Class subfolder names to inspect.

    Returns:
        ``{'Fold<k>': {class_name: image_count}}``. A missing class
        directory is reported as 0. The extension check is case-insensitive.
    """
    folds_data = {}
    for fold in range(1, num_folds + 1):
        fold_dir = os.path.join(base_directory, f'Fold{fold}')
        class_counts = {}
        for class_name in class_names:
            class_dir = os.path.join(fold_dir, class_name)
            if os.path.isdir(class_dir):
                class_counts[class_name] = sum(
                    1 for f in os.listdir(class_dir) if f.lower().endswith('.jpg')
                )
            else:
                class_counts[class_name] = 0
        folds_data[f'Fold{fold}'] = class_counts
    return folds_data

# Get image counts per fold
folds_data = count_images_per_fold(base_directory)

# Print the results
for fold, counts in folds_data.items():
    print(f"{fold}: {counts}")

Redistribute better (balance the number of images across folds)

In [None]:
import os
import shutil
from collections import defaultdict
import random

# Paths
source_folds_dir = '/Users/suzetteschulenburg/Desktop/Main2'  # Existing folds that are read
new_folds_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'  # Destination for the rebalanced folds
categories = ['Good', 'Bad']  # Class subfolders expected inside every fold
num_folds = 5  # Number of folds read from the source and written to the destination

# Create the empty Fold<k>/<category> tree for the rebalanced folds
def create_new_folds(base_dir, num_folds, categories):
    """Create ``Fold1`` .. ``Fold<num_folds>`` under *base_dir*, each with one subfolder per category.

    Directories that already exist are left as they are.
    """
    for fold_number in range(1, num_folds + 1):
        fold_root = os.path.join(base_dir, f'Fold{fold_number}')
        for label in categories:
            os.makedirs(os.path.join(fold_root, label), exist_ok=True)

# Gather every image from the existing folds, keyed by base ID
def collect_images_by_base_id(base_dir, num_folds, categories):
    """Map each base ID to all of its ``.jpg`` paths across the source folds.

    Walks ``<base_dir>/Fold<k>/<category>`` for k in 1..num_folds. Missing
    directories are reported on stdout and skipped. The base ID is the
    filename text before the first underscore. Returns a
    ``defaultdict(list)``.
    """
    images_by_id = defaultdict(list)
    for fold_number in range(1, num_folds + 1):
        for label in categories:
            label_dir = os.path.join(base_dir, f'Fold{fold_number}', label)
            if os.path.exists(label_dir):
                print(f"Processing {label} in Fold{fold_number}")  # Debugging log
                # Only .jpg files are considered (case-insensitive)
                jpg_names = [name for name in os.listdir(label_dir) if name.lower().endswith('.jpg')]
                for name in jpg_names:
                    images_by_id[name.split('_')[0]].append(os.path.join(label_dir, name))
            else:
                print(f"Directory not found: {label_dir}")
    return images_by_id

# Greedy balancing: largest base IDs first, each into the currently smallest fold
def distribute_base_ids_with_image_count(base_id_dict, num_folds):
    """Assign base IDs to folds so that per-fold image counts stay balanced.

    Base IDs are shuffled, then stably sorted by descending image count (so
    the shuffle only decides the order among IDs with equal counts). Each ID
    goes to the fold that currently holds the fewest images; ties go to the
    lowest-numbered fold.

    Returns a list of ``num_folds`` lists of base IDs.
    """
    shuffled_ids = list(base_id_dict)
    random.shuffle(shuffled_ids)
    ordered_ids = sorted(shuffled_ids, key=lambda bid: len(base_id_dict[bid]), reverse=True)

    running_totals = [0] * num_folds
    assignments = [[] for _ in range(num_folds)]

    for bid in ordered_ids:
        # min() returns the first minimal index, i.e. the lowest-numbered least-loaded fold
        target = min(range(num_folds), key=running_totals.__getitem__)
        assignments[target].append(bid)
        running_totals[target] += len(base_id_dict[bid])

    return assignments

# Copy images to new folds
def move_images_to_new_folds(base_id_dict, base_id_folds, new_folds_dir):
    """Copy every image of each base ID into its assigned fold.

    Despite the name, files are copied (``shutil.copy``); the sources are
    left in place.

    Args:
        base_id_dict: Mapping of base ID to a list of source image paths.
        base_id_folds: One list of base IDs per fold; position ``k`` is
            written to ``Fold<k+1>``.
        new_folds_dir: Root directory of the new folds.

    The class is taken from the name of the image's parent directory: files
    directly inside a ``Good`` folder go to ``Good``, everything else to
    ``Bad``. Testing the parent folder rather than the whole path keeps a
    "Good" elsewhere in the path or filename from misfiling Bad images.
    """
    for fold_idx, base_ids in enumerate(base_id_folds, 1):
        fold_dir = os.path.join(new_folds_dir, f'Fold{fold_idx}')
        for base_id in base_ids:
            for img_path in base_id_dict[base_id]:
                parent_name = os.path.basename(os.path.dirname(img_path))
                category = 'Good' if parent_name == 'Good' else 'Bad'
                target_dir = os.path.join(fold_dir, category)
                # Do not depend on the fold tree having been created beforehand
                os.makedirs(target_dir, exist_ok=True)
                shutil.copy(img_path, os.path.join(target_dir, os.path.basename(img_path)))
                print(f"Copied {img_path} to {target_dir}")  # Debugging log

# Main logic
def redistribute_data_with_balancing(source_folds_dir, new_folds_dir, num_folds, categories):
    """Rebuild the folds so that image counts are balanced across them.

    Steps: create the destination tree, group all source images by base ID,
    assign base IDs to folds by image count, then copy the images into the
    new fold directories.

    Args:
        source_folds_dir: Root of the existing folds that are read.
        new_folds_dir: Root under which the rebalanced folds are written.
        num_folds: Number of folds to read and to create.
        categories: Class subfolder names inside every fold.
    """
    # Create new fold directories
    create_new_folds(new_folds_dir, num_folds, categories)

    # Collect images by base ID
    base_id_dict = collect_images_by_base_id(source_folds_dir, num_folds, categories)

    # Distribute base IDs across folds
    base_id_folds = distribute_base_ids_with_image_count(base_id_dict, num_folds)

    # Copy images to new folds
    move_images_to_new_folds(base_id_dict, base_id_folds, new_folds_dir)

    print(f"Redistribution complete. New folds created at: {new_folds_dir}")

# Run the redistribution process
redistribute_data_with_balancing(source_folds_dir, new_folds_dir, num_folds, categories)

In [None]:
import os
from collections import defaultdict

# Base directory containing folds
base_directory = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'

# Function to count unique base IDs in each class per fold
def count_unique_base_ids_per_fold(base_directory, num_folds=5, class_names=('Good', 'Bad')):
    """Count distinct base IDs per class in every fold directory.

    Args:
        base_directory: Root containing ``Fold1`` .. ``Fold<num_folds>``.
        num_folds: Number of fold directories to inspect (default 5).
        class_names: Class subfolder names to inspect.

    Returns:
        ``{'Fold<k>': {class_name: count}}``. A missing class directory is
        reported as 0. The base ID is the filename text before the first
        underscore; only ``.jpg`` files (case-insensitive) are considered.
    """
    folds_data = {}
    for fold in range(1, num_folds + 1):
        fold_dir = os.path.join(base_directory, f'Fold{fold}')
        class_base_id_counts = {}
        for class_name in class_names:
            class_dir = os.path.join(fold_dir, class_name)
            if os.path.isdir(class_dir):
                # A set comprehension deduplicates the IDs of augmented copies
                base_ids = {
                    filename.split('_')[0]
                    for filename in os.listdir(class_dir)
                    if filename.lower().endswith('.jpg')
                }
                class_base_id_counts[class_name] = len(base_ids)
            else:
                class_base_id_counts[class_name] = 0
        folds_data[f'Fold{fold}'] = class_base_id_counts
    return folds_data

# Get unique base ID counts per fold
folds_data = count_unique_base_ids_per_fold(base_directory)

# Print the results
for fold, counts in folds_data.items():
    print(f"{fold}: {counts}")

Count original images

In [None]:
import os
import re

# Base directory containing folds
base_directory = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'

# Original (non-augmented) files end in IMG_<digits>_processed.jpg; any text may precede "IMG_".
# Matching is case-insensitive.
pattern = re.compile(r'^.*IMG_\d+_processed\.jpg$', re.IGNORECASE)

def is_original_img(filename):
    """Return True if *filename* (whitespace-stripped) ends in ``IMG_<digits>_processed.jpg``.

    Names with extra text between the digits and ``_processed`` do not match.
    """
    candidate = filename.strip()
    return pattern.fullmatch(candidate) is not None

def count_original_images_per_fold(base_directory):
    """Count original images per class for ``Fold1`` .. ``Fold5``.

    Returns ``{'Fold<k>': {'Good': n, 'Bad': m}}``; a missing class
    directory is reported as 0.
    """
    per_fold = {}
    for fold_number in range(1, 6):
        fold_root = os.path.join(base_directory, f'Fold{fold_number}')
        per_class = {}
        for label in ('Good', 'Bad'):
            label_dir = os.path.join(fold_root, label)
            if not os.path.exists(label_dir):
                per_class[label] = 0
                continue
            per_class[label] = sum(1 for name in os.listdir(label_dir) if is_original_img(name))
        per_fold[f'Fold{fold_number}'] = per_class
    return per_fold

# Run and print
folds_data = count_original_images_per_fold(base_directory)
for fold, counts in folds_data.items():
    print(f"{fold}: {counts}")

In [None]:
import os
import re

# === Base directory ===
base_directory = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'

# === Pattern for original image filenames: ...IMG_<digits>_processed.jpg, case-insensitive ===
original_pattern = re.compile(r'^.*IMG_\d+_processed\.jpg$', re.IGNORECASE)

def is_original_img(filename):
    """Return True if the stripped *filename* fully matches ``original_pattern``."""
    return original_pattern.fullmatch(filename.strip()) is not None

def count_images_per_fold(base_directory):
    """Report original/augmented image statistics per class for ``Fold1`` .. ``Fold5``.

    Returns ``{'Fold<k>': {class: stats}}`` where ``stats`` holds
    ``Originals``, ``Total`` (all ``.jpg`` files), ``Augmented``
    (``Total - Originals``) and ``AvgAugPerOriginal`` (rounded to two
    decimals, 0 when there are no originals). Missing class directories
    yield all-zero stats.
    """
    def summarize(class_dir):
        # Stats for one class directory
        if not os.path.exists(class_dir):
            return {'Originals': 0, 'Total': 0, 'Augmented': 0, 'AvgAugPerOriginal': 0.0}

        jpg_names = [name for name in os.listdir(class_dir) if name.lower().endswith('.jpg')]
        total = len(jpg_names)
        original_count = sum(1 for name in jpg_names if is_original_img(name))
        augmentations = total - original_count
        if original_count > 0:
            avg_aug_per_img = round(augmentations / original_count, 2)
        else:
            avg_aug_per_img = 0
        return {
            'Originals': original_count,
            'Total': total,
            'Augmented': augmentations,
            'AvgAugPerOriginal': avg_aug_per_img,
        }

    return {
        f'Fold{fold}': {
            class_name: summarize(os.path.join(base_directory, f'Fold{fold}', class_name))
            for class_name in ('Good', 'Bad')
        }
        for fold in range(1, 6)
    }

# === Run and print nicely ===
folds_data = count_images_per_fold(base_directory)

for fold, stats in folds_data.items():
    print(f"\n{fold}:")
    for cls, data in stats.items():
        print(f"  {cls}: Originals = {data['Originals']}, Total = {data['Total']}, "
              f"Augmented = {data['Augmented']}, Avg Augmentations = {data['AvgAugPerOriginal']}")

In [None]:
import os

# Base directory containing folds
base_directory = '/Users/suzetteschulenburg/Desktop/MainUse2'

# Function to count images in each class per fold
def count_images_per_fold(base_directory, num_folds=5, class_names=('Good', 'Bad')):
    """Count ``.jpg`` files per class in every fold directory.

    Args:
        base_directory: Root containing ``Fold1`` .. ``Fold<num_folds>``.
        num_folds: Number of fold directories to inspect (default 5).
        class_names: Class subfolder names to inspect.

    Returns:
        ``{'Fold<k>': {class_name: image_count}}``. A missing class
        directory is reported as 0. The extension check is case-insensitive.
    """
    folds_data = {}
    for fold in range(1, num_folds + 1):
        fold_dir = os.path.join(base_directory, f'Fold{fold}')
        class_counts = {}
        for class_name in class_names:
            class_dir = os.path.join(fold_dir, class_name)
            if os.path.isdir(class_dir):
                class_counts[class_name] = sum(
                    1 for f in os.listdir(class_dir) if f.lower().endswith('.jpg')
                )
            else:
                class_counts[class_name] = 0
        folds_data[f'Fold{fold}'] = class_counts
    return folds_data

# Get image counts per fold
folds_data = count_images_per_fold(base_directory)

# Print the results
for fold, counts in folds_data.items():
    print(f"{fold}: {counts}")

In [None]:
import os
import re

# Base directory containing folds
base_directory = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'

# Original (non-augmented) files end in IMG_<digits>_processed.jpg; any text may precede "IMG_".
# Matching is case-insensitive.
pattern = re.compile(r'^.*IMG_\d+_processed\.jpg$', re.IGNORECASE)

def is_original_img(filename):
    """Return True if *filename* (whitespace-stripped) ends in ``IMG_<digits>_processed.jpg``."""
    stripped_name = filename.strip()
    return pattern.fullmatch(stripped_name) is not None

def count_original_images_per_fold(base_directory):
    """Count original images per class for ``Fold1`` .. ``Fold5``.

    Returns ``{'Fold<k>': {'Good': n, 'Bad': m}}``; a missing class
    directory is reported as 0.
    """
    def originals_in(class_dir):
        # Number of original-image filenames in one class directory (0 if it does not exist)
        if not os.path.exists(class_dir):
            return 0
        return len([entry for entry in os.listdir(class_dir) if is_original_img(entry)])

    results = {}
    for fold_number in range(1, 6):
        fold_root = os.path.join(base_directory, f'Fold{fold_number}')
        results[f'Fold{fold_number}'] = {
            label: originals_in(os.path.join(fold_root, label)) for label in ('Good', 'Bad')
        }
    return results

# Run and print
folds_data = count_original_images_per_fold(base_directory)
for fold, counts in folds_data.items():
    print(f"{fold}: {counts}")

# Add Martiens cows

Expand the Excel sheet so that every row corresponds to a single photo number

In [None]:
import pandas as pd

# Load the Excel file
excel_path = "/Users/suzetteschulenburg/Desktop/Masters/Data/UselessExcel/Martiens.xlsx"
df = pd.read_excel(excel_path)

# One [ID, photo number, rating] entry per individual photo
expanded_data = []

# Last photo number of the previous animal; None until the first row has been handled
previous_end_photo_number = None

# Each sheet row stores only the LAST photo number of an animal. Its photos
# are taken to run from the photo after the previous animal's last one up to
# that number.
for _, row in df.iterrows():
    base_id, last_photo_number, rating = row["Id"], row["Photo number"], row["Rating"]

    if previous_end_photo_number is None:
        # First row: there is no predecessor, so assume this animal has 3 photos
        first_photo_number = last_photo_number - 2
    else:
        first_photo_number = previous_end_photo_number + 1

    # Number this animal's photos 1, 2, ... in ascending photo-number order
    expanded_data.extend(
        [f"{base_id}_{position}", photo_number, rating]
        for position, photo_number in enumerate(range(first_photo_number, last_photo_number + 1), start=1)
    )

    previous_end_photo_number = last_photo_number

# Convert to a DataFrame
expanded_df = pd.DataFrame(expanded_data, columns=["ID", "Photo Number", "Rating"])

# Save to a new Excel file
output_path = "/Users/suzetteschulenburg/Desktop/Masters/Data/ProcessedExcel.xlsx"
expanded_df.to_excel(output_path, index=False)

print(f"Processed Excel saved to: {output_path}")

Add gender to the new Excel file

In [None]:
import pandas as pd

# Paths to the Excel files
original_excel_path = "/Users/suzetteschulenburg/Desktop/Masters/Data/UselessExcel/Martiens.xlsx"
processed_excel_path = "/Users/suzetteschulenburg/Desktop/Masters/Data/ProcessedExcel.xlsx"
output_excel_path = "/Users/suzetteschulenburg/Desktop/Masters/Data/UselessExcel/ProcessedExcel_WithGender.xlsx"

# Load the original and processed Excel files
original_df = pd.read_excel(original_excel_path)
processed_df = pd.read_excel(processed_excel_path)

# Ensure column names are formatted correctly
original_df = original_df.rename(columns={"Photo number": "Photo Number", "Id": "Base_ID"})
processed_df = processed_df.rename(columns={"ID": "Full_ID"})

# IDs in the processed sheet have the form "<Id>_<n>". Strip only the trailing
# counter (split on the LAST underscore) so that an Id which itself contains
# an underscore is not truncated.
processed_df["Base_ID"] = processed_df["Full_ID"].astype(str).str.rsplit('_', n=1).str[0]

# Build a one-row-per-animal gender lookup. The key is cast to str to match
# the string Base_ID extracted above, and duplicate Ids are dropped so the
# left merge cannot multiply photo rows.
gender_lookup = (
    original_df[['Base_ID', 'Gender']]
    .assign(Base_ID=lambda frame: frame['Base_ID'].astype(str))
    .drop_duplicates(subset='Base_ID')
)

# Merge processed_df with the lookup to add Gender using Base_ID;
# validate= makes pandas raise if the lookup is ever not unique per Base_ID.
merged_df = processed_df.merge(gender_lookup, on="Base_ID", how="left", validate="many_to_one")

# Rename the columns back
merged_df = merged_df.rename(columns={"Base_ID": "ID"})

# Save the updated processed Excel file
merged_df.to_excel(output_excel_path, index=False)

print(f"Updated processed file saved to: {output_excel_path}")

Rename images

In [None]:
import os
import pandas as pd

# Paths
excel_path = "/Users/suzetteschulenburg/Desktop/Masters/Data/UselessExcel/ProcessedExcel_WithGender.xlsx"
image_dir = "/Users/suzetteschulenburg/Desktop/Masters/Beeste/Backup/MARTIENS 2/MARTIENS VEILING"

# Load the sheet and, in the same step, rename its columns to the names read by rename_images
df = pd.read_excel(excel_path).rename(columns={"ID": "Base_ID", "Photo Number": "Photo_Number"})

# Function to rename images in the main folder while keeping "IMG_" in the name
def rename_images(image_dir, df):
    """Prefix each ``IMG_<number>.jpg`` in *image_dir* with its ID from *df*.

    Args:
        image_dir: Folder holding the ``IMG_<number>.jpg`` files.
        df: DataFrame with ``Base_ID`` and ``Photo_Number`` columns; one
            rename is attempted per row.

    Each file ``IMG_<number>.jpg`` becomes ``<Base_ID>_IMG_<number>.jpg``
    (number zero-padded to 4 digits). Missing files are reported and
    skipped. An existing target is never overwritten.
    """
    for _, row in df.iterrows():
        base_id = row["Base_ID"]  # ID used as the filename prefix
        photo_number = row["Photo_Number"]  # The corresponding photo number

        # Photo numbers can arrive as floats (e.g. when the column contains
        # blanks or the row is upcast); 590.0 must map to "0590", not "590.0".
        if isinstance(photo_number, float) and photo_number.is_integer():
            photo_number = int(photo_number)
        photo_tag = str(photo_number).zfill(4)  # Ensures leading zeros (e.g., IMG_0590)

        # Construct the original and new filenames with the correct "IMG_" format
        original_filename = f"IMG_{photo_tag}.jpg"
        new_filename = f"{base_id}_IMG_{photo_tag}.jpg"

        # Define full paths
        original_path = os.path.join(image_dir, original_filename)
        new_path = os.path.join(image_dir, new_filename)

        if not os.path.exists(original_path):
            print(f"Skipped (file not found): {original_path}")
            continue

        # os.rename silently replaces an existing file on POSIX; refuse instead of losing data
        if os.path.exists(new_path):
            print(f"Skipped (target already exists): {new_path}")
            continue

        os.rename(original_path, new_path)
        print(f"Renamed: {original_path} → {new_path}")

# Execute renaming: files are renamed in place inside image_dir, one attempt per sheet row
rename_images(image_dir, df)

print("Image renaming completed!")

Split cows into good and bad folders

In [None]:
import os
import pandas as pd
import shutil

# Paths
excel_path = "/Users/suzetteschulenburg/Desktop/Masters/Data/UselessExcel/ProcessedExcel_WithGender.xlsx"
image_dir = "/Users/suzetteschulenburg/Desktop/Masters/Beeste/Backup/MARTIENS 2/MARTIENS VEILING"

# New directories for classification
good_dir = "/Users/suzetteschulenburg/Desktop/Masters/Beeste/Backup/MARTIENS/Good"
bad_dir = "/Users/suzetteschulenburg/Desktop/Masters/Beeste/Backup/MARTIENS/Bad"

# Create both class directories if they don't exist
for class_dir in (good_dir, bad_dir):
    os.makedirs(class_dir, exist_ok=True)

# Load the sheet and rename the two columns whose names differ from what move_images reads;
# "Gender" and "Rating" already have the expected names.
df = pd.read_excel(excel_path).rename(columns={"ID": "Base_ID", "Photo Number": "Photo_Number"})

# Function to move images based on rating and gender
def move_images(image_dir, df, good_dir, bad_dir):
    """Move the renamed images of female animals into the Good or Bad folder.

    Args:
        image_dir: Folder holding ``<Base_ID>_IMG_<number>.jpg`` files.
        df: DataFrame with ``Base_ID``, ``Photo_Number``, ``Gender`` and
            ``Rating`` columns.
        good_dir: Destination for ratings 8 and 9.
        bad_dir: Destination for ratings 2, 3 and 4.

    Rows whose gender is not ``"F"`` or whose rating is outside those two
    groups are ignored. Missing image files are reported and skipped.
    """
    destination_by_rating = {2: bad_dir, 3: bad_dir, 4: bad_dir, 8: good_dir, 9: good_dir}

    for _, row in df.iterrows():
        base_id = row["Base_ID"]  # ID used as the filename prefix
        photo_number = row["Photo_Number"]  # The corresponding photo number
        gender = row["Gender"]  # Gender of the cattle
        rating = row["Rating"]  # Rating score

        # Only process female ("F") images
        if gender != "F":
            continue

        # Skip ratings that are not in one of the two groups
        destination = next(
            (folder for value, folder in destination_by_rating.items() if rating == value), None
        )
        if destination is None:
            continue

        # Photo numbers can arrive as floats; 12.0 must map to "0012", not "12.0"
        if isinstance(photo_number, float) and photo_number.is_integer():
            photo_number = int(photo_number)

        # Construct image filename
        image_filename = f"{base_id}_IMG_{str(photo_number).zfill(4)}.jpg"
        original_path = os.path.join(image_dir, image_filename)
        target_path = os.path.join(destination, image_filename)

        # Move the file if it exists
        if os.path.exists(original_path):
            shutil.move(original_path, target_path)
            print(f"Moved: {original_path} → {target_path}")
        else:
            print(f"Skipped (file not found): {original_path}")

# Execute image moving: matching files are moved (not copied) out of image_dir
move_images(image_dir, df, good_dir, bad_dir)

print("Image classification into 'Good' and 'Bad' completed!")

Add augmentation to Martiens

In [None]:
import os
import numpy as np
from PIL import Image, ImageOps, ImageEnhance, ImageFilter

# Define source directories; augmented copies are written back into these same folders
good_dir = '/Users/suzetteschulenburg/Desktop/Add martiens by main/Good'
bad_dir = '/Users/suzetteschulenburg/Desktop/Add martiens by main/Bad'

# Function to add Gaussian noise
def add_gaussian_noise(img, mean=0, std=25):
    """Return a copy of *img* with additive Gaussian noise.

    Args:
        img: Image (or array-like) with 8-bit channel values.
        mean: Mean of the noise distribution.
        std: Standard deviation of the noise distribution.

    The noise is added in floating point and the result is clipped to
    0..255 before converting back to ``uint8``. Casting the noise itself to
    ``uint8`` (or adding in ``uint8``) would wrap negative values and
    overflowing sums around instead of saturating them.
    """
    pixels = np.asarray(img, dtype=np.float32)
    noise = np.random.normal(mean, std, pixels.shape)
    noisy = np.clip(np.rint(pixels + noise), 0, 255).astype(np.uint8)
    return Image.fromarray(noisy)

# Gamma correction via a 256-entry lookup table
def adjust_gamma(img, gamma=1.5):
    """Apply gamma correction to an ``RGB`` or ``L`` image.

    Each channel value ``v`` is mapped to ``255 * (v / 255) ** (1 / gamma)``
    through ``img.point``. Raises ``ValueError`` for any other image mode.
    """
    exponent = 1.0 / gamma
    lut = [((level / 255.0) ** exponent) * 255 for level in range(256)]

    if img.mode not in ('RGB', 'L'):
        raise ValueError(f'Unsupported image mode: {img.mode}')

    # The table is repeated once per channel: 3 for RGB, 1 for grayscale
    channel_count = 3 if img.mode == 'RGB' else 1
    return img.point(lut * channel_count)

# Function to adjust hue
def adjust_hue(img, hue_factor=0.5):
    """Rotate the hue of *img* and return the result as an RGB image.

    Args:
        img: Image to convert to HSV and shift.
        hue_factor: Shift as a fraction of the hue range; it is scaled by
            255 and truncated to an integer offset.

    The 8-bit hue channel holds 256 values (0..255), so the shifted hue
    wraps modulo 256. Wrapping modulo 255 would fold 255 onto 0 and could
    never produce the value 255.
    """
    hsv = np.array(img.convert('HSV'))
    shift = int(hue_factor * 255)
    # Widen to int before adding so the sum cannot overflow uint8
    hsv[..., 0] = (hsv[..., 0].astype(int) + shift) % 256
    return Image.fromarray(hsv, 'HSV').convert('RGB')

# Count the image files directly inside a directory
def count_images(directory):
    """Return how many entries of *directory* end in .jpg, .jpeg or .png (case-insensitive)."""
    image_suffixes = ('.jpg', '.jpeg', '.png')
    total = 0
    for entry in os.listdir(directory):
        if entry.lower().endswith(image_suffixes):
            total += 1
    return total

# Function to augment each image to have 11 total variations
def augment_images(source_dir):
    """Top every source image in *source_dir* up to 11 files (1 original + augmentations).

    Augmented copies are written next to the source as
    ``<stem>_aug<k>.jpg``, each produced by one randomly chosen
    transformation. The function can be re-run safely:

    * files that are themselves ``<stem>_aug<k>`` outputs are never used as
      sources;
    * existing augmentations are matched by exact stem, so ``IMG_1`` is not
      confused with ``IMG_10``;
    * new files use indices that are not taken yet, so existing
      augmentations are not overwritten.

    Args:
        source_dir: Folder containing .jpg/.jpeg/.png images.
    """
    target_total = 11
    image_suffixes = ('.jpg', '.jpeg', '.png')

    # Name -> transformation; every entry takes an image and returns a new one
    augmenters = {
        'flip': ImageOps.mirror,
        'gray': ImageOps.grayscale,
        'noisy': add_gaussian_noise,
        'sharp': lambda im: ImageEnhance.Sharpness(im).enhance(2.0),
        'contrast': lambda im: ImageEnhance.Contrast(im).enhance(1.5),
        'blur': lambda im: im.filter(ImageFilter.GaussianBlur(radius=2)),
        'gamma': adjust_gamma,
        'hue': adjust_hue,
        'saturation': lambda im: ImageEnhance.Color(im).enhance(1.5),
    }
    augmentation_names = list(augmenters)

    def split_augmented(stem):
        # '<stem>_aug<k>' -> (stem, k); anything else -> (stem, None)
        head, sep, tail = stem.rpartition('_aug')
        if sep and head and tail.isdecimal():
            return head, int(tail)
        return stem, None

    # One directory snapshot: separate source images from existing augmentations
    source_names = []
    used_indices = {}
    for file_name in os.listdir(source_dir):
        if not file_name.lower().endswith(image_suffixes):
            continue
        stem, index = split_augmented(os.path.splitext(file_name)[0])
        taken = used_indices.setdefault(stem, set())
        if index is None:
            source_names.append(file_name)
        else:
            taken.add(index)

    for img_name in source_names:
        base_name = os.path.splitext(img_name)[0]
        taken = used_indices[base_name]
        existing_count = 1 + len(taken)

        if existing_count >= target_total:
            print(f"Skipping {img_name}, already has {existing_count} images.")
            continue  # Skip if already has 11 variations

        # Close the file handle promptly; convert to RGB so every
        # transformation applies and the result can be saved as JPEG
        with Image.open(os.path.join(source_dir, img_name)) as opened:
            img = opened.convert('RGB')

        next_index = 0
        for _ in range(target_total - existing_count):
            # Advance to the first index that is not used yet
            while next_index in taken:
                next_index += 1

            # Apply a random augmentation
            augmentation_choice = str(np.random.choice(augmentation_names))
            img_aug = augmenters[augmentation_choice](img.copy())

            # Save the augmented image
            img_aug.save(os.path.join(source_dir, f"{base_name}_aug{next_index}.jpg"))
            taken.add(next_index)

    print(f"Augmentation complete for {source_dir}")

# Perform augmentation on "Good" and "Bad" folders (new files are written into the same folders)
augment_images(good_dir)
augment_images(bad_dir)

print("Augmentation complete for all images in 'Good' and 'Bad' folders.")

Count base_ids

In [None]:
import os
from collections import defaultdict

# Directories whose .jpg files are scanned for base IDs
good_dir = "/Users/suzetteschulenburg/Desktop/Masters/Beeste/Backup/MARTIENS/Good"
bad_dir = "/Users/suzetteschulenburg/Desktop/Masters/Beeste/Backup/MARTIENS/Bad"

# Collect the distinct base IDs present in one directory
def count_unique_base_ids(directory):
    """Return ``(count, ids)`` for the distinct base IDs of the ``.jpg`` files in *directory*.

    The base ID is the filename text before the first underscore; the
    extension check is case-insensitive.
    """
    unique_ids = {
        name.split('_')[0]
        for name in os.listdir(directory)
        if name.lower().endswith(".jpg")
    }
    return len(unique_ids), unique_ids

# Count unique base IDs in Good and Bad folders
good_count, good_base_ids = count_unique_base_ids(good_dir)
bad_count, bad_base_ids = count_unique_base_ids(bad_dir)

# Print results. Nothing is written to disk here, so no "report saved"
# message is printed.
print(f"Unique base IDs in Good folder: {good_count}")
print(f"Unique base IDs in Bad folder: {bad_count}")

# Training

Get best learning rate VGG

In [None]:

import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import VGG16
from tensorflow.keras.regularizers import l2
import pickle

# Directories: fold root used for cross-validation, held-out test images, and
# output folders for trained models and training histories.
# NOTE(review): the test set is read from 'MainUse/Test' while the folds come from 'MainUse2' — confirm this is intended.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
test_image_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Models_mainUse2MetNuweChangesGetBestLR'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Histories_mainseetN2uweChnagesGetBestLR'
# Both output folders are created when this cell runs, if they do not exist yet
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# Folds: paths of Fold1..Fold5 under base_fold_dir
fold_dirs = [os.path.join(base_fold_dir, f'Fold{i}') for i in range(1, 6)]

# Function to load images and labels
def load_images_and_labels(image_dir):
    """Load every ``.jpg`` under ``<image_dir>/Good`` and ``<image_dir>/Bad``.

    Args:
        image_dir: Folder containing the ``Good`` and/or ``Bad`` subfolders.

    Returns:
        ``(images, labels)`` as NumPy arrays. Each image is loaded at
        224x224 and scaled by 1/255; the label is 1 for ``Good`` and 0 for
        ``Bad``. Missing class folders are skipped.

    The extension check is case-insensitive so that files such as
    ``x.JPG`` are loaded too, and files are read in sorted name order so
    the sample order does not depend on the filesystem.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_image_dir = os.path.join(image_dir, subdir)
        if not os.path.isdir(full_image_dir):
            continue
        label = 1 if subdir == 'Good' else 0
        for fname in sorted(os.listdir(full_image_dir)):
            if not fname.lower().endswith('.jpg'):
                continue
            image_path = os.path.join(full_image_dir, fname)
            image_array = load_img(image_path, target_size=(224, 224))
            images.append(img_to_array(image_array) / 255.0)
            labels.append(label)
    return np.array(images), np.array(labels)

# Function to create the VGG16-based model with improvements
def create_vgg16_model(image_shape, learning_rate=1e-5):
    """Build and compile a binary classifier on top of an ImageNet VGG16 base.

    The VGG16 convolutional base (without its top) is followed by
    Flatten -> Dense(256, relu, L2 0.01) -> BatchNormalization -> Dropout(0.5)
    -> Dense(1, sigmoid).  The model is compiled with Adam, binary
    cross-entropy loss and the accuracy metric.

    Args:
        image_shape: Input shape passed to VGG16 as ``input_shape``.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled Keras model.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=image_shape)

    # Classification head, applied in order to the backbone output.
    head_layers = [
        Flatten(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for head_layer in head_layers:
        tensor = head_layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Learning rates compared in this search
learning_rates = [1e-3, 5e-4, 1e-4, 5e-5, 1e-5, 5e-6]

# One dict per (learning rate, fold); 'accuracy' is stored in percent.
validation_metrics = []
test_metrics = []

# Imported here so the loop below can release Keras state between runs.
from tensorflow.keras.backend import clear_session

# The test set is the same for every learning rate and fold, so it is loaded
# once instead of once per training run.
X_test, y_test = load_images_and_labels(test_image_dir)

for lr in learning_rates:
    print(f"\nTesting Learning Rate: {lr}")

    for val_fold_index in range(5):
        fold_number = val_fold_index + 1
        print(f"\nProcessing Fold {fold_number} as Validation Set with LR = {lr}")

        # The current fold validates; all other folds are used for training.
        train_folds = [fold for i, fold in enumerate(fold_dirs) if i != val_fold_index]
        val_fold = fold_dirs[val_fold_index]

        # Load training data from the selected folds
        X_train, y_train = [], []
        for train_fold in train_folds:
            images, labels = load_images_and_labels(train_fold)
            X_train.append(images)
            y_train.append(labels)
        X_train, y_train = np.concatenate(X_train), np.concatenate(y_train)

        # Load validation data
        X_val, y_val = load_images_and_labels(val_fold)

        # Many models are built in this loop; clearing the Keras session
        # first keeps state from earlier runs from accumulating in memory.
        clear_session()

        # Train the model
        model_path = os.path.join(model_save_dir, f'model_fold{fold_number}_lr{lr}.keras')
        model = create_vgg16_model(X_train.shape[1:], learning_rate=lr)
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=25, verbose=1, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=10, verbose=1, min_lr=1e-7),
            ModelCheckpoint(model_path, save_best_only=True, verbose=1)
        ]

        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=70,
            batch_size=32,
            callbacks=callbacks,
            verbose=2
        )

        # Save training history
        history_path = os.path.join(history_save_dir, f'history_fold{fold_number}_lr{lr}.pkl')
        with open(history_path, 'wb') as f:
            pickle.dump(history.history, f)

        # Evaluate on validation set.  model.evaluate() returns accuracy as a
        # fraction, so it is converted to percent before being stored/printed.
        val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)
        val_accuracy_pct = val_accuracy * 100
        validation_metrics.append({'lr': lr, 'fold': fold_number, 'accuracy': val_accuracy_pct, 'loss': val_loss})
        print(f"Validation Accuracy for Fold {fold_number}: {val_accuracy_pct:.2f}%")

        # Evaluate on test set
        test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
        test_accuracy_pct = test_accuracy * 100
        test_metrics.append({'lr': lr, 'fold': fold_number, 'accuracy': test_accuracy_pct, 'loss': test_loss})
        print(f"Test Accuracy for Fold {fold_number}: {test_accuracy_pct:.2f}%")

        # Save the model that produced the metrics above.
        # NOTE(review): this writes to the same path as ModelCheckpoint and so
        # replaces the best-val_loss checkpoint with the in-memory weights;
        # confirm that is intended when early stopping did not trigger.
        model.save(model_path)

# Per-run results: one line for every (learning rate, fold) combination.
print("\nValidation Metrics:")
for metric in validation_metrics:
    print(f"LR {metric['lr']} - Fold {metric['fold']}: Accuracy: {metric['accuracy']:.2f}%, Loss: {metric['loss']:.4f}")

print("\nTest Metrics:")
for metric in test_metrics:
    print(f"LR {metric['lr']} - Fold {metric['fold']}: Accuracy: {metric['accuracy']:.2f}%, Loss: {metric['loss']:.4f}")

# Per-learning-rate summary.  The learning rates can only be compared when
# the fold results are aggregated separately for each of them.
print("\nSummary per Learning Rate (averaged over folds):")
for lr in dict.fromkeys(m['lr'] for m in validation_metrics):
    lr_val_acc = [m['accuracy'] for m in validation_metrics if m['lr'] == lr]
    lr_val_loss = [m['loss'] for m in validation_metrics if m['lr'] == lr]
    lr_test_acc = [m['accuracy'] for m in test_metrics if m['lr'] == lr]
    print(
        f"LR {lr}: Validation Accuracy: {np.mean(lr_val_acc):.2f}% (Std Dev: {np.std(lr_val_acc):.2f}), "
        f"Validation Loss: {np.mean(lr_val_loss):.4f}, "
        f"Test Accuracy: {np.mean(lr_test_acc):.2f}% (Std Dev: {np.std(lr_test_acc):.2f})"
    )

# Overall averages and standard deviations, pooled over ALL learning rates
# and folds (kept for reference; use the per-LR lines above to pick an LR).
val_accuracies = [m['accuracy'] for m in validation_metrics]
test_accuracies = [m['accuracy'] for m in test_metrics]
print(f"\nAverage Validation Accuracy: {np.mean(val_accuracies):.2f}%, Std Dev: {np.std(val_accuracies):.2f}")
print(f"Average Test Accuracy: {np.mean(test_accuracies):.2f}%, Std Dev: {np.std(test_accuracies):.2f}")


Analyze LR graphs

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score

# Directories
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Models_mainUse2MetNuweChangesGetBestLR'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Histories_mainseetN2uweChnagesGetBestLR'
metrics_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Metrics'
os.makedirs(metrics_save_dir, exist_ok=True)

metrics_file = os.path.join(metrics_save_dir, 'metrics.pkl')

# Learning rates tested
learning_rates = [1e-3, 5e-4, 1e-4, 5e-5]
num_folds = 5  # Number of folds used in cross-validation


def _load_validation_fold(fold_dir):
    """Return ``(images, labels)`` for the Good/Bad JPEGs under ``fold_dir``.

    Images are loaded at 224x224 and divided by 255.0; Good is labelled 1 and
    Bad is labelled 0.  Missing folders simply contribute no samples, so both
    arrays are empty when nothing is found.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        class_dir = os.path.join(fold_dir, subdir)
        if not os.path.isdir(class_dir):
            continue
        for fname in sorted(os.listdir(class_dir)):
            # Case-insensitive so '*.JPG' files are included as well.
            if fname.lower().endswith('.jpg'):
                image = load_img(os.path.join(class_dir, fname), target_size=(224, 224))
                images.append(img_to_array(image) / 255.0)
                labels.append(1 if subdir == 'Good' else 0)
    return np.array(images), np.array(labels)


def _mean_or_nan(values):
    """Return the mean of the non-None entries of ``values``, or NaN if none."""
    valid = [v for v in values if v is not None]
    return np.mean(valid) if valid else np.nan


# Check if saved metrics exist
if os.path.exists(metrics_file):
    print("Loading saved metrics...")
    # NOTE: pickle can run arbitrary code on load; only open files that this
    # notebook wrote itself.
    with open(metrics_file, 'rb') as f:
        vgg16_results = pickle.load(f)
else:
    print("Processing metrics from scratch...")
    # Initialize dictionary: metric name -> learning rate -> per-fold values
    metrics = ['f1_score', 'precision', 'recall', 'auc', 'val_loss']
    vgg16_results = {metric: {lr: [] for lr in learning_rates} for metric in metrics}

    # Folds form the outer loop so each fold's validation images are read
    # from disk once and reused for every learning rate.  Per learning rate,
    # values are still appended in ascending fold order.
    for fold in range(1, num_folds + 1):
        val_fold_dir = os.path.join(base_fold_dir, f'Fold{fold}')
        fold_data = None  # loaded lazily, only when a model exists for this fold

        for lr in learning_rates:
            model_path = os.path.join(model_save_dir, f'model_fold{fold}_lr{lr}.keras')
            history_path = os.path.join(history_save_dir, f'history_fold{fold}_lr{lr}.pkl')

            # Skip missing models
            if not os.path.exists(model_path):
                print(f"Skipping: Model not found for LR={lr}, Fold={fold}")
                continue

            if fold_data is None:
                fold_data = _load_validation_fold(val_fold_dir)
            X_val, y_val = fold_data

            if len(X_val) == 0:
                print(f"Skipping: No validation images found for LR={lr}, Fold={fold}")
                continue

            print(f"Loading model for LR={lr}, Fold={fold}...")
            model = load_model(model_path)

            # Predict; flatten the (N, 1) sigmoid output to shape (N,) so it
            # lines up with the 1-D label vector.
            val_probs = model.predict(X_val).ravel()
            val_preds = (val_probs > 0.5).astype(int)

            # Compute metrics
            val_f1 = f1_score(y_val, val_preds)
            val_precision = precision_score(y_val, val_preds)
            val_recall = recall_score(y_val, val_preds)
            val_auc = roc_auc_score(y_val, val_probs)

            vgg16_results['f1_score'][lr].append(val_f1)
            vgg16_results['precision'][lr].append(val_precision)
            vgg16_results['recall'][lr].append(val_recall)
            vgg16_results['auc'][lr].append(val_auc)

            print(f"Fold {fold} - Avg F1 Score: {val_f1:.4f}, Precision: {val_precision:.4f}, Recall: {val_recall:.4f}, AUC: {val_auc:.4f}")

            # Load val_loss
            if os.path.exists(history_path):
                with open(history_path, 'rb') as f:
                    history = pickle.load(f)
                if 'val_loss' in history:
                    # NOTE(review): this is the mean val_loss over all epochs
                    # of the run, not the best or final epoch - confirm that
                    # this is the quantity the plot is meant to show.
                    avg_val_loss = np.mean(history['val_loss'])
                    vgg16_results['val_loss'][lr].append(avg_val_loss)
            else:
                print(f"Skipping: History file not found for LR={lr}, Fold={fold}")

    # Save metrics
    print("Saving extracted metrics...")
    with open(metrics_file, 'wb') as f:
        pickle.dump(vgg16_results, f)

# Average metrics per LR.  A cached metrics file may hold a different set of
# learning rates or None placeholders; those cases yield NaN / are ignored
# instead of raising.
avg_vgg16_metrics = {
    metric: {
        lr: _mean_or_nan(vgg16_results[metric].get(lr, []))
        for lr in learning_rates
    }
    for metric in vgg16_results
}

import matplotlib.ticker as ticker

# One row per curve on the left axis: (metric key, legend label, marker, colour).
_curve_specs = [
    ('f1_score', 'F1 Score', 'o', 'g'),
    ('precision', 'Precision', 's', 'r'),
    ('recall', 'Recall', 'D', 'orange'),
    ('auc', 'AUC', '^', 'purple'),
]

# Figure with a logarithmic learning-rate axis
fig, ax1 = plt.subplots(figsize=(12, 6))
ax1.set_xlabel('Learning Rate')
ax1.set_xscale('log')
ax1.set_ylabel('Performance Metrics')

# Averaged values per learning rate, in the order of learning_rates
_series = {
    key: [avg_vgg16_metrics[key][lr] for lr in learning_rates]
    for key, _label, _marker, _colour in _curve_specs
}
f1_vals, prec_vals, rec_vals, auc_vals = (_series[spec[0]] for spec in _curve_specs)
val_loss_vals = [avg_vgg16_metrics['val_loss'][lr] for lr in learning_rates]

# Draw the four performance curves
_left_lines = []
for key, label, marker, colour in _curve_specs:
    drawn, = ax1.plot(learning_rates, _series[key], marker=marker, linestyle='-', color=colour, label=label)
    _left_lines.append(drawn)
f1_line, prec_line, rec_line, auc_line = _left_lines

# Label every non-NaN point with its value in percent
for i, lr in enumerate(learning_rates):
    for key, _label, _marker, colour in _curve_specs:
        value = _series[key][i]
        if np.isnan(value):
            continue
        ax1.text(lr, value, f"{value*100:.1f}%", fontsize=9, color=colour, ha='center', va='bottom')

# Put the x ticks exactly on the tested learning rates
ax1.set_xticks(learning_rates)
ax1.get_xaxis().set_major_formatter(ticker.ScalarFormatter())
ax1.tick_params(axis='x', rotation=45)

# Second y-axis sharing the same x-axis, used for the validation loss
ax2 = ax1.twinx()
ax2.set_ylabel('Validation Loss', color='b')
val_loss_line, = ax2.plot(learning_rates, val_loss_vals, marker='x', linestyle='--', color='b', label='Validation Loss')

# Label every non-NaN validation-loss point
for lr, loss_value in zip(learning_rates, val_loss_vals):
    if np.isnan(loss_value):
        continue
    ax2.text(lr, loss_value, f"{loss_value:.3f}", fontsize=9, color='b', ha='center', va='bottom')

ax2.tick_params(axis='y', labelcolor='b')

# Title, layout and a single legend covering both axes
fig.suptitle('VGG16 Learning Rate Performance Metrics (Averaged)', fontsize=14)
fig.tight_layout()
fig.legend(_left_lines + [val_loss_line],
           [spec[1] for spec in _curve_specs] + ['Validation Loss'],
           loc="upper right")
plt.grid(True)
plt.show()



In [None]:
import os
import numpy as np
import pickle
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import average_precision_score

# Set your directories
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Models_mainUse2MetNuweChangesGetBestLR'
metrics_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Metrics'
metrics_file = os.path.join(metrics_save_dir, 'metrics.pkl')

# Config
learning_rates = [1e-3, 5e-4, 1e-4, 5e-5, 1e-5, 5e-6]
num_folds = 5


def _load_validation_fold(fold_dir):
    """Return ``(images, labels)`` for the Good/Bad JPEGs under ``fold_dir``.

    Images are loaded at 224x224 and divided by 255.0; Good is labelled 1 and
    Bad is labelled 0.  Both arrays are empty when nothing is found.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        class_dir = os.path.join(fold_dir, subdir)
        if not os.path.isdir(class_dir):
            continue
        for fname in sorted(os.listdir(class_dir)):
            if fname.lower().endswith('.jpg'):
                img = load_img(os.path.join(class_dir, fname), target_size=(224, 224))
                images.append(img_to_array(img) / 255.0)
                labels.append(1 if subdir == 'Good' else 0)
    return np.array(images), np.array(labels)


# Load existing metrics
if not os.path.exists(metrics_file):
    raise FileNotFoundError("metrics.pkl not found. Run your main evaluation script first.")

# NOTE: pickle can run arbitrary code on load; only open files that this
# notebook wrote itself.
with open(metrics_file, 'rb') as f:
    vgg16_results = pickle.load(f)

# Create average_precision key if missing
ap_results = vgg16_results.setdefault('average_precision', {})

# A learning rate counts as done only when it already has one entry per
# fold.  Shorter lists are recomputed from scratch: skipped folds leave no
# entry, so the list length does not say which folds are present and
# resuming by length could duplicate or misplace values.
pending_lrs = []
for lr in learning_rates:
    if len(ap_results.get(lr, [])) >= num_folds:
        continue
    ap_results[lr] = []
    pending_lrs.append(lr)

# Patch: compute only average_precision.  Folds form the outer loop so each
# fold's validation images are read once and shared by all learning rates;
# per learning rate the values are still appended in ascending fold order.
for fold in range(1, num_folds + 1):
    val_fold_dir = os.path.join(base_fold_dir, f'Fold{fold}')
    fold_data = None  # loaded lazily, only when a model exists for this fold

    for lr in pending_lrs:
        model_path = os.path.join(model_save_dir, f'model_fold{fold}_lr{lr}.keras')

        if not os.path.exists(model_path):
            print(f"Skipping: Model not found for LR={lr}, Fold={fold}")
            continue

        if fold_data is None:
            fold_data = _load_validation_fold(val_fold_dir)
        X_val, y_val = fold_data

        if len(X_val) == 0:
            print(f"Skipping: No validation data for LR={lr}, Fold={fold}")
            continue

        print(f"Loading model for LR={lr}, Fold={fold}...")
        model = load_model(model_path)

        # Flatten the (N, 1) sigmoid output to match the 1-D label vector.
        val_probs = model.predict(X_val).ravel()
        try:
            val_ap = average_precision_score(y_val, val_probs)
        except ValueError as e:
            print(f"Could not compute average precision for LR={lr}, Fold={fold}: {e}")
            # Placeholder so readers of the file can tell the fold was tried.
            ap_results[lr].append(None)
        else:
            ap_results[lr].append(val_ap)
            print(f"Fold {fold} LR={lr} → Average Precision: {val_ap:.4f}")

# Save the updated metrics
with open(metrics_file, 'wb') as f:
    pickle.dump(vgg16_results, f)

print("✅ metrics.pkl updated with average_precision.")

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt

# === Load saved metrics ===
metrics_file = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Metrics/metrics.pkl'

# NOTE: pickle can run arbitrary code on load; only open files that this
# notebook wrote itself.
with open(metrics_file, 'rb') as f:
    vgg16_results = pickle.load(f)

# === Learning rates ===
learning_rates = [1e-3, 5e-4, 1e-4, 5e-5, 1e-5, 5e-6]

# === Required metrics check ===
required_metrics = ['f1_score', 'precision', 'recall', 'val_loss', 'average_precision']
for metric in required_metrics:
    if metric not in vgg16_results:
        raise KeyError(f"'{metric}' not found in metrics.pkl")


def _mean_or_nan(values):
    """Return the mean of the non-None entries of ``values``, or NaN if none."""
    valid = [v for v in values if v is not None]
    return np.mean(valid) if valid else np.nan


# === Compute average per learning rate ===
# The metrics file may have been written for fewer learning rates than are
# listed here; a learning rate without stored values becomes NaN (and is
# left out of the plot) instead of raising KeyError.
avg_vgg16_metrics = {
    metric: {
        lr: _mean_or_nan(vgg16_results[metric].get(lr, []))
        for lr in learning_rates
    }
    for metric in required_metrics
}

# === Plotting ===
fig, ax1 = plt.subplots(figsize=(10, 5))

ax1.set_xlabel('Learning Rate')
ax1.set_xscale('log')
ax1.set_ylabel('Performance Metrics')

# Plot main metrics on left Y-axis (matplotlib leaves gaps at NaN points)
ax1.plot(learning_rates, [avg_vgg16_metrics['f1_score'][lr] for lr in learning_rates], marker='o', linestyle='-', color='green', label='F1 Score')
ax1.plot(learning_rates, [avg_vgg16_metrics['precision'][lr] for lr in learning_rates], marker='s', linestyle='-', color='red', label='Precision')
ax1.plot(learning_rates, [avg_vgg16_metrics['recall'][lr] for lr in learning_rates], marker='D', linestyle='-', color='orange', label='Recall')
ax1.plot(learning_rates, [avg_vgg16_metrics['average_precision'][lr] for lr in learning_rates], marker='^', linestyle='-', color='purple', label='PR AUC')

ax1.tick_params(axis='y')

# Plot validation loss on secondary Y-axis, keeping only learning rates that
# actually have a value.  Missing averages are NaN, never None.
valid_lrs = []
valid_losses = []
for lr in learning_rates:
    val = avg_vgg16_metrics['val_loss'][lr]
    if not np.isnan(val):
        valid_lrs.append(lr)
        valid_losses.append(val)

ax2 = ax1.twinx()
ax2.set_ylabel('Validation Loss', color='blue')
ax2.plot(valid_lrs, valid_losses, marker='x', linestyle='--', color='blue', label='Validation Loss')
ax2.tick_params(axis='y', labelcolor='blue')

# Title, legend, and layout
fig.suptitle('VGG16 Learning Rate Performance Metrics (Using PR AUC)')
fig.tight_layout()
fig.legend(loc='upper right', bbox_to_anchor=(1, 1), bbox_transform=ax1.transAxes)
plt.grid(True)
plt.show()

Plot loss and accuracy curves

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt

# === Where the histories live and which runs to read ===
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Histories_mainseetN2uweChnagesGetBestLR'
learning_rates = [1e-3, 5e-4, 1e-4, 5e-5]
num_folds = 5

# Pairs of (key in the pickled Keras history, key used in history_data).
_history_key_map = (
    ('loss', 'train_loss'),
    ('val_loss', 'val_loss'),
    ('accuracy', 'train_acc'),
    ('val_accuracy', 'val_acc'),
)

# === Per learning rate: one list of per-fold curves for each metric ===
history_data = {}
for lr in learning_rates:
    history_data[lr] = {'train_loss': [], 'val_loss': [], 'train_acc': [], 'val_acc': []}

# === Read every history file and report anything that is missing ===
for lr in learning_rates:
    for fold in range(1, num_folds + 1):
        history_path = os.path.join(history_save_dir, f'history_fold{fold}_lr{lr}.pkl')
        if not os.path.exists(history_path):
            print(f"❌ Missing history file: Fold {fold}, LR={lr}")
            continue

        with open(history_path, 'rb') as f:
            history = pickle.load(f)

        missing_keys = []
        for source_key, target_key in _history_key_map:
            if source_key in history:
                history_data[lr][target_key].append(history[source_key])
            else:
                missing_keys.append(source_key)

        if missing_keys:
            print(f"⚠️ Missing keys in fold {fold}, LR={lr}: {missing_keys}")

# === Epoch-wise mean across folds ===
# Runs can differ in length, so every curve is cut to the shortest fold
# before averaging.  Metrics without any loaded curve get no entry.
avg_history = {}
for lr in learning_rates:
    avg_history[lr] = {}
    for key, fold_histories in history_data[lr].items():
        if not fold_histories:
            continue
        min_len = min(len(curve) for curve in fold_histories)
        avg_history[lr][key] = np.mean([curve[:min_len] for curve in fold_histories], axis=0)

# === 2x2 grid: one panel per metric, one curve per learning rate ===
metric_titles = ['Training Loss', 'Validation Loss', 'Training Accuracy', 'Validation Accuracy']
metric_keys = ['train_loss', 'val_loss', 'train_acc', 'val_acc']

fig, axs = plt.subplots(2, 2, figsize=(14, 10))
axs = axs.flatten()

for panel, title, key in zip(axs, metric_titles, metric_keys):
    # Only learning rates that have an averaged curve for this metric
    available = [lr for lr in learning_rates if key in avg_history[lr]]
    for lr in available:
        panel.plot(avg_history[lr][key], label=f'LR={lr}')
    panel.set_title(title)
    panel.set_xlabel('Epoch')
    panel.set_ylabel('Value')
    panel.legend()
    panel.grid(True)

fig.suptitle('Training vs Validation Metrics Across Learning Rates (Averaged over Folds)', fontsize=16)
# Leave room at the top of the figure for the suptitle
fig.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

# Do VGG with best lr 1e-4

In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import VGG16
from tensorflow.keras.regularizers import l2
import pickle

# Input locations: the cross-validation fold folders and the test images.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
test_image_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test'

# Output locations for saved models and pickled training histories.
# Both folders are created up front when they do not exist yet.
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Models_mainUse2MetNuweChangesBestLRMinus4'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Histories_mainseetN2uweChnagesBestLRMinus4'
for _output_dir in (model_save_dir, history_save_dir):
    os.makedirs(_output_dir, exist_ok=True)

# One path per fold: <base_fold_dir>/Fold1 ... <base_fold_dir>/Fold5.
fold_dirs = []
for _fold_number in range(1, 6):
    fold_dirs.append(os.path.join(base_fold_dir, f'Fold{_fold_number}'))

# Function to load images and labels
def load_images_and_labels(image_dir):
    """Load the JPEG images found under ``image_dir/Good`` and ``image_dir/Bad``.

    Every image is loaded at 224x224 via ``load_img`` and its pixel array is
    divided by 255.0.  Images from ``Good`` get label 1, images from ``Bad``
    get label 0.  A class folder that does not exist is skipped.

    Args:
        image_dir: Directory that contains the ``Good`` and/or ``Bad`` folders.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays in matching order.  Both
        arrays are empty when no image was found.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_image_dir = os.path.join(image_dir, subdir)
        if not os.path.isdir(full_image_dir):
            continue
        label = 1 if subdir == 'Good' else 0
        # Sorted so the sample order is reproducible; os.listdir() returns
        # entries in arbitrary order.
        for fname in sorted(os.listdir(full_image_dir)):
            # Case-insensitive, like the fold-splitting code, so files named
            # '*.JPG' are not silently left out of the data set.
            if not fname.lower().endswith('.jpg'):
                continue
            image_path = os.path.join(full_image_dir, fname)
            image = load_img(image_path, target_size=(224, 224))
            images.append(img_to_array(image) / 255.0)
            labels.append(label)
    return np.array(images), np.array(labels)

# Function to create the VGG16-based model with improvements
def create_vgg16_model(image_shape, learning_rate=1e-4):
    """Build and compile a binary classifier on top of an ImageNet VGG16 base.

    The VGG16 convolutional base (without its top) is followed by
    Flatten -> Dense(256, relu, L2 0.01) -> BatchNormalization -> Dropout(0.5)
    -> Dense(1, sigmoid).  The model is compiled with Adam, binary
    cross-entropy loss and the accuracy metric.

    Args:
        image_shape: Input shape passed to VGG16 as ``input_shape``.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled Keras model.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=image_shape)

    # Classification head, applied in order to the backbone output.
    head_layers = [
        Flatten(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for head_layer in head_layers:
        tensor = head_layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Cross-validation loop.  One dict per fold; 'accuracy' is stored in percent.
validation_metrics = []
test_metrics = []

# Imported here so the loop below can release Keras state between folds.
from tensorflow.keras.backend import clear_session

# The test set is the same for every fold, so it is loaded once instead of
# once per training run.
X_test, y_test = load_images_and_labels(test_image_dir)

for val_fold_index in range(5):
    fold_number = val_fold_index + 1
    print(f"\nProcessing Fold {fold_number} as Validation Set")

    # The current fold validates; all other folds are used for training.
    train_folds = [fold for i, fold in enumerate(fold_dirs) if i != val_fold_index]
    val_fold = fold_dirs[val_fold_index]

    # Load training data from the selected folds
    X_train, y_train = [], []
    for train_fold in train_folds:
        images, labels = load_images_and_labels(train_fold)
        X_train.append(images)
        y_train.append(labels)
    X_train, y_train = np.concatenate(X_train), np.concatenate(y_train)

    # Load validation data
    X_val, y_val = load_images_and_labels(val_fold)

    # A new model is built for every fold; clearing the Keras session first
    # keeps state from earlier folds from accumulating in memory.
    clear_session()

    # Train the model
    model_path = os.path.join(model_save_dir, f'model_fold{fold_number}.keras')
    model = create_vgg16_model(X_train.shape[1:])
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=20, verbose=1, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, verbose=1, min_lr=1e-7),
        ModelCheckpoint(model_path, save_best_only=True, verbose=1)
    ]

    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=150,
        batch_size=32,
        callbacks=callbacks,
        verbose=2
    )

    # Save training history
    history_path = os.path.join(history_save_dir, f'history_fold{fold_number}.pkl')
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)

    # Evaluate on validation set.  model.evaluate() returns accuracy as a
    # fraction, so it is converted to percent before being stored/printed.
    val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)
    val_accuracy_pct = val_accuracy * 100
    validation_metrics.append({'fold': fold_number, 'accuracy': val_accuracy_pct, 'loss': val_loss})
    print(f"Validation Accuracy for Fold {fold_number}: {val_accuracy_pct:.2f}%")

    # Evaluate on test set
    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
    test_accuracy_pct = test_accuracy * 100
    test_metrics.append({'fold': fold_number, 'accuracy': test_accuracy_pct, 'loss': test_loss})
    print(f"Test Accuracy for Fold {fold_number}: {test_accuracy_pct:.2f}%")

    # Save the model that produced the metrics above.
    # NOTE(review): this writes to the same path as ModelCheckpoint and so
    # replaces the best-val_loss checkpoint with the in-memory weights;
    # confirm that is intended when early stopping did not trigger.
    model.save(model_path)

# Per-fold report, first for the validation folds and then for the test set
for _heading, _fold_metrics in (
    ("\nValidation Metrics:", validation_metrics),
    ("\nTest Metrics:", test_metrics),
):
    print(_heading)
    for metric in _fold_metrics:
        print(f"Fold {metric['fold']}: Accuracy: {metric['accuracy']:.2f}%, Loss: {metric['loss']:.4f}")

# Mean and standard deviation of the accuracies (in percent) over the folds
val_accuracies = [entry['accuracy'] for entry in validation_metrics]
test_accuracies = [entry['accuracy'] for entry in test_metrics]
_val_mean, _val_std = np.mean(val_accuracies), np.std(val_accuracies)
_test_mean, _test_std = np.mean(test_accuracies), np.std(test_accuracies)
print(f"\nAverage Validation Accuracy: {_val_mean:.2f}%, Std Dev: {_val_std:.2f}")
print(f"Average Test Accuracy: {_test_mean:.2f}%, Std Dev: {_test_std:.2f}")

Analyze

In [None]:
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt

# Directory where histories are saved
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Histories_mainseetN2uweChnagesBestLRMinus4'

# Loaded histories and, in parallel, the fold number each one belongs to.
# The fold number is tracked explicitly so that a missing history file does
# not shift the labels of the folds that come after it.
histories = []
history_folds = []

# Load all history files
for fold in range(1, 6):
    history_path = os.path.join(history_save_dir, f'history_fold{fold}.pkl')
    if os.path.exists(history_path):
        with open(history_path, 'rb') as f:
            histories.append(pickle.load(f))
        history_folds.append(fold)
    else:
        print(f"Warning: {history_path} not found!")


def _plot_curves_across_folds(train_key, val_key, short_name, y_label, title):
    """Plot the training (solid) and validation (dashed) curve of every fold."""
    plt.figure(figsize=(12, 6))
    for fold, history in zip(history_folds, histories):
        plt.plot(history[train_key], label=f'Fold {fold} Train {short_name}')
        plt.plot(history[val_key], linestyle='dashed', label=f'Fold {fold} Val {short_name}')
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()
    plt.grid()
    plt.show()


# Plot training and validation accuracy
_plot_curves_across_folds('accuracy', 'val_accuracy', 'Acc', 'Accuracy',
                          'Training & Validation Accuracy Across Folds')

# Plot training and validation loss
_plot_curves_across_folds('loss', 'val_loss', 'Loss', 'Loss',
                          'Training & Validation Loss Across Folds')

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input

# === CONFIG ===
model_path = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Models_mainUse2MetNuweChangesBestLRMinus4/model_fold1.keras'
image_path = '/Users/suzetteschulenburg/Desktop/MainUse2/Fold1/Bad/JH1633_IMG_9233_contrast.jpg'  # Replace with your actual image

# === LOAD MODEL ===
model = load_model(model_path)

# === LOAD AND PREPROCESS IMAGE ===
img = load_img(image_path, target_size=(224, 224))
img_array = img_to_array(img)  # RGB pixel values, kept untouched for display
img_batch = np.expand_dims(img_array, axis=0)
# The saved models were trained on pixels divided by 255.0 (see
# load_images_and_labels), so the same scaling is applied here.  The ImageNet
# preprocess_input() gives the network inputs it never saw during training
# and can also overwrite img_array in place.
img_preprocessed = img_batch / 255.0

# === GRAD-CAM SETUP (from last conv layer) ===
last_conv_layer = 'block5_conv3'  # Last convolutional layer of VGG16
grad_model = tf.keras.models.Model(
    model.inputs,
    [model.get_layer(last_conv_layer).output, model.output]
)

# === GRADIENTS ===
with tf.GradientTape() as tape:
    conv_outputs, predictions = grad_model(img_preprocessed)
    loss = predictions[:, 0]  # the single sigmoid output

# Gradient of the score with respect to the feature maps
grads = tape.gradient(loss, conv_outputs)[0]  # shape: (14, 14, 512)
pooled_grads = tf.reduce_mean(grads, axis=(0, 1))  # shape: (512,)

# Weight each channel of the feature map by its pooled gradient
conv_outputs = conv_outputs[0]  # shape: (14, 14, 512)
conv_outputs = conv_outputs * pooled_grads[tf.newaxis, tf.newaxis, :]

# Average over channels to get the heatmap
heatmap = tf.reduce_mean(conv_outputs, axis=-1).numpy()

# === NORMALIZE HEATMAP ===
heatmap = np.maximum(heatmap, 0)
heatmap /= (np.max(heatmap) + 1e-6)  # epsilon avoids division by zero
heatmap = cv2.resize(heatmap, (224, 224))

# === COLORIZE AND OVERLAY ===
# applyColorMap returns BGR while the loaded image is RGB; convert the
# heatmap to RGB so both are blended (and shown) in the same channel order.
heatmap_colored = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET)
heatmap_colored = cv2.cvtColor(heatmap_colored, cv2.COLOR_BGR2RGB)
img_uint8 = np.uint8(img_array)  # original RGB image in uint8 for blending
overlayed_img = cv2.addWeighted(img_uint8, 0.6, heatmap_colored, 0.4, 0)

# === DISPLAY ===
plt.figure(figsize=(10, 4))

plt.subplot(1, 2, 1)
plt.title("Original Image")
plt.imshow(img_uint8)  # already RGB, which is what imshow expects
plt.axis('off')

plt.subplot(1, 2, 2)
plt.title(f"Grad-CAM\nPredicted Score: {predictions.numpy()[0][0]:.2f}")
plt.imshow(overlayed_img)
plt.axis('off')

plt.tight_layout()
plt.show()

Feature maps (fm)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input

# === CONFIG ===
model_path = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Models_mainUse2MetNuweChangesBestLRMinus4/model_fold1.keras'
image_path = '/Users/suzetteschulenburg/Desktop/MainUse2/Fold1/Bad/JH1633_IMG_9233_contrast.jpg'

# === Load model and image ===
model = load_model(model_path)
img = load_img(image_path, target_size=(224, 224))
img_array = img_to_array(img)
img_array = np.expand_dims(img_array, axis=0)
# The saved models were trained on pixels divided by 255.0 (see
# load_images_and_labels), so the same scaling is applied here instead of the
# ImageNet preprocess_input(), which the network never saw during training.
img_array = img_array / 255.0

# === Get outputs of convolutional layers only ===
# Layers are selected by name: every layer whose name contains 'conv'.
conv_layers = [layer for layer in model.layers if 'conv' in layer.name]
layer_outputs = [layer.output for layer in conv_layers]
conv_layer_names = [layer.name for layer in conv_layers]
activation_model = Model(inputs=model.input, outputs=layer_outputs)
activations = activation_model.predict(img_array)

# === Display more filters and bigger plots ===
def display_activations(activations, layer_names, max_images=10):
    """Show the first feature maps of each layer side by side, one figure per layer.

    Each feature map is standardised (zero mean, unit standard deviation),
    rescaled by 64 and shifted by 128, clipped to 0..255 and placed in a
    single-row grid that is drawn with the 'plasma' colormap.

    Args:
        activations: Sequence of arrays, one per layer, indexed as
            ``activation[0, row, col, channel]``.
        layer_names: Layer names used in the figure titles, in the same order
            as ``activations``.
        max_images: Maximum number of channels shown per layer.

    Returns:
        None.  The figures are shown with ``plt.show()``.
    """
    for layer_name, activation in zip(layer_names, activations):
        n_features = activation.shape[-1]
        # Height and width are read separately so non-square maps work too.
        height, width = activation.shape[1], activation.shape[2]

        # Show up to max_images activations
        n_cols = min(max_images, n_features)
        display_grid = np.zeros((height, width * n_cols))

        for col in range(n_cols):
            # Work on a copy: the in-place normalisation below would
            # otherwise overwrite the caller's activation array.
            feature_map = activation[0, :, :, col].copy()
            feature_map -= feature_map.mean()
            feature_map /= (feature_map.std() + 1e-5)  # epsilon guards constant maps
            feature_map *= 64
            feature_map += 128
            feature_map = np.clip(feature_map, 0, 255).astype('uint8')
            display_grid[:, col * width:(col + 1) * width] = feature_map

        # Figure width grows with the number of maps shown
        plt.figure(figsize=(n_cols * 2, 2.5))
        plt.imshow(display_grid, aspect='auto', cmap='plasma')
        plt.title(f'Activations from: {layer_name}', fontsize=14)
        plt.axis('off')
        plt.tight_layout()
        plt.show()

# === Run visualization ===
display_activations(activations, conv_layer_names)

In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import cv2
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input

# === CONFIG ===
model_path = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Models_mainUse2MetNuweChangesBestLRMinus4/model_fold1.keras'
image_path = '/Users/suzetteschulenburg/Desktop/MainUse2/Fold1/Bad/JH1633_IMG_9233_contrast.jpg'


# === Load model and image ===
model = load_model(model_path)
img = load_img(image_path, target_size=(224, 224))
img_array = img_to_array(img)
img_batch = np.expand_dims(img_array, axis=0)
# preprocess_input may overwrite a float NumPy input in place, and img_batch is
# only a view of img_array. Pass a copy so the raw RGB pixels stay intact for
# the overlay below.
# NOTE(review): other cells in this notebook feed their models pixels scaled by
# / 255.0 instead of preprocess_input -- confirm which scaling this model
# was trained with.
img_preprocessed = preprocess_input(img_batch.copy())

# === Part 1: Grad-CAM ===
# NOTE(review): the variable is called last_conv_layer, but 'block1_conv2' is an
# early layer in VGG16 ('block5_conv3' is the last conv layer) -- confirm the
# intended layer.
last_conv_layer = 'block1_conv2'
# tf.keras.models.Model is used because this cell imports only load_model.
grad_model = tf.keras.models.Model(
    inputs=model.input,
    outputs=[model.get_layer(last_conv_layer).output, model.output],
)

with tf.GradientTape() as tape:
    conv_outputs, prediction = grad_model(img_preprocessed)
    loss = prediction[:, 0]

# Channel weights = gradient of the score averaged over the spatial axes.
grads = tape.gradient(loss, conv_outputs)[0]
pooled_grads = tf.reduce_mean(grads, axis=(0, 1))
conv_outputs = conv_outputs[0] * pooled_grads[tf.newaxis, tf.newaxis, :]
heatmap = tf.reduce_mean(conv_outputs, axis=-1).numpy()
heatmap = np.maximum(heatmap, 0)  # keep only positive evidence
heatmap /= (np.max(heatmap) + 1e-8)  # normalise to 0..1; epsilon guards an all-zero map
heatmap = cv2.resize(heatmap, (224, 224))
heatmap_colored = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET)
# applyColorMap returns BGR while the Keras-loaded image is RGB. Convert the
# image to BGR so both blend inputs share one channel order; the display step
# converts the finished overlay from BGR back to RGB.
img_bgr = cv2.cvtColor(img_array.astype(np.uint8), cv2.COLOR_RGB2BGR)
overlay = cv2.addWeighted(heatmap_colored, 0.4, img_bgr, 0.6, 0)

# === Part 2: Raw activation maps from three conv layers ===
layer_names = ['block1_conv2', 'block3_conv3', 'block5_conv3']
activation_model = tf.keras.models.Model(
    inputs=model.input,
    outputs=[model.get_layer(name).output for name in layer_names],
)
activations = activation_model.predict(img_preprocessed)

def show_activation_grid(activation, layer_name, max_features=8):
    """Plot the first feature maps of one layer side by side in grayscale.

    Args:
        activation: Array indexed as ``activation[0, :, :, channel]``.
        layer_name: Name used in the figure title.
        max_features: Upper bound on the number of channels drawn.
    """
    num_features = min(max_features, activation.shape[-1])
    # squeeze=False always yields a 2-D array of Axes, so indexing also works
    # when only a single feature map is drawn.
    fig, axes = plt.subplots(1, num_features, squeeze=False, figsize=(num_features * 2, 2))
    fig.suptitle(f"{layer_name} Activation Maps", fontsize=12)
    for i, ax in enumerate(axes[0]):
        # Copy first: the slice is a view, and the in-place arithmetic below
        # would otherwise overwrite the caller's activation array.
        feature_map = activation[0, :, :, i].astype('float32')
        feature_map -= feature_map.mean()
        feature_map /= (feature_map.std() + 1e-6)  # epsilon avoids 0/0 on flat maps
        feature_map = np.clip(feature_map * 64 + 128, 0, 255).astype('uint8')
        ax.imshow(feature_map, cmap='gray')
        ax.axis('off')
    plt.tight_layout()
    plt.show()

# === Display results ===
plt.figure(figsize=(6, 5))
# Title names the layer actually used for Grad-CAM rather than a fixed label.
plt.title(f"Grad-CAM - {last_conv_layer} (Score: {prediction.numpy()[0][0]:.2f})")
plt.imshow(cv2.cvtColor(overlay, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.tight_layout()
plt.show()

# === Show activation maps from 3 layers ===
for act, name in zip(activations, layer_names):
    show_activation_grid(act, name)

In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import cv2
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input

# === CONFIG ===
model_path = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Models_mainUse2MetNuweChangesBestLRMinus4/model_fold1.keras'
image_path = '/Users/suzetteschulenburg/Desktop/MainUse/Fold2/Bad/E17203_IMG_8551.jpg'

# === Load model and image ===
model = load_model(model_path)
img = load_img(image_path, target_size=(224, 224))
img_array = img_to_array(img)
img_batch = np.expand_dims(img_array, axis=0)
# preprocess_input may overwrite a float NumPy input in place, and img_batch is
# only a view of img_array. Pass a copy so the raw RGB pixels stay intact for
# the overlay below.
# NOTE(review): other cells in this notebook feed their models pixels scaled by
# / 255.0 instead of preprocess_input -- confirm which scaling this model
# was trained with.
img_preprocessed = preprocess_input(img_batch.copy())

# === Part 1: Grad-CAM ===
# NOTE(review): the variable is called last_conv_layer, but 'block1_conv2' is an
# early layer in VGG16 ('block5_conv3' is the last conv layer) -- confirm the
# intended layer.
last_conv_layer = 'block1_conv2'
grad_model = Model(inputs=model.input, outputs=[model.get_layer(last_conv_layer).output, model.output])

with tf.GradientTape() as tape:
    conv_outputs, prediction = grad_model(img_preprocessed)
    loss = prediction[:, 0]

# Channel weights = gradient of the score averaged over the spatial axes.
grads = tape.gradient(loss, conv_outputs)[0]
pooled_grads = tf.reduce_mean(grads, axis=(0, 1))
conv_outputs = conv_outputs[0] * pooled_grads[tf.newaxis, tf.newaxis, :]
heatmap = tf.reduce_mean(conv_outputs, axis=-1).numpy()
heatmap = np.maximum(heatmap, 0)  # keep only positive evidence
heatmap /= (np.max(heatmap) + 1e-8)  # normalise to 0..1; epsilon guards an all-zero map
heatmap = cv2.resize(heatmap, (224, 224))
heatmap_colored = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET)
# applyColorMap returns BGR while the Keras-loaded image is RGB. Convert the
# image to BGR so both blend inputs share one channel order; the display step
# converts the finished overlay from BGR back to RGB.
img_bgr = cv2.cvtColor(img_array.astype(np.uint8), cv2.COLOR_RGB2BGR)
overlay = cv2.addWeighted(heatmap_colored, 0.4, img_bgr, 0.6, 0)

# === Part 2: Raw activation maps from three conv layers ===
layer_names = ['block1_conv2', 'block3_conv3', 'block5_conv3']
activation_model = Model(inputs=model.input, outputs=[model.get_layer(name).output for name in layer_names])
activations = activation_model.predict(img_preprocessed)

def show_activation_grid(activation, layer_name, max_features=8):
    """Plot the first feature maps of one layer side by side in grayscale.

    Args:
        activation: Array indexed as ``activation[0, :, :, channel]``.
        layer_name: Name used in the figure title.
        max_features: Upper bound on the number of channels drawn.
    """
    num_features = min(max_features, activation.shape[-1])
    # squeeze=False always yields a 2-D array of Axes, so indexing also works
    # when only a single feature map is drawn.
    fig, axes = plt.subplots(1, num_features, squeeze=False, figsize=(num_features * 2, 2))
    fig.suptitle(f"{layer_name} Activation Maps", fontsize=12)
    for i, ax in enumerate(axes[0]):
        # Copy first: the slice is a view, and the in-place arithmetic below
        # would otherwise overwrite the caller's activation array.
        feature_map = activation[0, :, :, i].astype('float32')
        feature_map -= feature_map.mean()
        feature_map /= (feature_map.std() + 1e-6)  # epsilon avoids 0/0 on flat maps
        feature_map = np.clip(feature_map * 64 + 128, 0, 255).astype('uint8')
        ax.imshow(feature_map, cmap='gray')
        ax.axis('off')
    plt.tight_layout()
    plt.show()

# === Display results ===
plt.figure(figsize=(6, 5))
# Title names the layer actually used for Grad-CAM rather than a fixed label.
plt.title(f"Grad-CAM - {last_conv_layer} (Score: {prediction.numpy()[0][0]:.2f})")
plt.imshow(cv2.cvtColor(overlay, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.tight_layout()
plt.show()

# === Show activation maps from 3 layers ===
for act, name in zip(activations, layer_names):
    show_activation_grid(act, name)

In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import cv2
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input

# === CONFIG ===
model_path = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Models_mainUse2MetNuweChangesBestLRMinus4/model_fold1.keras'
image_path = '/Users/suzetteschulenburg/Desktop/MainUse/Fold3/Good/22?_IMG_9933.jpg'

# === Load model and image ===
model = load_model(model_path)
img = load_img(image_path, target_size=(224, 224))
img_array = img_to_array(img)
img_batch = np.expand_dims(img_array, axis=0)
# preprocess_input may overwrite a float NumPy input in place, and img_batch is
# only a view of img_array. Pass a copy so the raw RGB pixels stay intact for
# the overlay below.
# NOTE(review): other cells in this notebook feed their models pixels scaled by
# / 255.0 instead of preprocess_input -- confirm which scaling this model
# was trained with.
img_preprocessed = preprocess_input(img_batch.copy())

# === Part 1: Grad-CAM ===
# NOTE(review): the variable is called last_conv_layer, but 'block1_conv2' is an
# early layer in VGG16 ('block5_conv3' is the last conv layer) -- confirm the
# intended layer.
last_conv_layer = 'block1_conv2'
grad_model = Model(inputs=model.input, outputs=[model.get_layer(last_conv_layer).output, model.output])

with tf.GradientTape() as tape:
    conv_outputs, prediction = grad_model(img_preprocessed)
    loss = prediction[:, 0]

# Channel weights = gradient of the score averaged over the spatial axes.
grads = tape.gradient(loss, conv_outputs)[0]
pooled_grads = tf.reduce_mean(grads, axis=(0, 1))
conv_outputs = conv_outputs[0] * pooled_grads[tf.newaxis, tf.newaxis, :]
heatmap = tf.reduce_mean(conv_outputs, axis=-1).numpy()
heatmap = np.maximum(heatmap, 0)  # keep only positive evidence
heatmap /= (np.max(heatmap) + 1e-8)  # normalise to 0..1; epsilon guards an all-zero map
heatmap = cv2.resize(heatmap, (224, 224))
heatmap_colored = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET)
# applyColorMap returns BGR while the Keras-loaded image is RGB. Convert the
# image to BGR so both blend inputs share one channel order; the display step
# converts the finished overlay from BGR back to RGB.
img_bgr = cv2.cvtColor(img_array.astype(np.uint8), cv2.COLOR_RGB2BGR)
overlay = cv2.addWeighted(heatmap_colored, 0.4, img_bgr, 0.6, 0)

# === Part 2: Raw activation maps from three conv layers ===
layer_names = ['block1_conv2', 'block3_conv3', 'block5_conv3']
activation_model = Model(inputs=model.input, outputs=[model.get_layer(name).output for name in layer_names])
activations = activation_model.predict(img_preprocessed)

def show_activation_grid(activation, layer_name, max_features=8):
    """Plot the first feature maps of one layer side by side in grayscale.

    Args:
        activation: Array indexed as ``activation[0, :, :, channel]``.
        layer_name: Name used in the figure title.
        max_features: Upper bound on the number of channels drawn.
    """
    num_features = min(max_features, activation.shape[-1])
    # squeeze=False always yields a 2-D array of Axes, so indexing also works
    # when only a single feature map is drawn.
    fig, axes = plt.subplots(1, num_features, squeeze=False, figsize=(num_features * 2, 2))
    fig.suptitle(f"{layer_name} Activation Maps", fontsize=12)
    for i, ax in enumerate(axes[0]):
        # Copy first: the slice is a view, and the in-place arithmetic below
        # would otherwise overwrite the caller's activation array.
        feature_map = activation[0, :, :, i].astype('float32')
        feature_map -= feature_map.mean()
        feature_map /= (feature_map.std() + 1e-6)  # epsilon avoids 0/0 on flat maps
        feature_map = np.clip(feature_map * 64 + 128, 0, 255).astype('uint8')
        ax.imshow(feature_map, cmap='gray')
        ax.axis('off')
    plt.tight_layout()
    plt.show()

# === Display results ===
plt.figure(figsize=(6, 5))
# Title names the layer actually used for Grad-CAM rather than a fixed label.
plt.title(f"Grad-CAM - {last_conv_layer} (Score: {prediction.numpy()[0][0]:.2f})")
plt.imshow(cv2.cvtColor(overlay, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.tight_layout()
plt.show()

# === Show activation maps from 3 layers ===
for act, name in zip(activations, layer_names):
    show_activation_grid(act, name)

Heatmap

In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import cv2
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# === CONFIG ===
model_path = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Models_mainUse2MetNuweChangesBestLRMinus4/model_fold1.keras'
image_path = '/Users/suzetteschulenburg/Desktop/MainUse2/Fold3/Good/24?_IMG_9815_saturation.jpg'
target_size = (224, 224)
last_conv_layer_name = 'block5_conv3'  # Last conv layer in VGG16

# === LOAD MODEL ===
model = load_model(model_path)

# === LOAD IMAGE ===
img = load_img(image_path, target_size=target_size)
img_array = img_to_array(img)
img_array = np.expand_dims(img_array, axis=0) / 255.0  # add batch axis, scale by 1/255

# === GRAD-CAM MODEL ===
# Maps the input image to (chosen conv layer output, model prediction).
grad_model = tf.keras.models.Model(
    [model.inputs], [model.get_layer(last_conv_layer_name).output, model.output]
)

# === GRADIENT TAPE ===
with tf.GradientTape() as tape:
    conv_outputs, predictions = grad_model(img_array)
    # NOTE(review): if the model ends in a single sigmoid unit, the argmax over
    # one value is always 0, so this always explains that one output.
    pred_index = tf.argmax(predictions[0])
    class_output = predictions[:, pred_index]

# === GRADIENTS ===
grads = tape.gradient(class_output, conv_outputs)
pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))  # one weight per channel
conv_outputs = conv_outputs[0]

# === GENERATE HEATMAP ===
heatmap = conv_outputs @ pooled_grads[..., tf.newaxis]
heatmap = tf.squeeze(heatmap)
# Convert to NumPy exactly once here. np.maximum already yields an ndarray,
# so a later .numpy() call on the result would raise AttributeError.
heatmap = np.maximum(heatmap.numpy(), 0)
heatmap /= np.max(heatmap) + 1e-8  # normalise to 0..1; epsilon guards an all-zero map

# === LOAD ORIGINAL IMAGE ===
img_cv = cv2.imread(image_path)
if img_cv is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError(f"cv2.imread could not read image: {image_path}")
img_cv = cv2.resize(img_cv, target_size)
heatmap = cv2.resize(heatmap, (img_cv.shape[1], img_cv.shape[0]))
heatmap_colored = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET)
# Both inputs are in OpenCV's BGR order here (imread and applyColorMap).
overlay = cv2.addWeighted(img_cv, 0.6, heatmap_colored, 0.4, 0)

# === PLOT RESULTS ===
# Three panels, left to right: (title, image, extra imshow keyword arguments).
panels = [
    ("Original Image", cv2.cvtColor(img_cv, cv2.COLOR_BGR2RGB), {}),
    ("Grad-CAM Heatmap", heatmap, {'cmap': 'jet'}),
    ("Overlay", cv2.cvtColor(overlay, cv2.COLOR_BGR2RGB), {}),
]

plt.figure(figsize=(12, 4))
for position, (panel_title, panel_image, imshow_kwargs) in enumerate(panels, start=1):
    plt.subplot(1, 3, position)
    plt.imshow(panel_image, **imshow_kwargs)
    plt.title(panel_title)
    plt.axis('off')

plt.tight_layout()
plt.show()

VGG16 (learning rate 1e-5)

In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import VGG16
from tensorflow.keras.regularizers import l2
import pickle

# Input locations: the cross-validation folds and the separate test set
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
test_image_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test'

# Output locations for trained models and training histories (created on demand)
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Models_mainUse2MetNuweChangesBestLR'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Histories_mainseetN2uweChnagesBestLR'
for output_dir in (model_save_dir, history_save_dir):
    os.makedirs(output_dir, exist_ok=True)

# One directory per fold: Fold1 .. Fold5
fold_dirs = [os.path.join(base_fold_dir, 'Fold' + str(fold_no)) for fold_no in range(1, 6)]

# Function to load images and labels
def load_images_and_labels(image_dir):
    """Load every JPEG found under ``image_dir/Good`` and ``image_dir/Bad``.

    Each image is resized to 224x224 and its pixel values are divided by
    255. A class sub-directory that does not exist is skipped.

    Args:
        image_dir: Directory containing the ``Good`` and ``Bad`` sub-directories.

    Returns:
        ``(images, labels)`` as NumPy arrays; label 1 means ``Good`` and
        0 means ``Bad``.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_image_dir = os.path.join(image_dir, subdir)
        if not os.path.exists(full_image_dir):
            continue
        # os.listdir order is arbitrary; sorting keeps sample order reproducible.
        for fname in sorted(os.listdir(full_image_dir)):
            # Case-insensitive match so files named '*.JPG' are not silently dropped.
            if fname.lower().endswith('.jpg'):
                image_path = os.path.join(full_image_dir, fname)
                image_array = load_img(image_path, target_size=(224, 224))
                images.append(img_to_array(image_array) / 255.0)
                labels.append(1 if subdir == 'Good' else 0)
    return np.array(images), np.array(labels)

# Build the VGG16-based binary classifier
def create_vgg16_model(image_shape, learning_rate=1e-5):
    """Return a compiled classifier made of a VGG16 base and a small dense head.

    Args:
        image_shape: Input shape passed to VGG16 as ``input_shape``.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` with one sigmoid output unit.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=image_shape)

    # Head: flatten -> L2-regularised dense -> batch norm -> dropout -> sigmoid
    features = Flatten()(backbone.output)
    hidden = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(features)
    hidden = BatchNormalization()(hidden)
    hidden = Dropout(0.5)(hidden)
    output = Dense(1, activation='sigmoid')(hidden)

    classifier = Model(inputs=backbone.input, outputs=output)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Cross-validation loop: each fold serves once as the validation set while the
# other four are used for training; every trained model is also scored on the
# separate test set.
validation_metrics = []
test_metrics = []

# The test set is identical for every fold, so load it once up front.
X_test, y_test = load_images_and_labels(test_image_dir)

for val_fold_index in range(5):
    print(f"\nProcessing Fold {val_fold_index + 1} as Validation Set")

    # Define training and validation sets
    train_folds = [fold for i, fold in enumerate(fold_dirs) if i != val_fold_index]
    val_fold = fold_dirs[val_fold_index]

    # Load training data from the selected folds
    X_train, y_train = [], []
    for train_fold in train_folds:
        images, labels = load_images_and_labels(train_fold)
        X_train.append(images)
        y_train.append(labels)
    X_train, y_train = np.concatenate(X_train), np.concatenate(y_train)

    # Load validation data
    X_val, y_val = load_images_and_labels(val_fold)

    # Train the model
    model = create_vgg16_model(X_train.shape[1:])
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=20, verbose=1, restore_best_weights=True),  # Increased patience
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, verbose=1, min_lr=1e-7),
        ModelCheckpoint(os.path.join(model_save_dir, f'model_fold{val_fold_index + 1}.keras'), save_best_only=True, verbose=1)
    ]

    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=150,  # Increased epochs
        batch_size=32,
        callbacks=callbacks,
        verbose=2
    )

    # Save training history
    history_path = os.path.join(history_save_dir, f'history_fold{val_fold_index + 1}.pkl')
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)

    # Evaluate on validation set
    val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)
    validation_metrics.append({'fold': val_fold_index + 1, 'accuracy': val_accuracy * 100, 'loss': val_loss})
    # evaluate() returns accuracy as a fraction; scale it to match the '%' sign.
    print(f"Validation Accuracy for Fold {val_fold_index + 1}: {val_accuracy * 100:.2f}%")

    # Evaluate on test set
    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
    test_metrics.append({'fold': val_fold_index + 1, 'accuracy': test_accuracy * 100, 'loss': test_loss})
    print(f"Test Accuracy for Fold {val_fold_index + 1}: {test_accuracy * 100:.2f}%")

    # Save the model
    # NOTE(review): this writes to the same path as the ModelCheckpoint callback
    # above, replacing its file with the in-memory model that was just evaluated.
    model.save(os.path.join(model_save_dir, f'model_fold{val_fold_index + 1}.keras'))

# Per-fold report, validation first and then test
for heading, fold_results in (
    ("\nValidation Metrics:", validation_metrics),
    ("\nTest Metrics:", test_metrics),
):
    print(heading)
    for entry in fold_results:
        print(f"Fold {entry['fold']}: Accuracy: {entry['accuracy']:.2f}%, Loss: {entry['loss']:.4f}")

# Mean and standard deviation of the accuracies across folds
val_accuracies = [entry['accuracy'] for entry in validation_metrics]
test_accuracies = [entry['accuracy'] for entry in test_metrics]
val_mean, val_std = np.mean(val_accuracies), np.std(val_accuracies)
test_mean, test_std = np.mean(test_accuracies), np.std(test_accuracies)
print(f"\nAverage Validation Accuracy: {val_mean:.2f}%, Std Dev: {val_std:.2f}")
print(f"Average Test Accuracy: {test_mean:.2f}%, Std Dev: {test_std:.2f}")

Analyze VGG

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# --- CONFIG ---
# Position in class_names doubles as the integer label (0 = Bad, 1 = Good).
class_names = ['Bad', 'Good']
test_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test'
model_path = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Models_mainUse2MetNuweChangesBestLR/model_fold1.keras'

# --- Load the fold-1 model from disk ---
model = load_model(model_path)

# --- Load test data ---
def load_images_with_paths(image_dir):
    """Load JPEGs from one sub-directory per class and remember their file paths.

    Uses the notebook-level ``class_names`` list: each entry names a
    sub-directory of ``image_dir`` and its list index is used as the label.
    Images are resized to 224x224 and divided by 255.

    Args:
        image_dir: Directory containing one sub-directory per class name.

    Returns:
        ``(images, labels, paths)``: two NumPy arrays and a list of file
        paths, all in the same order.
    """
    images, labels, paths = [], [], []
    for label, class_name in enumerate(class_names):
        class_dir = os.path.join(image_dir, class_name)
        # os.listdir order is arbitrary; sorting keeps sample order reproducible.
        for fname in sorted(os.listdir(class_dir)):
            # Case-insensitive match so files named '*.JPG' are not silently dropped.
            if fname.lower().endswith('.jpg'):
                img_path = os.path.join(class_dir, fname)
                img = load_img(img_path, target_size=(224, 224))
                img_arr = img_to_array(img) / 255.0
                images.append(img_arr)
                labels.append(label)
                paths.append(img_path)
    return np.array(images), np.array(labels), paths

# --- Load the test set, keeping file paths for later display ---
X_test, y_test, img_paths = load_images_with_paths(test_dir)

# --- Predictions: threshold the model output at 0.5 ---
y_probs = model.predict(X_test)
y_pred = (y_probs > 0.5).astype(int).flatten()

# --- Split sample indices by whether the prediction matches the label ---
is_correct = y_pred == y_test
correct_idxs = np.flatnonzero(is_correct)
wrong_idxs = np.flatnonzero(~is_correct)

def plot_predictions(indices, title, n=5):
    """Show up to ``n`` test images with true label, predicted label and confidence.

    Reads the notebook-level ``img_paths``, ``y_test``, ``y_pred``,
    ``y_probs`` and ``class_names``.

    Args:
        indices: Sample indices to draw from; only the first ``n`` are shown.
        title: Figure-level title.
        n: Number of subplot columns.
    """
    plt.figure(figsize=(15, 3))
    for i, idx in enumerate(indices[:n]):
        plt.subplot(1, n, i+1)
        img = load_img(img_paths[idx])
        plt.imshow(img)
        plt.axis('off')
        true_label = class_names[y_test[idx]]
        pred_label = class_names[y_pred[idx]]
        # y_probs is the score for label 1 (y_pred is 1 when it exceeds 0.5).
        # Report the score of the label actually predicted, so a confident
        # label-0 prediction does not display as a low number.
        positive_prob = y_probs[idx][0]
        confidence = positive_prob if y_pred[idx] == 1 else 1 - positive_prob
        color = 'green' if y_pred[idx] == y_test[idx] else 'red'
        plt.title(f"True: {true_label}\nPred: {pred_label} ({confidence:.2f})", color=color)
    plt.suptitle(title)
    plt.tight_layout()
    plt.show()

# --- Show examples ---
plot_predictions(correct_idxs, "✅ Correct Predictions with Confidence")
plot_predictions(wrong_idxs, "❌ Incorrect Predictions with Confidence")

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# --- CONFIG ---
# Position in class_names doubles as the integer label (0 = Bad, 1 = Good).
class_names = ['Bad', 'Good']
test_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test'
model_path = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Models_mainUse2MetNuweChangesBestLR/model_fold1.keras'

# --- Load the fold-1 model from disk ---
model = load_model(model_path)

# --- Load test data ---
def load_images_with_paths(image_dir):
    """Load JPEGs from one sub-directory per class and remember their file paths.

    Uses the notebook-level ``class_names`` list: each entry names a
    sub-directory of ``image_dir`` and its list index is used as the label.
    Images are resized to 224x224 and divided by 255.

    Args:
        image_dir: Directory containing one sub-directory per class name.

    Returns:
        ``(images, labels, paths)``: two NumPy arrays and a list of file
        paths, all in the same order.
    """
    images, labels, paths = [], [], []
    for label, class_name in enumerate(class_names):
        class_dir = os.path.join(image_dir, class_name)
        # os.listdir order is arbitrary; sorting keeps sample order reproducible.
        for fname in sorted(os.listdir(class_dir)):
            # Case-insensitive match so files named '*.JPG' are not silently dropped.
            if fname.lower().endswith('.jpg'):
                img_path = os.path.join(class_dir, fname)
                img = load_img(img_path, target_size=(224, 224))
                img_arr = img_to_array(img) / 255.0
                images.append(img_arr)
                labels.append(label)
                paths.append(img_path)
    return np.array(images), np.array(labels), paths

# --- Load the test set, keeping file paths for later display ---
X_test, y_test, img_paths = load_images_with_paths(test_dir)

# --- Predictions: threshold the model output at 0.5 ---
y_probs = model.predict(X_test)
y_pred = (y_probs > 0.5).astype(int).flatten()

# --- Indices where both the true label and the prediction are 1 ('Good') ---
correct_good_idxs = [
    position
    for position, (truth, guess) in enumerate(zip(y_test, y_pred))
    if truth == 1 and guess == 1
]

# --- Plot the first correct 'Good' predictions in a single row ---
def plot_predictions(indices, title, n=2):
    """Show up to ``n`` test images in one row, each titled in green.

    Reads the notebook-level ``img_paths``, ``y_test``, ``y_pred``,
    ``y_probs`` and ``class_names``.

    Args:
        indices: Sample indices to draw from; only the first ``n`` are shown.
        title: Figure-level title.
        n: Number of subplot columns.
    """
    plt.figure(figsize=(10, 4))
    for column, sample in enumerate(indices[:n], start=1):
        axis = plt.subplot(1, n, column)
        axis.imshow(load_img(img_paths[sample]))
        axis.axis('off')
        true_label = class_names[y_test[sample]]
        pred_label = class_names[y_pred[sample]]
        confidence = y_probs[sample][0]
        axis.set_title(f"True: {true_label}\nPred: {pred_label} ({confidence:.2f})", color='green')
    plt.suptitle(title)
    plt.tight_layout()
    plt.show()

plot_predictions(correct_good_idxs, "", n=2)

In [None]:
import os
import numpy as np
import pickle
from sklearn.metrics import f1_score, precision_score, recall_score, precision_recall_curve, auc
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Paths
# test_image_dir is read later in this cell when the test set is loaded, so it
# is defined here instead of relying on a value left over from another cell.
test_image_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Models_mainUse2MetNuweChangesBestLR'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Histories_mainseetN2uweChnagesBestLR'


# Load test data
def load_images_and_labels(image_dir):
    """Load every JPEG found under ``image_dir/Good`` and ``image_dir/Bad``.

    Each image is resized to 224x224 and its pixel values are divided by
    255. A class sub-directory that does not exist is skipped.

    Args:
        image_dir: Directory containing the ``Good`` and ``Bad`` sub-directories.

    Returns:
        ``(images, labels)`` as NumPy arrays; label 1 means ``Good`` and
        0 means ``Bad``.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_image_dir = os.path.join(image_dir, subdir)
        if not os.path.exists(full_image_dir):
            continue
        # os.listdir order is arbitrary; sorting keeps sample order reproducible.
        for fname in sorted(os.listdir(full_image_dir)):
            # Case-insensitive match so files named '*.JPG' are not silently dropped.
            if fname.lower().endswith('.jpg'):
                image_path = os.path.join(full_image_dir, fname)
                image_array = load_img(image_path, target_size=(224, 224))
                images.append(img_to_array(image_array) / 255.0)
                labels.append(1 if subdir == 'Good' else 0)
    return np.array(images), np.array(labels)

# Test images and labels shared by every fold's evaluation
X_test, y_test = load_images_and_labels(test_image_dir)

# One metrics dict is appended per fold whose model file exists
all_metrics = []

# Score each fold's saved model on the test set
for fold in range(1, 6):
    print(f"\nEvaluating Fold {fold}...")
    model_path = os.path.join(model_save_dir, f'model_fold{fold}.keras')

    if os.path.exists(model_path):
        model = load_model(model_path)

        # Model outputs, thresholded at 0.5 to obtain hard labels
        y_pred_probs = model.predict(X_test)
        y_pred_labels = (y_pred_probs > 0.5).astype(int)

        # Threshold-based scores, then area under the precision-recall curve
        f1, precision, recall = (
            scorer(y_test, y_pred_labels, zero_division=1)
            for scorer in (f1_score, precision_score, recall_score)
        )
        precision_vals, recall_vals, _ = precision_recall_curve(y_test, y_pred_probs)
        auc_pr = auc(recall_vals, precision_vals)

        print(f"F1 Score: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, AUC-PR: {auc_pr:.4f}")

        all_metrics.append(dict(zip(
            ('fold', 'F1 Score', 'Precision', 'Recall', 'AUC-PR'),
            (fold, f1, precision, recall, auc_pr),
        )))
    else:
        print(f"Model file not found for Fold {fold}")

# Convert to DataFrame for summary
import pandas as pd
df_metrics = pd.DataFrame(all_metrics)
print("\n### VGG16 Test Set Metrics Summary ###")
print(df_metrics)

# Compute and print averages
if df_metrics.empty:
    # all_metrics is empty when no fold was evaluated. The frame then has no
    # metric columns, and indexing them below would raise KeyError.
    print("No fold models were evaluated; averages are unavailable.")
else:
    print(f"\nAverage F1 Score: {df_metrics['F1 Score'].mean():.4f}")
    print(f"Average Precision: {df_metrics['Precision'].mean():.4f}")
    print(f"Average Recall: {df_metrics['Recall'].mean():.4f}")
    print(f"Average AUC-PR: {df_metrics['AUC-PR'].mean():.4f}")

In [None]:
import os
import numpy as np
import pickle
from sklearn.metrics import f1_score, precision_score, recall_score, precision_recall_curve, auc
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Paths: image data first, then the saved training artefacts
test_image_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test'
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'

history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Histories_mainseetN2uweChnagesBestLR'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Models_mainUse2MetNuweChangesBestLR'


# Load test data
def load_images_and_labels(image_dir):
    """Load every JPEG found under ``image_dir/Good`` and ``image_dir/Bad``.

    Each image is resized to 224x224 and its pixel values are divided by
    255. A class sub-directory that does not exist is skipped.

    Args:
        image_dir: Directory containing the ``Good`` and ``Bad`` sub-directories.

    Returns:
        ``(images, labels)`` as NumPy arrays; label 1 means ``Good`` and
        0 means ``Bad``.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_image_dir = os.path.join(image_dir, subdir)
        if not os.path.exists(full_image_dir):
            continue
        # os.listdir order is arbitrary; sorting keeps sample order reproducible.
        for fname in sorted(os.listdir(full_image_dir)):
            # Case-insensitive match so files named '*.JPG' are not silently dropped.
            if fname.lower().endswith('.jpg'):
                image_path = os.path.join(full_image_dir, fname)
                image_array = load_img(image_path, target_size=(224, 224))
                images.append(img_to_array(image_array) / 255.0)
                labels.append(1 if subdir == 'Good' else 0)
    return np.array(images), np.array(labels)

# Test images and labels shared by every fold's evaluation
X_test, y_test = load_images_and_labels(test_image_dir)

# One metrics dict is appended per fold whose model file exists
all_metrics = []

# Score each fold's saved model on the test set
for fold in range(1, 6):
    print(f"\nEvaluating Fold {fold}...")
    model_path = os.path.join(model_save_dir, f'model_fold{fold}.keras')

    if os.path.exists(model_path):
        model = load_model(model_path)

        # Model outputs, thresholded at 0.5 to obtain hard labels
        y_pred_probs = model.predict(X_test)
        y_pred_labels = (y_pred_probs > 0.5).astype(int)

        # Threshold-based scores, then area under the precision-recall curve
        f1, precision, recall = (
            scorer(y_test, y_pred_labels, zero_division=1)
            for scorer in (f1_score, precision_score, recall_score)
        )
        precision_vals, recall_vals, _ = precision_recall_curve(y_test, y_pred_probs)
        auc_pr = auc(recall_vals, precision_vals)

        print(f"F1 Score: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, AUC-PR: {auc_pr:.4f}")

        all_metrics.append(dict(zip(
            ('fold', 'F1 Score', 'Precision', 'Recall', 'AUC-PR'),
            (fold, f1, precision, recall, auc_pr),
        )))
    else:
        print(f"Model file not found for Fold {fold}")

# Convert to DataFrame for summary
import pandas as pd
df_metrics = pd.DataFrame(all_metrics)
print("\n### VGG16 Test Set Metrics Summary ###")
print(df_metrics)

# Compute and print averages
if df_metrics.empty:
    # all_metrics is empty when no fold was evaluated. The frame then has no
    # metric columns, and indexing them below would raise KeyError.
    print("No fold models were evaluated; averages are unavailable.")
else:
    print(f"\nAverage F1 Score: {df_metrics['F1 Score'].mean():.4f}")
    print(f"Average Precision: {df_metrics['Precision'].mean():.4f}")
    print(f"Average Recall: {df_metrics['Recall'].mean():.4f}")
    print(f"Average AUC-PR: {df_metrics['AUC-PR'].mean():.4f}")

In [None]:
import os
import pickle
import pandas as pd
import matplotlib.pyplot as plt

# Directory holding one pickled Keras history dict per fold
history_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Histories_mainseetN2uweChnagesBestLR'

# Parallel per-fold columns, filled in lockstep
folds, val_accs, train_accs, val_losses, train_losses = [], [], [], [], []

# Visit folds 1..5, skipping any whose history file is absent
for fold in range(1, 6):
    history_path = os.path.join(history_dir, f'history_fold{fold}.pkl')

    if not os.path.exists(history_path):
        print(f"History file not found for Fold {fold}")
        continue

    with open(history_path, 'rb') as f:
        history = pickle.load(f)

    # Best value of each curve, each taken independently over all epochs
    best_values = (
        max(history['val_accuracy']),
        max(history['accuracy']),
        min(history['val_loss']),
        min(history['loss']),
    )
    columns = (folds, val_accs, train_accs, val_losses, train_losses)
    for column, value in zip(columns, (fold, *best_values)):
        column.append(value)

# Assemble the summary table
column_names = ('Fold', 'Val Accuracy', 'Train Accuracy', 'Val Loss', 'Train Loss')
df_hist = pd.DataFrame(dict(zip(
    column_names,
    (folds, val_accs, train_accs, val_losses, train_losses),
)))

print(df_hist)

In [None]:
import matplotlib.pyplot as plt

# Bubble chart of validation accuracy per fold (reads df_hist built earlier)
fold_numbers = df_hist['Fold']
val_accuracy = df_hist['Val Accuracy']
val_loss = df_hist['Val Loss']

plt.figure(figsize=(10, 6))

marker_style = dict(cmap='coolwarm', alpha=0.8, edgecolors='black', linewidth=1)
bubble = plt.scatter(
    fold_numbers,
    val_accuracy,
    s=(1 / val_loss) * 2000,  # lower loss -> larger bubble
    c=val_loss,
    **marker_style,
)

# Axis labels, title and colour scale
plt.xlabel('Fold Number')
plt.ylabel('Validation Accuracy')
plt.title('Validation Accuracy Across Folds (Bubble Size ~ 1 / Val Loss)')
cbar = plt.colorbar(bubble)
cbar.set_label('Validation Loss')

# Label each bubble just above its centre
for fold_no, accuracy in zip(fold_numbers, val_accuracy):
    plt.text(fold_no, accuracy + 0.003, f"Fold {int(fold_no)}", ha='center')

plt.grid(True, linestyle='--', alpha=0.5)
plt.xticks(fold_numbers)  # one tick per fold present in the table
plt.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Bubble chart of test-set F1 per fold (reads df_metrics with a lowercase 'fold' column)
fold_numbers = df_metrics['fold']
f1_scores = df_metrics['F1 Score']

plt.figure(figsize=(10, 6))

marker_style = dict(cmap='viridis', alpha=0.8, edgecolors='black', linewidth=1)
bubble = plt.scatter(
    fold_numbers,
    f1_scores,
    s=f1_scores * 1000,       # bubble size follows F1
    c=df_metrics['AUC-PR'],   # colour follows AUC-PR
    **marker_style,
)

# Axis labels, title and colour scale
plt.xlabel('Fold Number')
plt.ylabel('F1 Score')
plt.title('F1 Score per Fold (Bubble Size ~ F1, Color ~ AUC-PR)')
cbar = plt.colorbar(bubble)
cbar.set_label('AUC-PR')

# Label each bubble just above its centre
for fold_no, score in zip(fold_numbers, f1_scores):
    plt.text(fold_no, score + 0.01, f"Fold {int(fold_no)}", ha='center')

plt.grid(True, linestyle='--', alpha=0.5)
plt.xticks(fold_numbers)  # one tick per fold present in the table
plt.ylim(0, 1.1)  # leaves headroom above the maximum F1 of 1.0
plt.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Per-fold test metrics entered by hand
df_metrics = pd.DataFrame({
    'Fold': [1, 2, 3, 4, 5],
    'F1 Score': [0.787879, 0.670588, 0.708075, 0.760000, 0.766355],
    'Precision': [0.928571, 0.504425, 0.548077, 0.612903, 0.820000],
    'Recall': [0.684211, 1.000000, 1.000000, 1.000000, 0.719298],
    'AUC-PR': [0.908445, 0.774038, 0.845158, 0.805466, 0.845102]
})
fold_numbers = df_metrics['Fold']
f1_scores = df_metrics['F1 Score']

plt.figure(figsize=(10, 6))

marker_style = dict(cmap='coolwarm', edgecolors='black', alpha=0.8, linewidth=1)
bubble = plt.scatter(
    fold_numbers,
    f1_scores,
    s=df_metrics['AUC-PR'] * 1000,  # bubble size follows AUC-PR
    c=df_metrics['Precision'],      # colour follows precision
    **marker_style,
)

# Axis labels, title and colour scale
plt.xlabel('Fold')
plt.ylabel('F1 Score')
plt.title('F1 Score Across Folds (Size ~ AUC-PR, Color ~ Precision)')
cbar = plt.colorbar(bubble)
cbar.set_label('Precision')

# Label each bubble just above its centre
for fold_no, score in zip(fold_numbers, f1_scores):
    plt.text(fold_no, score + 0.01, f"Fold {int(fold_no)}", ha='center')

# Dashed reference line at the mean F1
avg_f1 = f1_scores.mean()
plt.axhline(avg_f1, color='gray', linestyle='--', label=f'Avg F1: {avg_f1:.2f}')
plt.legend()

plt.xticks(fold_numbers)
plt.ylim(0.6, 0.85)
plt.grid(True, linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Per-fold AUC-PR values entered by hand
df_metrics = pd.DataFrame({
    'Fold': [1, 2, 3, 4, 5],
    'AUC-PR': [0.908445, 0.774038, 0.845158, 0.805466, 0.845102]
})
avg_aucpr = df_metrics['AUC-PR'].mean()

# Seaborn theme with a light grid
sns.set(style="whitegrid")

plt.figure(figsize=(8, 6))
violin_style = dict(inner='point', linewidth=1.2, color='skyblue')
sns.violinplot(data=df_metrics, y='AUC-PR', **violin_style)

# Title and axes; x ticks are removed because only one metric is drawn
plt.title('Distribution of AUC-PR Across Folds', fontsize=14)
plt.ylabel('AUC-PR Score')
plt.xticks([])

# Dashed reference line at the mean AUC-PR
plt.axhline(avg_aucpr, linestyle='--', color='gray', label=f'Avg: {avg_aucpr:.3f}')
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt

# Directory containing saved histories
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Histories_mainseetN2uweChnagesBestLR'

# Load histories for all folds
histories = []
loaded_folds = []   # fold number of each entry in histories, same order
missing_folds = []

for fold in range(1, 6):
    history_path = os.path.join(history_save_dir, f'history_fold{fold}.pkl')
    if os.path.exists(history_path):
        with open(history_path, 'rb') as f:
            histories.append(pickle.load(f))
        loaded_folds.append(fold)
    else:
        print(f"Warning: {history_path} not found.")
        missing_folds.append(fold)


def _plot_history_curves(train_key, val_key, short_name, y_label, title):
    """Plot one training/validation curve pair per loaded fold on a new figure."""
    plt.figure(figsize=(12, 5))
    # Pair each history with its real fold number. Counting from 1 would
    # mislabel every fold that comes after a missing history file.
    for fold, history in zip(loaded_folds, histories):
        plt.plot(history[train_key], label=f'Train {short_name} Fold {fold}')
        plt.plot(history[val_key], '--', label=f'Val {short_name} Fold {fold}')

    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.show()


# Ensure we have at least one valid history to plot
if not histories:
    print("No valid history files found. Please check the file paths.")
else:
    # Plot training and validation loss
    _plot_history_curves('loss', 'val_loss', 'Loss', 'Loss',
                         'Training and Validation Loss Across 5 Folds')

    # Plot training and validation accuracy
    _plot_history_curves('accuracy', 'val_accuracy', 'Acc', 'Accuracy',
                         'Training and Validation Accuracy Across 5 Folds')

if missing_folds:
    print(f"Missing history files for folds: {missing_folds}. Please check if they were saved correctly.")

In [None]:
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import confusion_matrix, classification_report

# Directories
# Root of the fold directories; joined with 'Fold1'..'Fold5' below.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
# Test images, passed to load_images_and_labels later in this cell.
test_image_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test'
# Directory the per-fold model files (model_fold{N}.keras) are loaded from.
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Models_mainUse2MetNuweChangesBestLR'

# Folds
# One path per fold, in order Fold1..Fold5 (index 0 corresponds to Fold1).
fold_dirs = [os.path.join(base_fold_dir, f'Fold{i}') for i in range(1, 6)]

# Function to load images and labels
def load_images_and_labels(image_dir, target_size=(224, 224)):
    """Load every JPEG found under ``image_dir/Good`` and ``image_dir/Bad``.

    Args:
        image_dir: Directory expected to contain ``Good`` and/or ``Bad``
            sub-directories; a sub-directory that does not exist is skipped.
        target_size: Size passed to ``load_img`` so every image is resized
            to the same dimensions.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. Pixel values are divided
        by 255.0; the label is 1 for ``Good`` and 0 for ``Bad``.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_image_dir = os.path.join(image_dir, subdir)
        if not os.path.exists(full_image_dir):
            continue
        # Sorted so the sample order does not depend on the filesystem
        for fname in sorted(os.listdir(full_image_dir)):
            # Case-insensitive match, consistent with the fold-splitting code
            # at the top of this notebook, so *.JPG files are not dropped
            if fname.lower().endswith('.jpg'):
                image_path = os.path.join(full_image_dir, fname)
                image_array = load_img(image_path, target_size=target_size)
                images.append(img_to_array(image_array) / 255.0)
                labels.append(1 if subdir == 'Good' else 0)
    return np.array(images), np.array(labels)

# Confusion matrices for the folds that were actually evaluated.
# evaluated_folds holds the fold number of each entry so the plots are labelled
# correctly even when a fold is skipped because its model file is missing.
all_conf_matrices_val = []
all_conf_matrices_test = []
evaluated_folds = []

# The test set is the same for every fold, so load it once up front
X_test, y_test = load_images_and_labels(test_image_dir)

for val_fold_index in range(5):
    fold_number = val_fold_index + 1
    print(f"\nProcessing Fold {fold_number} for Confusion Matrix")

    # Check for the model first so no images are loaded for a skipped fold
    model_path = os.path.join(model_save_dir, f'model_fold{fold_number}.keras')
    if not os.path.exists(model_path):
        print(f"Model not found for Fold {fold_number}: {model_path}")
        continue
    model = load_model(model_path)

    # Load this fold's validation data
    X_val, y_val = load_images_and_labels(fold_dirs[val_fold_index])

    # Threshold the predicted scores at 0.5 and flatten to 1-D label vectors
    y_val_pred = (model.predict(X_val) > 0.5).astype("int32").ravel()
    y_test_pred = (model.predict(X_test) > 0.5).astype("int32").ravel()

    # labels=[0, 1] forces a 2x2 matrix in (Bad, Good) order even if one class
    # is absent from the labels or predictions, matching the tick labels below
    conf_matrix_val = confusion_matrix(y_val, y_val_pred, labels=[0, 1])
    conf_matrix_test = confusion_matrix(y_test, y_test_pred, labels=[0, 1])
    all_conf_matrices_val.append(conf_matrix_val)
    all_conf_matrices_test.append(conf_matrix_test)
    evaluated_folds.append(fold_number)

    print(f"Validation Set Classification Report for Fold {fold_number}:\n", classification_report(y_val, y_val_pred))
    print(f"Test Set Classification Report for Fold {fold_number}:\n", classification_report(y_test, y_test_pred))


def _plot_fold_confusion_matrices(matrices, fold_numbers, cmap, set_name):
    """Draw one annotated heatmap per evaluated fold, side by side in one row.

    Args:
        matrices: List of 2x2 confusion matrices.
        fold_numbers: Fold number for each matrix, used in the subplot title.
        cmap: Colormap name passed to ``sns.heatmap``.
        set_name: ``'Validation'`` or ``'Test'``; used in titles and messages.
    """
    if not matrices:
        print(f"No {set_name} confusion matrices to plot.")
        return
    # squeeze=False keeps axes 2-D so indexing also works with a single fold
    _, axes = plt.subplots(1, len(matrices), figsize=(4 * len(matrices), 4), squeeze=False)
    for ax, cm, fold_number in zip(axes[0], matrices, fold_numbers):
        sns.heatmap(cm, annot=True, fmt='d', cmap=cmap, xticklabels=['Bad', 'Good'], yticklabels=['Bad', 'Good'], ax=ax)
        ax.set_title(f'{set_name} Confusion Matrix Fold {fold_number}')
        ax.set_xlabel('Predicted')
        ax.set_ylabel('Actual')
    plt.tight_layout()
    plt.show()


# Plot confusion matrices for Validation Set
_plot_fold_confusion_matrices(all_conf_matrices_val, evaluated_folds, 'Blues', 'Validation')

# Plot confusion matrices for Test Set
_plot_fold_confusion_matrices(all_conf_matrices_test, evaluated_folds, 'Greens', 'Test')

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Confusion matrices summed over all 5 folds (values entered by hand)
conf_matrix_val = np.array([[1018, 277],
                            [312,  983]])

conf_matrix_test = np.array([[256,  24],
                             [103, 182]])

# One figure per matrix: validation first (blue palette), then test (green palette)
summed_plots = (
    (conf_matrix_val, 'Blues', 'Combined Confusion Matrix - Validation (All Folds)'),
    (conf_matrix_test, 'Greens', 'Combined Confusion Matrix - Test (All Folds)'),
)
for summed_matrix, palette, heading in summed_plots:
    plt.figure(figsize=(6, 5))
    sns.heatmap(
        summed_matrix,
        annot=True,
        fmt='d',
        cmap=palette,
        xticklabels=['Bad', 'Good'],
        yticklabels=['Bad', 'Good'],
    )
    plt.title(heading)
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.tight_layout()
    plt.show()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Per-fold average confusion matrices, rounded to whole numbers (values entered by hand)
avg_conf_matrix_val = np.array([[204, 55],
                                [62, 197]])

avg_conf_matrix_test = np.array([[51, 5],
                                 [21, 36]])

# One figure per matrix: validation first (blue palette), then test (green palette)
averaged_plots = (
    (avg_conf_matrix_val, 'Blues', 'Averaged Confusion Matrix - Validation (All Folds)'),
    (avg_conf_matrix_test, 'Greens', 'Averaged Confusion Matrix - Test (All Folds)'),
)
for averaged_matrix, palette, heading in averaged_plots:
    plt.figure(figsize=(6, 5))
    sns.heatmap(
        averaged_matrix,
        annot=True,
        fmt='d',
        cmap=palette,
        xticklabels=['Bad', 'Good'],
        yticklabels=['Bad', 'Good'],
    )
    plt.title(heading)
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.tight_layout()
    plt.show()

Try a learning rate of 1e-6

In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import VGG16
from tensorflow.keras.regularizers import l2
import pickle

# Directories
# Root of the fold directories; joined with 'Fold1'..'Fold5' below.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
# Test images, loaded with load_images_and_labels in the cross-validation loop.
test_image_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test'
# Output directories for this run's model files and pickled training histories.
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Models_mainUse2MetNuweChangesBestLRMinus6'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Histories_mainseetN2uweChnagesBestLRMinus6'
# Create both output directories if they do not exist yet.
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# Folds
# One path per fold, in order Fold1..Fold5 (index 0 corresponds to Fold1).
fold_dirs = [os.path.join(base_fold_dir, f'Fold{i}') for i in range(1, 6)]

# Function to load images and labels
def load_images_and_labels(image_dir, target_size=(224, 224)):
    """Load every JPEG found under ``image_dir/Good`` and ``image_dir/Bad``.

    Args:
        image_dir: Directory expected to contain ``Good`` and/or ``Bad``
            sub-directories; a sub-directory that does not exist is skipped.
        target_size: Size passed to ``load_img`` so every image is resized
            to the same dimensions.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. Pixel values are divided
        by 255.0; the label is 1 for ``Good`` and 0 for ``Bad``.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_image_dir = os.path.join(image_dir, subdir)
        if not os.path.exists(full_image_dir):
            continue
        # Sorted so the sample order does not depend on the filesystem
        for fname in sorted(os.listdir(full_image_dir)):
            # Case-insensitive match, consistent with the fold-splitting code
            # at the top of this notebook, so *.JPG files are not dropped
            if fname.lower().endswith('.jpg'):
                image_path = os.path.join(full_image_dir, fname)
                image_array = load_img(image_path, target_size=target_size)
                images.append(img_to_array(image_array) / 255.0)
                labels.append(1 if subdir == 'Good' else 0)
    return np.array(images), np.array(labels)

# Build and compile the VGG16-based binary classifier
def create_vgg16_model(image_shape, learning_rate=1e-6):
    """Return a compiled VGG16 model with a small dense classification head.

    Args:
        image_shape: Input shape handed to ``VGG16`` as ``input_shape``.
        learning_rate: Learning rate given to the Adam optimizer.

    Returns:
        A compiled ``Model`` with a single sigmoid output, trained with
        binary cross-entropy and reporting accuracy. Nothing in this function
        freezes the VGG16 layers.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=image_shape)

    # Head layers, applied in this order on top of the convolutional features
    head_layers = [
        Flatten(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),  # L2-regularized
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for head_layer in head_layers:
        tensor = head_layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Cross-validation loop: each fold in turn is the validation set, the other
# four are the training set. Metrics are collected per fold.
validation_metrics = []
test_metrics = []

# The test set is the same for every fold, so load it once up front
X_test, y_test = load_images_and_labels(test_image_dir)

for val_fold_index in range(5):
    fold_number = val_fold_index + 1
    print(f"\nProcessing Fold {fold_number} as Validation Set")

    # Define training and validation sets
    train_folds = [fold for i, fold in enumerate(fold_dirs) if i != val_fold_index]
    val_fold = fold_dirs[val_fold_index]

    # Load training data from the selected folds
    X_train, y_train = [], []
    for train_fold in train_folds:
        images, labels = load_images_and_labels(train_fold)
        X_train.append(images)
        y_train.append(labels)
    X_train, y_train = np.concatenate(X_train), np.concatenate(y_train)

    # Load validation data
    X_val, y_val = load_images_and_labels(val_fold)

    # Train the model
    model_path = os.path.join(model_save_dir, f'model_fold{fold_number}.keras')
    model = create_vgg16_model(X_train.shape[1:])
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=20, verbose=1, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, verbose=1, min_lr=1e-7),
        ModelCheckpoint(model_path, save_best_only=True, verbose=1)
    ]

    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=150,
        batch_size=32,
        callbacks=callbacks,
        verbose=2
    )

    # Save training history
    history_path = os.path.join(history_save_dir, f'history_fold{fold_number}.pkl')
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)

    # Evaluate on validation set. model.evaluate returns accuracy as a
    # fraction, so it is scaled by 100 both for storage and for the "%" print.
    val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)
    validation_metrics.append({'fold': fold_number, 'accuracy': val_accuracy * 100, 'loss': val_loss})
    print(f"Validation Accuracy for Fold {fold_number}: {val_accuracy * 100:.2f}%")

    # Evaluate on test set
    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
    test_metrics.append({'fold': fold_number, 'accuracy': test_accuracy * 100, 'loss': test_loss})
    print(f"Test Accuracy for Fold {fold_number}: {test_accuracy * 100:.2f}%")

    # Save the model
    # NOTE(review): this writes to the same path the ModelCheckpoint callback
    # used, replacing the checkpointed file with the model's current in-memory
    # weights. Confirm that is intended.
    model.save(model_path)

# Metrics summary
print("\nValidation Metrics:")
for metric in validation_metrics:
    print(f"Fold {metric['fold']}: Accuracy: {metric['accuracy']:.2f}%, Loss: {metric['loss']:.4f}")

print("\nTest Metrics:")
for metric in test_metrics:
    print(f"Fold {metric['fold']}: Accuracy: {metric['accuracy']:.2f}%, Loss: {metric['loss']:.4f}")

# Compute averages and standard deviations (accuracies are already in percent)
val_accuracies = [m['accuracy'] for m in validation_metrics]
test_accuracies = [m['accuracy'] for m in test_metrics]
print(f"\nAverage Validation Accuracy: {np.mean(val_accuracies):.2f}%, Std Dev: {np.std(val_accuracies):.2f}")
print(f"Average Test Accuracy: {np.mean(test_accuracies):.2f}%, Std Dev: {np.std(test_accuracies):.2f}")

In [None]:
import os
import pickle
import matplotlib.pyplot as plt

# Pickled training history of fold 1 from the LR = 1e-6 run
history_path = '/Users/suzetteschulenburg/Desktop/MainUse/Saved_Histories_mainseetN2uweChnagesBestLRMinus6/history_fold1.pkl'

# Stop right away when the history file is absent
if not os.path.exists(history_path):
    raise FileNotFoundError(f"Could not find history file: {history_path}")

with open(history_path, 'rb') as f:
    history = pickle.load(f)

# Per-epoch curves; a key that is not in the history yields an empty list
train_loss, val_loss = (history.get(key, []) for key in ('loss', 'val_loss'))
train_acc, val_acc = (history.get(key, []) for key in ('accuracy', 'val_accuracy'))
epochs = range(1, len(train_loss) + 1)

# === Plot ===
plt.figure(figsize=(12, 6))

# (subplot position, y-axis label, panel title, [(values, legend label, color), ...])
panels = [
    (1, 'Loss', 'Loss over Epochs (LR = 1e-6)',
     [(train_loss, 'Train Loss', 'red'), (val_loss, 'Validation Loss', 'blue')]),
    (2, 'Accuracy', 'Accuracy over Epochs (LR = 1e-6)',
     [(train_acc, 'Train Accuracy', 'green'), (val_acc, 'Validation Accuracy', 'orange')]),
]
for position, y_label, panel_title, curves in panels:
    plt.subplot(1, 2, position)
    for values, curve_label, curve_color in curves:
        plt.plot(epochs, values, label=curve_label, color=curve_color)
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()
    plt.grid(True)

# Overall title; the rect leaves room for it above the two panels
plt.suptitle('Training Performance for Fold 1 — Very Low LR = 1e-6 (Bad Learning Rate)')
plt.tight_layout(rect=[0, 0, 1, 0.93])
plt.show()

In [None]:
import os
import random
from collections import defaultdict

# Paths
# NOTE: the balancing step in this cell deletes files under this directory in place.
source_folds_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'  # Original folds directory
categories = ['Good', 'Bad']  # Classes to balance

# Group the images of one fold by category and by base ID
def collect_images_by_base_id(fold_dir, categories):
    """Index the ``.jpg`` files of a fold directory by category and base ID.

    The base ID is the part of the file name before the first underscore
    (e.g. ``E2025`` for ``E2025_IMG_8469.jpg``). The extension check is
    case-insensitive.

    Args:
        fold_dir: Directory holding one sub-directory per category.
        categories: Category (sub-directory) names to scan.

    Returns:
        Dict mapping each category to a ``defaultdict(list)`` of
        ``base_id -> [image paths]``. A category whose directory does not
        exist maps to an empty ``defaultdict``.
    """
    grouped = {}
    for label in categories:
        per_base_id = defaultdict(list)
        grouped[label] = per_base_id

        label_dir = os.path.join(fold_dir, label)
        if os.path.exists(label_dir):
            for entry in os.listdir(label_dir):
                if not entry.lower().endswith('.jpg'):
                    continue
                prefix = entry.split('_')[0]
                per_base_id[prefix].append(os.path.join(label_dir, entry))

    return grouped

# Size of the smaller of the two classes, used as the balancing target
def get_minimum_count(base_id_dict):
    """Return the smaller of the total image counts for ``Good`` and ``Bad``.

    Args:
        base_id_dict: Mapping with ``'Good'`` and ``'Bad'`` keys, each a
            mapping of base ID to a list of images.
    """
    totals = []
    for label in ('Good', 'Bad'):
        totals.append(sum(map(len, base_id_dict[label].values())))
    return min(totals)

# Balance images directly in the source directory
def balance_images_in_place(source_folds_dir, num_folds, categories):
    """Delete images so every category of each fold has the same image count.

    For each fold, the target is the count returned by ``get_minimum_count``.
    Within a category, base IDs are visited in random order and kept whole
    until the target is reached. The base ID that crosses the target is
    trimmed, and every base ID after it is removed completely. Stopping after
    the first trimmed base ID would leave the category above the target.

    WARNING: files are removed from disk with ``os.remove``.

    Args:
        source_folds_dir: Directory containing ``Fold1`` .. ``Fold{num_folds}``.
        num_folds: Number of fold directories to process.
        categories: Category sub-directory names to balance.
    """
    for fold in range(1, num_folds + 1):
        print(f"Processing Fold {fold}...")

        # Collect images by base ID
        fold_dir = os.path.join(source_folds_dir, f'Fold{fold}')
        base_id_dict = collect_images_by_base_id(fold_dir, categories)

        # Determine target count for balancing
        target_count = get_minimum_count(base_id_dict)
        print(f"Target count per class: {target_count}")

        # A target of 0 means one class has no images; balancing would delete
        # the whole other class, so leave the fold untouched instead.
        if target_count == 0:
            print(f"Skipping Fold {fold}: at least one class has no images.")
            continue

        # Balance each category
        for category in categories:
            all_base_ids = list(base_id_dict[category].keys())
            random.shuffle(all_base_ids)  # Shuffle to ensure randomness

            kept_count = 0
            for base_id in all_base_ids:
                images = base_id_dict[category][base_id]

                # Slots still available; 0 once the target has been reached,
                # in which case the whole base ID is excess.
                remaining = target_count - kept_count
                excess_images = images[remaining:]
                for img_path in excess_images:
                    os.remove(img_path)  # Remove the excess images
                if excess_images:
                    print(f"Removed {len(excess_images)} images from base ID {base_id} in {category}")

                kept_count += len(images) - len(excess_images)

    print("Balancing complete. Original folds updated.")

# Run the balancing process
# WARNING: this deletes image files under source_folds_dir (os.remove is called
# inside balance_images_in_place); this cell has no dry-run or backup step.
balance_images_in_place(source_folds_dir, num_folds=5, categories=categories)

# Other models

## Experiment with Learning rate ResNet

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score

# Prevent TensorFlow from freezing
# Reset Keras' global state before this experiment starts.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        # Memory growth is enabled on the first listed GPU only.
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except RuntimeError as e:
        # Report the error and carry on rather than aborting the cell.
        print(f"GPU Memory Growth Setting Error: {e}")

# Directories
# Root of the fold directories; joined with 'Fold1'..'Fold5' below.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
test_image_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test'
# Output directories for the per-learning-rate models and pickled histories.
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_Experiments'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_Histories'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# Folds
# One path per fold, in order Fold1..Fold5 (index 0 corresponds to Fold1).
fold_dirs = [os.path.join(base_fold_dir, f'Fold{i}') for i in range(1, 6)]

# Learning rates to test
# Each value is also formatted into the saved model/history file names below.
learning_rates = [1e-3, 5e-4, 1e-4, 5e-5, 1e-5, 5e-6]

# Function to load images and labels
def load_images_and_labels(image_dir, target_size=(224, 224)):
    """Load every JPEG found under ``image_dir/Good`` and ``image_dir/Bad``.

    Args:
        image_dir: Directory expected to contain ``Good`` and/or ``Bad``
            sub-directories; a sub-directory that does not exist is skipped.
        target_size: Size passed to ``load_img`` so every image is resized
            to the same dimensions.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. Pixel values are divided
        by 255.0; the label is 1 for ``Good`` and 0 for ``Bad``.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_image_dir = os.path.join(image_dir, subdir)
        if not os.path.exists(full_image_dir):
            continue
        # Sorted so the sample order does not depend on the filesystem
        for fname in sorted(os.listdir(full_image_dir)):
            # Case-insensitive match, consistent with the fold-splitting code
            # at the top of this notebook, so *.JPG files are not dropped
            if fname.lower().endswith('.jpg'):
                image_path = os.path.join(full_image_dir, fname)
                image_array = load_img(image_path, target_size=target_size)
                images.append(img_to_array(image_array) / 255.0)
                labels.append(1 if subdir == 'Good' else 0)
    return np.array(images), np.array(labels)

# Build and compile the ResNet50-based binary classifier
def create_resnet50_model(image_shape, learning_rate):
    """Return a compiled ResNet50 model with a frozen base and a dense head.

    Args:
        image_shape: Input shape handed to ``ResNet50`` as ``input_shape``.
        learning_rate: Learning rate given to the Adam optimizer.

    Returns:
        A compiled ``Model`` with a single sigmoid output, trained with
        binary cross-entropy and reporting accuracy.
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # only the head added below is trained

    # Head layers, applied in this order on top of the ResNet features
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for head_layer in head_layers:
        tensor = head_layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Per-learning-rate validation results, one entry per fold.
# 'accuracy' is stored in percent (0-100); 'f1_score' is in the range 0-1.
results = {lr: {'accuracy': [], 'f1_score': []} for lr in learning_rates}

# Cross-validation loop: for every learning rate, each fold in turn is the
# validation set and the remaining folds form the training set.
for lr in learning_rates:
    print(f"\nTesting Learning Rate: {lr}")
    for val_fold_index in range(5):
        print(f"\nProcessing Fold {val_fold_index + 1} as Validation Set")

        train_folds = [fold for i, fold in enumerate(fold_dirs) if i != val_fold_index]
        val_fold = fold_dirs[val_fold_index]

        # Load training data
        X_train, y_train = [], []
        for train_fold in train_folds:
            images, labels = load_images_and_labels(train_fold)
            X_train.append(images)
            y_train.append(labels)
        X_train, y_train = np.concatenate(X_train), np.concatenate(y_train)

        # Load validation data
        X_val, y_val = load_images_and_labels(val_fold)

        # Train the model
        model = create_resnet50_model(X_train.shape[1:], lr)
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-7),
            ModelCheckpoint(os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold{val_fold_index + 1}.keras'), save_best_only=True)
        ]

        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=30,
            batch_size=32,
            callbacks=callbacks,
            verbose=2
        )

        # Save history
        history_path = os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold{val_fold_index + 1}.pkl')
        with open(history_path, 'wb') as f:
            pickle.dump(history.history, f)

        # Evaluate on validation set: threshold the predicted scores at 0.5 and
        # flatten to 1-D so they line up element-wise with y_val
        val_preds = (model.predict(X_val) > 0.5).ravel()
        val_f1 = f1_score(y_val, val_preds)
        val_accuracy = np.mean(val_preds == y_val)

        results[lr]['accuracy'].append(val_accuracy * 100)
        results[lr]['f1_score'].append(val_f1)

        print(f"Fold {val_fold_index + 1} - Accuracy: {val_accuracy:.2%}, F1 Score: {val_f1:.4f}")

        # Free up memory after each fold
        del model
        tf.keras.backend.clear_session()

# Compute average accuracy and F1-score per learning rate
avg_results = {lr: {'accuracy': np.mean(results[lr]['accuracy']), 'f1_score': np.mean(results[lr]['f1_score'])} for lr in learning_rates}

# Plot results. Accuracy is in percent and F1 is in 0-1, so each gets its own
# y-axis; on a shared axis the F1 curve is squashed flat near zero.
fig, ax_acc = plt.subplots(figsize=(10, 5))
ax_acc.plot(learning_rates, [avg_results[lr]['accuracy'] for lr in learning_rates],
            marker='o', color='tab:blue', label='Accuracy (%)')
ax_acc.set_xscale('log')
ax_acc.set_xlabel('Learning Rate')
ax_acc.set_ylabel('Accuracy (%)')
ax_acc.grid(True)

ax_f1 = ax_acc.twinx()
ax_f1.plot(learning_rates, [avg_results[lr]['f1_score'] for lr in learning_rates],
           marker='s', color='tab:orange', label='F1 Score')
ax_f1.set_ylabel('F1 Score')

ax_acc.set_title('Learning Rate vs Performance - ResNet50')
# Single legend covering the curves of both axes
plotted_lines = ax_acc.get_lines() + ax_f1.get_lines()
ax_acc.legend(plotted_lines, [line.get_label() for line in plotted_lines])
plt.show()

Analyze the models from the learning-rate experiment

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import f1_score

# Directories
# Root of the fold directories; Fold{N} is used as the validation set for fold N below.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
# Locations the per-learning-rate models and pickled histories are read from.
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_Experiments'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_Histories'

# Learning rates tested
# Each value is formatted into the model/history file names looked up below.
learning_rates = [1e-3, 5e-4, 1e-4, 5e-5, 1e-5, 5e-6]
num_folds = 5  # Number of folds used in cross-validation

# Function to load validation images and labels
def load_images_and_labels(image_dir, target_size=(224, 224)):
    """Load every JPEG found under ``image_dir/Good`` and ``image_dir/Bad``.

    Args:
        image_dir: Directory expected to contain ``Good`` and/or ``Bad``
            sub-directories; a sub-directory that does not exist is skipped.
        target_size: Size passed to ``load_img`` so every image is resized
            to the same dimensions.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. Pixel values are divided
        by 255.0; the label is 1 for ``Good`` and 0 for ``Bad``.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_image_dir = os.path.join(image_dir, subdir)
        if not os.path.exists(full_image_dir):
            continue
        # Sorted so the sample order does not depend on the filesystem
        for fname in sorted(os.listdir(full_image_dir)):
            # Case-insensitive match, consistent with the fold-splitting code
            # at the top of this notebook, so *.JPG files are not dropped
            if fname.lower().endswith('.jpg'):
                image_path = os.path.join(full_image_dir, fname)
                image_array = load_img(image_path, target_size=target_size)
                images.append(img_to_array(image_array) / 255.0)
                labels.append(1 if subdir == 'Good' else 0)
    return np.array(images), np.array(labels)

# Per-learning-rate results, one entry per fold. val_losses holds None for a
# fold whose history file or 'val_loss' key is missing.
f1_scores = {lr: [] for lr in learning_rates}
val_losses = {lr: [] for lr in learning_rates}


def _mean_ignoring_missing(values):
    """Return the mean of the non-None entries, or NaN when there are none."""
    present = [value for value in values if value is not None]
    return float(np.mean(present)) if present else float('nan')


# Loop through learning rates and folds
for lr in learning_rates:
    for fold in range(1, num_folds + 1):
        model_path = os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold{fold}.keras')
        history_path = os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold{fold}.pkl')
        val_fold_dir = os.path.join(base_fold_dir, f'Fold{fold}')

        if not os.path.exists(model_path):
            print(f"Warning: Model not found for LR={lr}, Fold={fold}")
        else:
            print(f"Loading model for LR={lr}, Fold={fold}...")
            model = load_model(model_path)  # Load saved model

            # Load validation data
            X_val, y_val = load_images_and_labels(val_fold_dir)
            if len(X_val) == 0:
                # No `continue` here: the history below must still be read
                print(f"Warning: No validation images found for LR={lr}, Fold={fold}")
            else:
                # Threshold the predicted scores at 0.5 and flatten to 1-D
                val_preds = (model.predict(X_val) > 0.5).astype(int).ravel()

                # Compute F1-score
                val_f1 = f1_score(y_val, val_preds, zero_division=1)
                f1_scores[lr].append(val_f1)

                print(f"Fold {fold} - F1 Score: {val_f1:.4f}")

        # Load Validation Loss from History File
        if os.path.exists(history_path):
            with open(history_path, 'rb') as f:
                history = pickle.load(f)

            # Get best (minimum) validation loss
            if 'val_loss' in history:
                min_val_loss = min(history['val_loss'])
                val_losses[lr].append(min_val_loss)
            else:
                print(f"Warning: val_loss missing in history for LR={lr}, Fold={fold}")
                val_losses[lr].append(None)
        else:
            print(f"Warning: History file not found for LR={lr}, Fold={fold}")
            val_losses[lr].append(None)

# Average per learning rate. A learning rate with no data becomes NaN, which
# shows up as a gap in the plot instead of a misleading score of 0.
avg_f1_scores = {lr: _mean_ignoring_missing(f1_scores[lr]) for lr in learning_rates}
avg_val_losses = {lr: _mean_ignoring_missing(val_losses[lr]) for lr in learning_rates}

# Plot F1-score and Validation Loss per Learning Rate
fig, ax1 = plt.subplots(figsize=(10, 5))

# Plot F1 Score
ax1.set_xlabel('Learning Rate')
ax1.set_xscale('log')
ax1.set_ylabel('F1 Score', color='g')
ax1.plot(learning_rates, [avg_f1_scores[lr] for lr in learning_rates], marker='o', linestyle='-', color='g', label='Avg F1 Score')
ax1.tick_params(axis='y', labelcolor='g')

# Twin axis for Validation Loss
ax2 = ax1.twinx()
ax2.set_ylabel('Validation Loss', color='b')
ax2.plot(learning_rates, [avg_val_losses[lr] for lr in learning_rates], marker='s', linestyle='--', color='b', label='Avg Val Loss')
ax2.tick_params(axis='y', labelcolor='b')

# Title and Legend
fig.suptitle('F1 Score & Validation Loss per Learning Rate - ResNet50')
fig.tight_layout()
fig.legend(loc="upper right")
plt.grid(True)
plt.show()

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc

# Directories
# Root of the fold directories; Fold{N} is used as the validation set for fold N below.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
# Locations the per-learning-rate models and pickled histories are read from.
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_Experiments'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_Histories'

# Learning rates tested
# Each value is formatted into the model/history file names looked up below.
learning_rates = [1e-3, 5e-4, 1e-4, 5e-5, 1e-5, 5e-6]
num_folds = 5  # Number of folds used in cross-validation

# Function to load validation images and labels
def load_images_and_labels(image_dir, target_size=(224, 224)):
    """Load every JPEG found under ``image_dir/Good`` and ``image_dir/Bad``.

    Args:
        image_dir: Directory expected to contain ``Good`` and/or ``Bad``
            sub-directories; a sub-directory that does not exist is skipped.
        target_size: Size passed to ``load_img`` so every image is resized
            to the same dimensions.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. Pixel values are divided
        by 255.0; the label is 1 for ``Good`` and 0 for ``Bad``.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_image_dir = os.path.join(image_dir, subdir)
        if not os.path.exists(full_image_dir):
            continue
        # Sorted so the sample order does not depend on the filesystem
        for fname in sorted(os.listdir(full_image_dir)):
            # Case-insensitive match, consistent with the fold-splitting code
            # at the top of this notebook, so *.JPG files are not dropped
            if fname.lower().endswith('.jpg'):
                image_path = os.path.join(full_image_dir, fname)
                image_array = load_img(image_path, target_size=target_size)
                images.append(img_to_array(image_array) / 255.0)
                labels.append(1 if subdir == 'Good' else 0)
    return np.array(images), np.array(labels)

# Per-metric, per-learning-rate results with one entry per evaluated fold.
# Missing validation losses are stored as NaN (not None) so they can be
# averaged with NaN-aware functions; np.nanmean raises TypeError on None.
metrics = {
    "f1": {lr: [] for lr in learning_rates},
    "precision": {lr: [] for lr in learning_rates},
    "recall": {lr: [] for lr in learning_rates},
    "accuracy": {lr: [] for lr in learning_rates},
    "auc_pr": {lr: [] for lr in learning_rates},
    "val_loss": {lr: [] for lr in learning_rates}
}


def _nan_mean(values):
    """Return the mean of the non-NaN entries, or NaN when there are none."""
    arr = np.asarray(values, dtype=float)
    finite = arr[~np.isnan(arr)]
    return float(finite.mean()) if finite.size else float('nan')


# Loop through learning rates and folds
for lr in learning_rates:
    for fold in range(1, num_folds + 1):
        model_path = os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold{fold}.keras')
        history_path = os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold{fold}.pkl')
        val_fold_dir = os.path.join(base_fold_dir, f'Fold{fold}')

        if not os.path.exists(model_path):
            print(f"Warning: Model not found for LR={lr}, Fold={fold}")
        else:
            print(f"Loading model for LR={lr}, Fold={fold}...")
            model = load_model(model_path)  # Load saved model

            # Load validation data
            X_val, y_val = load_images_and_labels(val_fold_dir)
            if len(X_val) == 0:
                # No `continue` here: the history below must still be read
                print(f"Warning: No validation images found for LR={lr}, Fold={fold}")
            else:
                # Predicted scores flattened to 1-D, then thresholded at 0.5
                y_pred_probs = model.predict(X_val).ravel()
                y_pred_labels = (y_pred_probs > 0.5).astype(int)

                # Compute metrics
                f1 = f1_score(y_val, y_pred_labels, zero_division=1)
                precision = precision_score(y_val, y_pred_labels, zero_division=1)
                recall = recall_score(y_val, y_pred_labels, zero_division=1)
                accuracy = accuracy_score(y_val, y_pred_labels)

                # Compute Precision-Recall AUC
                precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
                auc_pr = auc(recall_vals, precision_vals)

                # Store metrics
                metrics["f1"][lr].append(f1)
                metrics["precision"][lr].append(precision)
                metrics["recall"][lr].append(recall)
                metrics["accuracy"][lr].append(accuracy)
                metrics["auc_pr"][lr].append(auc_pr)

                print(f"Fold {fold} - LR {lr}: F1={f1:.4f}, Precision={precision:.4f}, Recall={recall:.4f}, AUC-PR={auc_pr:.4f}")

        # Load Validation Loss from History File
        if os.path.exists(history_path):
            with open(history_path, 'rb') as f:
                history = pickle.load(f)

            if 'val_loss' in history:
                min_val_loss = min(history['val_loss'])
                metrics["val_loss"][lr].append(min_val_loss)
            else:
                print(f"Warning: val_loss missing in history for LR={lr}, Fold={fold}")
                metrics["val_loss"][lr].append(np.nan)
        else:
            print(f"Warning: History file not found for LR={lr}, Fold={fold}")
            metrics["val_loss"][lr].append(np.nan)

# Compute averages per learning rate (NaN where a learning rate has no data)
avg_metrics = {
    metric: {lr: _nan_mean(metrics[metric][lr]) for lr in learning_rates} for metric in metrics
}

# Figure 1: F1 score and validation loss on twin y-axes
fig, ax1 = plt.subplots(figsize=(12, 6))

# Plot F1 Score
ax1.set_xlabel('Learning Rate')
ax1.set_xscale('log')
ax1.set_ylabel('F1 Score', color='g')
ax1.plot(learning_rates, [avg_metrics["f1"][lr] for lr in learning_rates], marker='o', linestyle='-', color='g', label='Avg F1 Score')
ax1.tick_params(axis='y', labelcolor='g')
ax1.grid(True)

# Twin axis for Validation Loss
ax2 = ax1.twinx()
ax2.set_ylabel('Validation Loss', color='b')
ax2.plot(learning_rates, [avg_metrics["val_loss"][lr] for lr in learning_rates], marker='s', linestyle='--', color='b', label='Avg Val Loss')
ax2.tick_params(axis='y', labelcolor='b')

# Figure 2: accuracy. It gets its own figure variable so the first figure's
# handle is not overwritten and both figures can be titled.
fig_acc, ax3 = plt.subplots(figsize=(12, 6))
ax3.set_xlabel('Learning Rate')
ax3.set_xscale('log')
ax3.set_ylabel('Accuracy', color='r')
ax3.plot(learning_rates, [avg_metrics["accuracy"][lr] for lr in learning_rates], marker='^', linestyle='-', color='r', label='Avg Accuracy')
ax3.tick_params(axis='y', labelcolor='r')
ax3.grid(True)

# Titles and Legends
fig.suptitle('Performance Metrics per Learning Rate - ResNet50')
fig_acc.suptitle('Performance Metrics per Learning Rate - ResNet50')
ax1.legend(loc="upper left")
ax2.legend(loc="upper right")
ax3.legend(loc="upper right")
plt.show()

# Print Results
print("\n### Summary Metrics Per Learning Rate ###")
for lr in learning_rates:
    print(f"LR: {lr:.0e} | F1: {avg_metrics['f1'][lr]:.4f} | Precision: {avg_metrics['precision'][lr]:.4f} | Recall: {avg_metrics['recall'][lr]:.4f} | AUC-PR: {avg_metrics['auc_pr'][lr]:.4f} | Accuracy: {avg_metrics['accuracy'][lr]:.4f} | Val Loss: {avg_metrics['val_loss'][lr]:.4f}")

In [None]:
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np

# === Settings ===
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_Histories'
learning_rates = [1e-3, 5e-4, 1e-4, 5e-5, 1e-5, 5e-6]
num_folds = 5

# === Containers for averaged metrics (keyed by the formatted learning rate) ===
avg_train_acc = {}
avg_val_acc = {}
avg_train_loss = {}
avg_val_loss = {}

# Keys every history dict must provide to be included in the averages
required_keys = ('accuracy', 'val_accuracy', 'loss', 'val_loss')


def truncate(histories):
    """Stack per-fold curves into a 2-D array, cut to the shortest curve.

    Folds can have different lengths, so every curve is trimmed to the
    shortest one before stacking. `histories` must be non-empty.
    """
    min_len = min(len(h) for h in histories)
    return np.array([h[:min_len] for h in histories])


# === Load and average histories ===
for lr in learning_rates:
    # Rebuild the learning-rate text used in the history file names
    if lr >= 1e-4:
        lr_key = f"{lr:.4f}".rstrip('0').rstrip('.')  # e.g. 0.001
    else:
        lr_key = f"{lr:.0e}".replace('E', 'e')         # e.g. 5e-05

    train_acc_all, val_acc_all = [], []
    train_loss_all, val_loss_all = [], []

    for fold in range(1, num_folds + 1):
        filename = f"history_resnet_lr{lr_key}_fold{fold}.pkl"
        filepath = os.path.join(history_save_dir, filename)

        if not os.path.exists(filepath):
            print(f"❌ File not found: {filename}")
            continue

        with open(filepath, 'rb') as f:
            history = pickle.load(f)

        if all(k in history for k in required_keys):
            train_acc_all.append(history['accuracy'])
            val_acc_all.append(history['val_accuracy'])
            train_loss_all.append(history['loss'])
            val_loss_all.append(history['val_loss'])
        else:
            print(f"⚠️ Missing keys in: {filename}")

    # Average epoch-by-epoch over whichever folds were loaded
    if train_acc_all:
        avg_train_acc[lr_key] = np.mean(truncate(train_acc_all), axis=0)
        avg_val_acc[lr_key] = np.mean(truncate(val_acc_all), axis=0)
        avg_train_loss[lr_key] = np.mean(truncate(train_loss_all), axis=0)
        avg_val_loss[lr_key] = np.mean(truncate(val_loss_all), axis=0)
        print(f"✅ Averaged data loaded for LR={lr_key}")
    else:
        print(f"⚠️ No data for LR={lr_key}")

# === Plotting ===
fig, axs = plt.subplots(1, 2, figsize=(14, 5))

# --- Accuracy ---
for lr_key in avg_train_acc:
    axs[0].plot(avg_train_acc[lr_key], linestyle='--', label=f'Train LR={lr_key}')
    axs[0].plot(avg_val_acc[lr_key], linestyle='-', label=f'Val LR={lr_key}')
axs[0].set_title('Training and Validation Accuracy')
axs[0].set_xlabel('Epoch')
axs[0].set_ylabel('Accuracy')
axs[0].legend()
axs[0].grid(True)

# --- Loss ---
for lr_key in avg_train_loss:
    axs[1].plot(avg_train_loss[lr_key], linestyle='--', label=f'Train LR={lr_key}')
    axs[1].plot(avg_val_loss[lr_key], linestyle='-', label=f'Val LR={lr_key}')
axs[1].set_title('Training and Validation Loss')
axs[1].set_xlabel('Epoch')
axs[1].set_ylabel('Loss')
axs[1].legend()
axs[1].grid(True)

fig.suptitle('ResNet50 - Learning Rate Comparison (Accuracy & Loss)', fontsize=16)
plt.tight_layout()
plt.show()

## More patience for the two best learning rates

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score

# Reset Keras state and let TensorFlow grow GPU memory on demand.
tf.keras.backend.clear_session()
physical_devices = tf.config.list_physical_devices('GPU')
try:
    # Memory growth has to be configured identically on every visible GPU,
    # so apply it to all of them rather than only the first.
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    # Raised when the GPUs were already initialised before this cell ran
    print(f"GPU Memory Growth Setting Error: {e}")

# Directories
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
test_image_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsBest2'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesBest2'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# Folds (Fold1 .. Fold5 under base_fold_dir)
fold_dirs = [os.path.join(base_fold_dir, f'Fold{i}') for i in range(1, 6)]

# Learning rates to test
learning_rates = [5e-6, 1e-5]

# Function to load images and labels
def load_images_and_labels(image_dir):
    """Load the JPEG images found in ``image_dir/Good`` and ``image_dir/Bad``.

    Each image is resized to 224x224 and divided by 255. Images from
    'Good' get label 1, images from 'Bad' get label 0. A class
    sub-directory that does not exist is skipped.

    Args:
        image_dir: Directory containing the 'Good' and/or 'Bad' folders.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays in matching order.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_image_dir = os.path.join(image_dir, subdir)
        if not os.path.exists(full_image_dir):
            continue
        label = 1 if subdir == 'Good' else 0
        # os.listdir returns names in arbitrary order; sort for reproducible runs
        for fname in sorted(os.listdir(full_image_dir)):
            # Case-insensitive so '.JPG' files are not silently dropped
            if fname.lower().endswith('.jpg'):
                image_path = os.path.join(full_image_dir, fname)
                image_array = load_img(image_path, target_size=(224, 224))
                images.append(img_to_array(image_array) / 255.0)
                labels.append(label)
    return np.array(images), np.array(labels)

# Function to create the ResNet50 model
def create_resnet50_model(image_shape, learning_rate):
    """Build and compile a binary classifier on a frozen ResNet50 backbone.

    Args:
        image_shape: Input shape passed to ResNet50 as ``input_shape``.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` with one sigmoid output unit.

    NOTE(review): the loaders in this file feed images scaled to [0, 1];
    ImageNet ResNet50 weights are normally paired with
    ``resnet50.preprocess_input`` - confirm the scaling is intentional.
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # only the new head is trained

    # Classification head, applied in order on top of the backbone output
    head_layers = (
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    )
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Per-learning-rate validation scores, one entry per fold
results = {lr: {'accuracy': [], 'f1_score': []} for lr in learning_rates}

# Cross-validation loop: every fold serves once as the validation set
for lr in learning_rates:
    print(f"\nTesting Learning Rate: {lr}")
    # Derive the fold count from fold_dirs instead of hard-coding it
    for val_fold_index in range(len(fold_dirs)):
        print(f"\nProcessing Fold {val_fold_index + 1} as Validation Set")

        train_folds = [fold for i, fold in enumerate(fold_dirs) if i != val_fold_index]
        val_fold = fold_dirs[val_fold_index]

        # Load training data
        X_train, y_train = [], []
        for train_fold in train_folds:
            images, labels = load_images_and_labels(train_fold)
            X_train.append(images)
            y_train.append(labels)
        X_train, y_train = np.concatenate(X_train), np.concatenate(y_train)

        # Load validation data
        X_val, y_val = load_images_and_labels(val_fold)

        # Train the model
        model = create_resnet50_model(X_train.shape[1:], lr)
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=10, min_lr=1e-7),
            ModelCheckpoint(os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold{val_fold_index + 1}.keras'), save_best_only=True)
        ]

        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=70,
            batch_size=32,
            callbacks=callbacks,
            verbose=2
        )

        # Save history
        history_path = os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold{val_fold_index + 1}.pkl')
        with open(history_path, 'wb') as f:
            pickle.dump(history.history, f)

        # Evaluate on validation set (threshold predictions at 0.5, as 1-D labels)
        val_preds = (model.predict(X_val) > 0.5).astype(int).flatten()
        val_f1 = f1_score(y_val, val_preds)
        val_accuracy = np.mean(val_preds == y_val)

        results[lr]['accuracy'].append(val_accuracy * 100)
        results[lr]['f1_score'].append(val_f1)

        print(f"Fold {val_fold_index + 1} - Accuracy: {val_accuracy:.2%}, F1 Score: {val_f1:.4f}")

        # Free up memory after each fold
        del model
        tf.keras.backend.clear_session()

# Compute average accuracy and F1-score per learning rate
avg_results = {lr: {'accuracy': np.mean(results[lr]['accuracy']), 'f1_score': np.mean(results[lr]['f1_score'])} for lr in learning_rates}

# Plot results. Accuracy is stored in percent (0-100) and F1 in 0-1, so
# each gets its own y-axis; on a shared axis the F1 curve is flattened.
fig, ax_acc = plt.subplots(figsize=(10, 5))
ax_acc.plot(learning_rates, [avg_results[lr]['accuracy'] for lr in learning_rates], marker='o', color='tab:blue', label='Accuracy (%)')
ax_acc.set_xscale('log')
ax_acc.set_xlabel('Learning Rate')
ax_acc.set_ylabel('Accuracy (%)')
ax_acc.grid(True)

ax_f1 = ax_acc.twinx()
ax_f1.plot(learning_rates, [avg_results[lr]['f1_score'] for lr in learning_rates], marker='s', color='tab:orange', label='F1 Score')
ax_f1.set_ylabel('F1 Score')

# One legend covering the lines of both axes
acc_handles, acc_labels = ax_acc.get_legend_handles_labels()
f1_handles, f1_labels = ax_f1.get_legend_handles_labels()
ax_acc.legend(acc_handles + f1_handles, acc_labels + f1_labels)
ax_acc.set_title('Learning Rate vs Performance - ResNet50')
plt.show()

Analyze

In [None]:
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score, precision_score, recall_score, average_precision_score
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# === CONFIG ===
# Directories the per-fold histories (.pkl) and checkpoints (.keras) are read from
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesBest2'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsBest2'
# Test images, read from 'Good' and 'Bad' sub-folders
test_image_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test'
# NOTE(review): the training cell that writes to the two directories above
# uses learning_rates = [5e-6, 1e-5]. File names are built from the learning
# rate, so 5e-5 here may be a typo for 5e-6 - confirm before running.
learning_rates = [5e-5, 1e-5]
num_folds = 5

# === Load test data ===
def load_test_images_and_labels(image_dir):
    """Load the JPEG test images from ``image_dir/Good`` and ``image_dir/Bad``.

    Each image is resized to 224x224 and divided by 255. 'Good' images
    get label 1, 'Bad' images get label 0.

    Args:
        image_dir: Directory containing both the 'Good' and 'Bad' folders.

    Returns:
        A tuple ``(images, labels, paths)``: two NumPy arrays and the list
        of source file paths, all in matching order.

    Raises:
        FileNotFoundError: If either class folder is missing (from os.listdir).
    """
    images, labels, paths = [], [], []
    for subdir in ['Good', 'Bad']:
        class_path = os.path.join(image_dir, subdir)
        label = 1 if subdir == 'Good' else 0
        # os.listdir returns names in arbitrary order; sort for reproducible runs
        for fname in sorted(os.listdir(class_path)):
            # Case-insensitive so '.JPG' files are not silently dropped
            if fname.lower().endswith('.jpg'):
                path = os.path.join(class_path, fname)
                img = load_img(path, target_size=(224, 224))
                img_array = img_to_array(img) / 255.0
                images.append(img_array)
                labels.append(label)
                paths.append(path)
    return np.array(images), np.array(labels), paths

X_test, y_test, test_paths = load_test_images_and_labels(test_image_dir)

# === Plot histories and test metrics ===
# For each learning rate: collect the per-fold training curves, score every
# fold's saved model on the test set, then plot the curves and print averages.
for lr in learning_rates:
    plt.figure(figsize=(12, 5))
    val_accuracies, val_losses, train_accuracies, train_losses = [], [], [], []

    print(f"\n🔍 Results for Learning Rate = {lr}")
    f1s, precisions, recalls, auprcs = [], [], [], []

    for fold in range(1, num_folds + 1):
        # === Load history ===
        history_path = os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold{fold}.pkl')
        with open(history_path, 'rb') as f:
            history = pickle.load(f)

        train_accuracies.append(history['accuracy'])
        val_accuracies.append(history['val_accuracy'])
        train_losses.append(history['loss'])
        val_losses.append(history['val_loss'])

        # === Load model and predict on test set ===
        model_path = os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold{fold}.keras')
        model = load_model(model_path)
        # Keep the raw scores: the precision-recall curve behind AUC-PR is
        # built from scores, not from labels already thresholded at 0.5.
        y_prob = model.predict(X_test).flatten()
        y_pred = (y_prob > 0.5).astype(int)

        f1s.append(f1_score(y_test, y_pred))
        precisions.append(precision_score(y_test, y_pred))
        recalls.append(recall_score(y_test, y_pred))
        auprcs.append(average_precision_score(y_test, y_prob))

    # === Plot Accuracy ===
    plt.subplot(1, 2, 1)
    for i in range(num_folds):
        plt.plot(val_accuracies[i], label=f'Fold {i+1} Val Acc')
        plt.plot(train_accuracies[i], linestyle='--', label=f'Fold {i+1} Train Acc')
    plt.title(f'Accuracy (LR={lr})')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.grid(True)

    # === Plot Loss ===
    plt.subplot(1, 2, 2)
    for i in range(num_folds):
        plt.plot(val_losses[i], label=f'Fold {i+1} Val Loss')
        plt.plot(train_losses[i], linestyle='--', label=f'Fold {i+1} Train Loss')
    plt.title(f'Loss (LR={lr})')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)

    plt.suptitle(f"Training Curves for LR = {lr}")
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()

    # === Print Test Metrics ===
    print(f"🧪 Test Set Metrics (Average Across Folds) for LR = {lr}:")
    print(f"🔹 F1 Score:  {np.mean(f1s):.4f}")
    print(f"🔹 Precision: {np.mean(precisions):.4f}")
    print(f"🔹 Recall:    {np.mean(recalls):.4f}")
    print(f"🔹 AUC-PR:    {np.mean(auprcs):.4f}")

Analyze 1e-5

In [None]:
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    f1_score, precision_score, recall_score,
    average_precision_score, confusion_matrix
)
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# === CONFIG ===
# Learning rate whose runs are analysed; it is embedded in the file names read below
learning_rate = 1e-5
num_folds = 5

# Directories the per-fold histories (.pkl) and checkpoints (.keras) are read from
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesBest2'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsBest2'
# Test images, read from 'Good' and 'Bad' sub-folders
test_image_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test'

# === Load test data ===
def load_test_images_and_labels(image_dir):
    """Load the JPEG test images from ``image_dir/Good`` and ``image_dir/Bad``.

    Each image is resized to 224x224 and divided by 255. 'Good' images
    get label 1, 'Bad' images get label 0.

    Args:
        image_dir: Directory containing both the 'Good' and 'Bad' folders.

    Returns:
        A tuple ``(images, labels, paths)``: two NumPy arrays and the list
        of source file paths, all in matching order.

    Raises:
        FileNotFoundError: If either class folder is missing (from os.listdir).
    """
    images, labels, paths = [], [], []
    for subdir in ['Good', 'Bad']:
        class_path = os.path.join(image_dir, subdir)
        label = 1 if subdir == 'Good' else 0
        # os.listdir returns names in arbitrary order; sort for reproducible runs
        for fname in sorted(os.listdir(class_path)):
            # Case-insensitive so '.JPG' files are not silently dropped
            if fname.lower().endswith('.jpg'):
                path = os.path.join(class_path, fname)
                img = load_img(path, target_size=(224, 224))
                img_array = img_to_array(img) / 255.0
                images.append(img_array)
                labels.append(label)
                paths.append(path)
    return np.array(images), np.array(labels), paths

X_test, y_test, test_paths = load_test_images_and_labels(test_image_dir)

# === Collect histories and test metrics ===
# No figure is opened here: each confusion matrix and the training-curve
# plot create their own, so an extra one would only show up empty.
val_accuracies, val_losses, train_accuracies, train_losses = [], [], [], []

print(f"\n🔍 Results for Learning Rate = {learning_rate}")
f1s, precisions, recalls, auprcs = [], [], [], []

for fold in range(1, num_folds + 1):
    print(f"\n📁 Fold {fold}")

    # === Load history ===
    history_path = os.path.join(history_save_dir, f'history_resnet_lr{learning_rate}_fold{fold}.pkl')
    with open(history_path, 'rb') as f:
        history = pickle.load(f)

    train_accuracies.append(history['accuracy'])
    val_accuracies.append(history['val_accuracy'])
    train_losses.append(history['loss'])
    val_losses.append(history['val_loss'])

    # === Load model and predict on test set ===
    model_path = os.path.join(model_save_dir, f'model_resnet_lr{learning_rate}_fold{fold}.keras')
    model = load_model(model_path)
    # Keep the raw scores: the precision-recall curve behind AUC-PR is
    # built from scores, not from labels already thresholded at 0.5.
    y_prob = model.predict(X_test).flatten()
    y_pred = (y_prob > 0.5).astype(int)

    # === Metrics per fold ===
    f1 = f1_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    auprc = average_precision_score(y_test, y_prob)

    f1s.append(f1)
    precisions.append(precision)
    recalls.append(recall)
    auprcs.append(auprc)

    print(f"🔹 F1 Score:  {f1:.4f}")
    print(f"🔹 Precision: {precision:.4f}")
    print(f"🔹 Recall:    {recall:.4f}")
    print(f"🔹 AUC-PR:    {auprc:.4f}")

    # === Confusion Matrix (rows = true label, columns = predicted label) ===
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(4, 3))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Bad', 'Good'], yticklabels=['Bad', 'Good'])
    plt.title(f"Confusion Matrix - Fold {fold}")
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.tight_layout()
    plt.show()

# === Plot Accuracy ===
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
for i in range(num_folds):
    plt.plot(val_accuracies[i], label=f'Fold {i+1} Val Acc')
    plt.plot(train_accuracies[i], linestyle='--', label=f'Fold {i+1} Train Acc')
plt.title(f'Accuracy (LR={learning_rate})')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)

# === Plot Loss ===
plt.subplot(1, 2, 2)
for i in range(num_folds):
    plt.plot(val_losses[i], label=f'Fold {i+1} Val Loss')
    plt.plot(train_losses[i], linestyle='--', label=f'Fold {i+1} Train Loss')
plt.title(f'Loss (LR={learning_rate})')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)

plt.suptitle(f"Training Curves for LR = {learning_rate}")
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()

# === Print Average Test Metrics ===
print(f"\n📊 Average Test Set Metrics for LR = {learning_rate}")
print(f"🔹 F1 Score:  {np.mean(f1s):.4f}")
print(f"🔹 Precision: {np.mean(precisions):.4f}")
print(f"🔹 Recall:    {np.mean(recalls):.4f}")
print(f"🔹 AUC-PR:    {np.mean(auprcs):.4f}")

1e-5 with more patience

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score

# Reset Keras state and let TensorFlow grow GPU memory on demand.
tf.keras.backend.clear_session()
physical_devices = tf.config.list_physical_devices('GPU')
try:
    # Memory growth has to be configured identically on every visible GPU,
    # so apply it to all of them rather than only the first.
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    # Raised when the GPUs were already initialised before this cell ran
    print(f"GPU Memory Growth Setting Error: {e}")

# Directories (models and histories share one output directory in this run)
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
test_image_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_BestResNet'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_BestResNet'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# Folds (Fold1 .. Fold5 under base_fold_dir)
fold_dirs = [os.path.join(base_fold_dir, f'Fold{i}') for i in range(1, 6)]

# Learning rates to test
learning_rates = [1e-5]

# Function to load images and labels
def load_images_and_labels(image_dir):
    """Load the JPEG images found in ``image_dir/Good`` and ``image_dir/Bad``.

    Each image is resized to 224x224 and divided by 255. Images from
    'Good' get label 1, images from 'Bad' get label 0. A class
    sub-directory that does not exist is skipped.

    Args:
        image_dir: Directory containing the 'Good' and/or 'Bad' folders.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays in matching order.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_image_dir = os.path.join(image_dir, subdir)
        if not os.path.exists(full_image_dir):
            continue
        label = 1 if subdir == 'Good' else 0
        # os.listdir returns names in arbitrary order; sort for reproducible runs
        for fname in sorted(os.listdir(full_image_dir)):
            # Case-insensitive so '.JPG' files are not silently dropped
            if fname.lower().endswith('.jpg'):
                image_path = os.path.join(full_image_dir, fname)
                image_array = load_img(image_path, target_size=(224, 224))
                images.append(img_to_array(image_array) / 255.0)
                labels.append(label)
    return np.array(images), np.array(labels)

# Function to create the ResNet50 model
def create_resnet50_model(image_shape, learning_rate):
    """Build and compile a binary classifier on a frozen ResNet50 backbone.

    Args:
        image_shape: Input shape passed to ResNet50 as ``input_shape``.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` with one sigmoid output unit.

    NOTE(review): the loaders in this file feed images scaled to [0, 1];
    ImageNet ResNet50 weights are normally paired with
    ``resnet50.preprocess_input`` - confirm the scaling is intentional.
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # only the new head is trained

    # Classification head, applied in order on top of the backbone output
    head_layers = (
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    )
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Per-learning-rate validation scores, one entry per fold
results = {lr: {'accuracy': [], 'f1_score': []} for lr in learning_rates}

# Cross-validation loop: every fold serves once as the validation set
for lr in learning_rates:
    print(f"\nTesting Learning Rate: {lr}")
    # Derive the fold count from fold_dirs instead of hard-coding it
    for val_fold_index in range(len(fold_dirs)):
        print(f"\nProcessing Fold {val_fold_index + 1} as Validation Set")

        train_folds = [fold for i, fold in enumerate(fold_dirs) if i != val_fold_index]
        val_fold = fold_dirs[val_fold_index]

        # Load training data
        X_train, y_train = [], []
        for train_fold in train_folds:
            images, labels = load_images_and_labels(train_fold)
            X_train.append(images)
            y_train.append(labels)
        X_train, y_train = np.concatenate(X_train), np.concatenate(y_train)

        # Load validation data
        X_val, y_val = load_images_and_labels(val_fold)

        # Train the model
        model = create_resnet50_model(X_train.shape[1:], lr)
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=10, min_lr=1e-7),
            ModelCheckpoint(os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold{val_fold_index + 1}.keras'), save_best_only=True)
        ]

        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=150,
            batch_size=32,
            callbacks=callbacks,
            verbose=2
        )

        # Save history
        history_path = os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold{val_fold_index + 1}.pkl')
        with open(history_path, 'wb') as f:
            pickle.dump(history.history, f)

        # Evaluate on validation set (threshold predictions at 0.5, as 1-D labels)
        val_preds = (model.predict(X_val) > 0.5).astype(int).flatten()
        val_f1 = f1_score(y_val, val_preds)
        val_accuracy = np.mean(val_preds == y_val)

        results[lr]['accuracy'].append(val_accuracy * 100)
        results[lr]['f1_score'].append(val_f1)

        print(f"Fold {val_fold_index + 1} - Accuracy: {val_accuracy:.2%}, F1 Score: {val_f1:.4f}")

        # Free up memory after each fold
        del model
        tf.keras.backend.clear_session()

# Compute average accuracy and F1-score per learning rate
avg_results = {lr: {'accuracy': np.mean(results[lr]['accuracy']), 'f1_score': np.mean(results[lr]['f1_score'])} for lr in learning_rates}

# Plot results. Accuracy is stored in percent (0-100) and F1 in 0-1, so
# each gets its own y-axis; on a shared axis the F1 curve is flattened.
fig, ax_acc = plt.subplots(figsize=(10, 5))
ax_acc.plot(learning_rates, [avg_results[lr]['accuracy'] for lr in learning_rates], marker='o', color='tab:blue', label='Accuracy (%)')
ax_acc.set_xscale('log')
ax_acc.set_xlabel('Learning Rate')
ax_acc.set_ylabel('Accuracy (%)')
ax_acc.grid(True)

ax_f1 = ax_acc.twinx()
ax_f1.plot(learning_rates, [avg_results[lr]['f1_score'] for lr in learning_rates], marker='s', color='tab:orange', label='F1 Score')
ax_f1.set_ylabel('F1 Score')

# One legend covering the lines of both axes
acc_handles, acc_labels = ax_acc.get_legend_handles_labels()
f1_handles, f1_labels = ax_f1.get_legend_handles_labels()
ax_acc.legend(acc_handles + f1_handles, acc_labels + f1_labels)
ax_acc.set_title('Learning Rate vs Performance - ResNet50')
plt.show()

Smaller learning rates and more epochs (1e-6, 5e-6)

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score

# Reset Keras state and let TensorFlow grow GPU memory on demand.
tf.keras.backend.clear_session()
physical_devices = tf.config.list_physical_devices('GPU')
try:
    # Memory growth has to be configured identically on every visible GPU,
    # so apply it to all of them rather than only the first.
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    # Raised when the GPUs were already initialised before this cell ran
    print(f"GPU Memory Growth Setting Error: {e}")

# Directories
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
test_image_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsBest2Minus6Resnet'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesBest2Minus6Resnet'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# Folds (Fold1 .. Fold5 under base_fold_dir)
fold_dirs = [os.path.join(base_fold_dir, f'Fold{i}') for i in range(1, 6)]

# Learning rates to test
# NOTE(review): the section heading mentions 1e-6 and 5e-6, but 6e-5 is
# configured here - confirm which value is intended.
learning_rates = [6e-5, 1e-6]

# Function to load images and labels
def load_images_and_labels(image_dir):
    """Load the JPEG images found in ``image_dir/Good`` and ``image_dir/Bad``.

    Each image is resized to 224x224 and divided by 255. Images from
    'Good' get label 1, images from 'Bad' get label 0. A class
    sub-directory that does not exist is skipped.

    Args:
        image_dir: Directory containing the 'Good' and/or 'Bad' folders.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays in matching order.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_image_dir = os.path.join(image_dir, subdir)
        if not os.path.exists(full_image_dir):
            continue
        label = 1 if subdir == 'Good' else 0
        # os.listdir returns names in arbitrary order; sort for reproducible runs
        for fname in sorted(os.listdir(full_image_dir)):
            # Case-insensitive so '.JPG' files are not silently dropped
            if fname.lower().endswith('.jpg'):
                image_path = os.path.join(full_image_dir, fname)
                image_array = load_img(image_path, target_size=(224, 224))
                images.append(img_to_array(image_array) / 255.0)
                labels.append(label)
    return np.array(images), np.array(labels)

# Function to create the ResNet50 model
def create_resnet50_model(image_shape, learning_rate):
    """Build and compile a binary classifier on a frozen ResNet50 backbone.

    Args:
        image_shape: Input shape passed to ResNet50 as ``input_shape``.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` with one sigmoid output unit.

    NOTE(review): the loaders in this file feed images scaled to [0, 1];
    ImageNet ResNet50 weights are normally paired with
    ``resnet50.preprocess_input`` - confirm the scaling is intentional.
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # only the new head is trained

    # Classification head, applied in order on top of the backbone output
    head_layers = (
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    )
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Per-learning-rate validation scores, one entry per fold
results = {lr: {'accuracy': [], 'f1_score': []} for lr in learning_rates}

# Cross-validation loop: every fold serves once as the validation set
for lr in learning_rates:
    print(f"\nTesting Learning Rate: {lr}")
    # Derive the fold count from fold_dirs instead of hard-coding it
    for val_fold_index in range(len(fold_dirs)):
        print(f"\nProcessing Fold {val_fold_index + 1} as Validation Set")

        train_folds = [fold for i, fold in enumerate(fold_dirs) if i != val_fold_index]
        val_fold = fold_dirs[val_fold_index]

        # Load training data
        X_train, y_train = [], []
        for train_fold in train_folds:
            images, labels = load_images_and_labels(train_fold)
            X_train.append(images)
            y_train.append(labels)
        X_train, y_train = np.concatenate(X_train), np.concatenate(y_train)

        # Load validation data
        X_val, y_val = load_images_and_labels(val_fold)

        # Train the model
        model = create_resnet50_model(X_train.shape[1:], lr)
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=10, min_lr=1e-7),
            ModelCheckpoint(os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold{val_fold_index + 1}.keras'), save_best_only=True)
        ]

        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=150,
            batch_size=64,
            callbacks=callbacks,
            verbose=2
        )

        # Save history
        history_path = os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold{val_fold_index + 1}.pkl')
        with open(history_path, 'wb') as f:
            pickle.dump(history.history, f)

        # Evaluate on validation set (threshold predictions at 0.5, as 1-D labels)
        val_preds = (model.predict(X_val) > 0.5).astype(int).flatten()
        val_f1 = f1_score(y_val, val_preds)
        val_accuracy = np.mean(val_preds == y_val)

        results[lr]['accuracy'].append(val_accuracy * 100)
        results[lr]['f1_score'].append(val_f1)

        print(f"Fold {val_fold_index + 1} - Accuracy: {val_accuracy:.2%}, F1 Score: {val_f1:.4f}")

        # Free up memory after each fold
        del model
        tf.keras.backend.clear_session()

# Compute average accuracy and F1-score per learning rate
avg_results = {lr: {'accuracy': np.mean(results[lr]['accuracy']), 'f1_score': np.mean(results[lr]['f1_score'])} for lr in learning_rates}

# Plot results. Accuracy is stored in percent (0-100) and F1 in 0-1, so
# each gets its own y-axis; on a shared axis the F1 curve is flattened.
fig, ax_acc = plt.subplots(figsize=(10, 5))
ax_acc.plot(learning_rates, [avg_results[lr]['accuracy'] for lr in learning_rates], marker='o', color='tab:blue', label='Accuracy (%)')
ax_acc.set_xscale('log')
ax_acc.set_xlabel('Learning Rate')
ax_acc.set_ylabel('Accuracy (%)')
ax_acc.grid(True)

ax_f1 = ax_acc.twinx()
ax_f1.plot(learning_rates, [avg_results[lr]['f1_score'] for lr in learning_rates], marker='s', color='tab:orange', label='F1 Score')
ax_f1.set_ylabel('F1 Score')

# One legend covering the lines of both axes
acc_handles, acc_labels = ax_acc.get_legend_handles_labels()
f1_handles, f1_labels = ax_f1.get_legend_handles_labels()
ax_acc.legend(acc_handles + f1_handles, acc_labels + f1_labels)
ax_acc.set_title('Learning Rate vs Performance - ResNet50')
plt.show()

Analyze 6e-5

In [None]:
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score, precision_score, recall_score, average_precision_score
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# === CONFIG ===
# Learning rate whose runs are analysed; it is embedded in the file names read below
learning_rate = 6e-5
num_folds = 5

# base_fold_dir is defined here but not referenced again in this cell
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
# Test images, read from 'Good' and 'Bad' sub-folders
test_image_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test'
# Directories the per-fold checkpoints (.keras) and histories (.pkl) are read from
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsBest2Minus6Resnet'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesBest2Minus6Resnet'
# Creates the directories if absent; this cell only reads from them afterwards
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Load test data ===
def load_test_images_and_labels(image_dir):
    """Load the JPEG test images from ``image_dir/Good`` and ``image_dir/Bad``.

    Each image is resized to 224x224 and divided by 255. 'Good' images
    get label 1, 'Bad' images get label 0.

    Args:
        image_dir: Directory containing both the 'Good' and 'Bad' folders.

    Returns:
        A tuple ``(images, labels, paths)``: two NumPy arrays and the list
        of source file paths, all in matching order.

    Raises:
        FileNotFoundError: If either class folder is missing (from os.listdir).
    """
    images, labels, paths = [], [], []
    for subdir in ['Good', 'Bad']:
        class_path = os.path.join(image_dir, subdir)
        label = 1 if subdir == 'Good' else 0
        # os.listdir returns names in arbitrary order; sort for reproducible runs
        for fname in sorted(os.listdir(class_path)):
            # Case-insensitive so '.JPG' files are not silently dropped
            if fname.lower().endswith('.jpg'):
                path = os.path.join(class_path, fname)
                img = load_img(path, target_size=(224, 224))
                img_array = img_to_array(img) / 255.0
                images.append(img_array)
                labels.append(label)
                paths.append(path)
    return np.array(images), np.array(labels), paths

X_test, y_test, test_paths = load_test_images_and_labels(test_image_dir)

# === Collect histories and test metrics ===
val_accuracies, val_losses, train_accuracies, train_losses = [], [], [], []

print(f"\n🔍 Results for Learning Rate = {learning_rate}")
f1s, precisions, recalls, auprcs = [], [], [], []

for fold in range(1, num_folds + 1):
    # === Load history ===
    history_path = os.path.join(history_save_dir, f'history_resnet_lr{learning_rate}_fold{fold}.pkl')
    with open(history_path, 'rb') as f:
        history = pickle.load(f)

    train_accuracies.append(history['accuracy'])
    val_accuracies.append(history['val_accuracy'])
    train_losses.append(history['loss'])
    val_losses.append(history['val_loss'])

    # === Load model and predict on test set ===
    model_path = os.path.join(model_save_dir, f'model_resnet_lr{learning_rate}_fold{fold}.keras')
    model = load_model(model_path)
    # Keep the raw scores: the precision-recall curve behind AUC-PR is
    # built from scores, not from labels already thresholded at 0.5.
    y_prob = model.predict(X_test).flatten()
    y_pred = (y_prob > 0.5).astype(int)

    f1s.append(f1_score(y_test, y_pred))
    precisions.append(precision_score(y_test, y_pred))
    recalls.append(recall_score(y_test, y_pred))
    auprcs.append(average_precision_score(y_test, y_prob))

# === Plot Accuracy ===
# The figure is opened here, next to the subplots that draw on it
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
for i in range(num_folds):
    plt.plot(val_accuracies[i], label=f'Fold {i+1} Val Acc')
    plt.plot(train_accuracies[i], linestyle='--', label=f'Fold {i+1} Train Acc')
plt.title(f'Accuracy (LR={learning_rate})')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)

# === Plot Loss ===
plt.subplot(1, 2, 2)
for i in range(num_folds):
    plt.plot(val_losses[i], label=f'Fold {i+1} Val Loss')
    plt.plot(train_losses[i], linestyle='--', label=f'Fold {i+1} Train Loss')
plt.title(f'Loss (LR={learning_rate})')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)

plt.suptitle(f"Training Curves for LR = {learning_rate}")
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()

# === Print Test Metrics ===
print(f"🧪 Test Set Metrics (Average Across Folds) for LR = {learning_rate}:")
print(f"🔹 F1 Score:  {np.mean(f1s):.4f}")
print(f"🔹 Precision: {np.mean(precisions):.4f}")
print(f"🔹 Recall:    {np.mean(recalls):.4f}")
print(f"🔹 AUC-PR:    {np.mean(auprcs):.4f}")

## Full run with MobileNetV2 using the best learning rate

In [None]:
import os
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc
import pandas as pd
import gc
import time

# Prevent TensorFlow from freezing
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except RuntimeError as e:
        print(f"GPU Memory Growth Setting Error: {e}")

# Directories
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Final_MobileNet2_Stable'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Final_Histories_MobileNet2_Stable'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# Folds
fold_dirs = [os.path.join(base_fold_dir, f'Fold{i}') for i in range(1, 6)]

# Function to load images and labels
def load_images_and_labels(image_dir):
    """Load the JPEG images of one fold together with their binary labels.

    Args:
        image_dir: Directory expected to contain ``Good`` and/or ``Bad``
            sub-directories. A missing sub-directory is skipped.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. Every image is resized to
        224x224 and divided by 255.0; the label is 1 for ``Good`` and 0 for
        ``Bad``. Both arrays are empty when no image is found.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_image_dir = os.path.join(image_dir, subdir)
        if not os.path.exists(full_image_dir):
            continue
        label = 1 if subdir == 'Good' else 0
        # os.listdir returns names in arbitrary order; sorting keeps the
        # sample order reproducible between runs.
        for fname in sorted(os.listdir(full_image_dir)):
            # Case-insensitive match, like the fold-splitting step, so files
            # named '*.JPG' that were copied into a fold are not dropped here.
            if fname.lower().endswith('.jpg'):
                image_path = os.path.join(full_image_dir, fname)
                image_array = load_img(image_path, target_size=(224, 224))
                images.append(img_to_array(image_array) / 255.0)
                labels.append(label)
    return np.array(images), np.array(labels)

# Function to create the MobileNetV2 model
def create_mobilenetv2_model(image_shape, learning_rate=1e-4):
    """Build and compile a MobileNetV2-based binary classifier.

    Args:
        image_shape: Input shape passed to MobileNetV2 as ``input_shape``.
        learning_rate: Initial learning rate for the Adam optimizer. Defaults
            to 1e-4, the value that used to be hard-coded here, so existing
            one-argument calls behave as before.

    Returns:
        A compiled Keras ``Model`` with a single sigmoid output, trained with
        binary cross-entropy and reporting accuracy.
    """
    base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=image_shape)

    # Unfreeze the last 20 layers for fine-tuning; everything before stays frozen.
    base_model.trainable = True
    for layer in base_model.layers[:-20]:
        layer.trainable = False

    # Classification head on top of the pooled backbone features.
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.4)(x)
    output = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=base_model.input, outputs=output)
    # clipvalue=1.0 clips each gradient element to [-1, 1].
    model.compile(optimizer=Adam(learning_rate=learning_rate, clipvalue=1.0),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    return model

# Dictionary to track performance (one entry per completed fold)
results = {'accuracy': [], 'f1_score': [], 'precision': [], 'recall': [], 'auc_pr': [], 'val_loss': []}

# Cross-validation loop: each fold is used once as the validation set while
# the remaining folds are concatenated into the training set.
for val_fold_index in range(5):
    print(f"\nProcessing Fold {val_fold_index + 1} as Validation Set")

    train_folds = [fold for i, fold in enumerate(fold_dirs) if i != val_fold_index]
    val_fold = fold_dirs[val_fold_index]

    # Load training data
    X_train, y_train = [], []
    for train_fold in train_folds:
        images, labels = load_images_and_labels(train_fold)
        X_train.append(images)
        y_train.append(labels)
    X_train, y_train = np.concatenate(X_train), np.concatenate(y_train)

    # Load validation data
    X_val, y_val = load_images_and_labels(val_fold)

    # Adjust batch size for Fold 5 to prevent freezing
    batch_size = 64 if val_fold_index != 4 else 32

    # Train the model
    model = create_mobilenetv2_model(X_train.shape[1:])
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True),  # Increased patience
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-6),  # Adjusted patience
        ModelCheckpoint(os.path.join(model_save_dir, f'model_mobilenet_fold{val_fold_index + 1}.keras'), save_best_only=True),
        TerminateOnNaN(),  # Stop training if NaN loss is encountered
    ]
    # Arguments shared by the first attempt and the out-of-memory retry.
    fit_kwargs = dict(
        validation_data=(X_val, y_val),
        epochs=100,
        callbacks=callbacks,
        verbose=2,
    )

    start_time = time.time()  # Track training time (including a retry, if any)
    try:
        history = model.fit(X_train, y_train, batch_size=batch_size, **fit_kwargs)
    except tf.errors.ResourceExhaustedError:
        print("Resource exhausted error caught. Reducing batch size and restarting training.")
        batch_size = batch_size // 2
        history = model.fit(X_train, y_train, batch_size=batch_size, **fit_kwargs)
    elapsed_time = time.time() - start_time

    # Save history
    history_path = os.path.join(history_save_dir, f'history_mobilenet_fold{val_fold_index + 1}.pkl')
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)

    # Compute final validation metrics. predict() returns one column per
    # output unit; flatten to 1-D so it lines up with y_val.
    y_pred_probs = model.predict(X_val).ravel()
    y_pred_labels = (y_pred_probs > 0.5).astype(int)

    f1 = f1_score(y_val, y_pred_labels, zero_division=1)
    precision = precision_score(y_val, y_pred_labels, zero_division=1)
    recall = recall_score(y_val, y_pred_labels, zero_division=1)
    accuracy = accuracy_score(y_val, y_pred_labels)

    # Area under the precision-recall curve, from the raw probabilities.
    precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
    auc_pr = auc(recall_vals, precision_vals)

    results['accuracy'].append(accuracy)
    results['f1_score'].append(f1)
    results['precision'].append(precision)
    results['recall'].append(recall)
    results['auc_pr'].append(auc_pr)
    results['val_loss'].append(min(history.history['val_loss']))

    print(f"Fold {val_fold_index + 1} Results: Accuracy={accuracy:.4f}, F1={f1:.4f}, Precision={precision:.4f}, Recall={recall:.4f}, AUC-PR={auc_pr:.4f}")

    # Free up memory
    del model, X_train, y_train, X_val, y_val
    gc.collect()
    tf.keras.backend.clear_session()

    # If training took too long (> 2 hours), do not start further folds.
    # The check sits after the bookkeeping above so that the fold which just
    # finished still has its history and metrics recorded.
    if elapsed_time > 7200:
        print(f"Fold {val_fold_index + 1}: Training took too long (>2 hours). Stopping early.")
        break

# Convert results to DataFrame
df_results = pd.DataFrame(results)

# Save results
df_results.to_csv(os.path.join(history_save_dir, "training_results.csv"), index=False)

# Print summary
print("\n### Final Results Per Fold ###")
print(df_results)

# Compute and print averages
print(f"\nAverage Accuracy: {df_results['accuracy'].mean():.4f}")
print(f"Average F1 Score: {df_results['f1_score'].mean():.4f}")
print(f"Average Precision: {df_results['precision'].mean():.4f}")
print(f"Average Recall: {df_results['recall'].mean():.4f}")
print(f"Average AUC-PR: {df_results['auc_pr'].mean():.4f}")
print(f"Average Validation Loss: {df_results['val_loss'].mean():.4f}")

print("Training complete.")

Analyze

In [None]:
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score, precision_score, recall_score, average_precision_score
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# === CONFIG ===
num_folds = 5

model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Final_MobileNet2_Stable'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Final_Histories_MobileNet2_Stable'
test_image_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Load test data ===
def load_test_images_and_labels(image_dir):
    """Load the held-out test images, their labels and their file paths.

    Args:
        image_dir: Directory containing ``Good`` and ``Bad`` sub-directories.
            Unlike the fold loaders, a missing sub-directory is not skipped:
            ``os.listdir`` raises for it.

    Returns:
        Tuple ``(images, labels, paths)``: two NumPy arrays plus the list of
        image paths in the same order. Images are resized to 224x224 and
        divided by 255.0; the label is 1 for ``Good`` and 0 for ``Bad``.
    """
    images, labels, paths = [], [], []
    for subdir in ['Good', 'Bad']:
        class_path = os.path.join(image_dir, subdir)
        label = 1 if subdir == 'Good' else 0
        # Sorted so that the returned order (and therefore `paths`) is
        # reproducible; os.listdir order is arbitrary.
        for fname in sorted(os.listdir(class_path)):
            # Case-insensitive, like the fold-splitting step, so '*.JPG' files are kept.
            if fname.lower().endswith('.jpg'):
                path = os.path.join(class_path, fname)
                img = load_img(path, target_size=(224, 224))
                img_array = img_to_array(img) / 255.0
                images.append(img_array)
                labels.append(label)
                paths.append(path)
    return np.array(images), np.array(labels), paths

X_test, y_test, test_paths = load_test_images_and_labels(test_image_dir)

# === Plot histories and test metrics ===
plt.figure(figsize=(12, 5))
val_accuracies, val_losses, train_accuracies, train_losses = [], [], [], []
f1s, precisions, recalls, auprcs = [], [], [], []

# For every fold: collect the saved training curves and score the saved
# model on the held-out test set.
for fold in range(1, num_folds + 1):
    # === Load history ===
    history_path = os.path.join(history_save_dir, f'history_mobilenet_fold{fold}.pkl')
    with open(history_path, 'rb') as f:
        history = pickle.load(f)

    train_accuracies.append(history['accuracy'])
    val_accuracies.append(history['val_accuracy'])
    train_losses.append(history['loss'])
    val_losses.append(history['val_loss'])

    # === Load model and predict on test set ===
    model_path = os.path.join(model_save_dir, f'model_mobilenet_fold{fold}.keras')
    model = load_model(model_path)
    # Keep the raw probabilities: the threshold-based metrics need hard
    # labels, but average precision must be computed from the scores.
    y_prob = model.predict(X_test).flatten()
    y_pred = (y_prob > 0.5).astype(int)

    f1s.append(f1_score(y_test, y_pred))
    precisions.append(precision_score(y_test, y_pred))
    recalls.append(recall_score(y_test, y_pred))
    # Scoring the thresholded 0/1 predictions would reduce the
    # precision-recall curve to a single operating point.
    auprcs.append(average_precision_score(y_test, y_prob))

# === Plot Accuracy and Loss ===
def _draw_fold_curves(position, val_series, train_series, tag, heading, y_label):
    """Draw one subplot with the validation and training curve of every fold."""
    plt.subplot(1, 2, position)
    for fold_no in range(1, num_folds + 1):
        # Validation keeps the default line style; training is dashed.
        plt.plot(val_series[fold_no - 1], label=f'Fold {fold_no} Val {tag}')
        plt.plot(train_series[fold_no - 1], linestyle='--', label=f'Fold {fold_no} Train {tag}')
    plt.title(heading)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()
    plt.grid(True)


_draw_fold_curves(1, val_accuracies, train_accuracies, 'Acc', 'Training and Validation Accuracy', 'Accuracy')
_draw_fold_curves(2, val_losses, train_losses, 'Loss', 'Training and Validation Loss', 'Loss')

plt.suptitle("Training Curves - MobileNetV2 (All Folds)")
# Leave room at the top of the figure for the suptitle.
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()

# === Print Test Metrics ===
print("\n🧪 Test Set Metrics (Average Across Folds):")
test_summary = {
    "F1 Score:  ": f1s,
    "Precision: ": precisions,
    "Recall:    ": recalls,
    "AUC-PR:    ": auprcs,
}
for caption, per_fold in test_summary.items():
    # Mean of the per-fold scores measured on the test set.
    print(f"🔹 {caption}{np.mean(per_fold):.4f}")

## Experiment with MobileNetV2 learning rate

In [None]:
import os
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score, precision_score, recall_score, precision_recall_curve, auc
import pandas as pd

# Prevent TensorFlow from freezing
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except RuntimeError as e:
        print(f"GPU Memory Growth Setting Error: {e}")

# Directories
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_Experiments_MobileNet'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_Histories_MobileNet'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# Folds
fold_dirs = [os.path.join(base_fold_dir, f'Fold{i}') for i in range(1, 6)]

# Learning rates to test
learning_rates = [1e-3, 5e-4, 1e-4, 5e-5, 1e-5, 5e-6]

# Function to load images and labels
def load_images_and_labels(image_dir):
    """Read all JPEGs of one fold directory and label them by sub-directory.

    Args:
        image_dir: Directory expected to hold ``Good`` and/or ``Bad``
            sub-directories; a sub-directory that does not exist is skipped.

    Returns:
        ``(images, labels)`` as NumPy arrays. Images are loaded at 224x224 and
        divided by 255.0. Labels are 1 (``Good``) or 0 (``Bad``). Both arrays
        are empty when nothing matches.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_image_dir = os.path.join(image_dir, subdir)
        if not os.path.exists(full_image_dir):
            continue
        label = 1 if subdir == 'Good' else 0
        # Sort for a reproducible sample order (os.listdir order is arbitrary).
        for fname in sorted(os.listdir(full_image_dir)):
            # Match the extension case-insensitively, as the fold-splitting
            # step does, so '*.JPG' files present in a fold are not ignored.
            if fname.lower().endswith('.jpg'):
                image_path = os.path.join(full_image_dir, fname)
                image_array = load_img(image_path, target_size=(224, 224))
                images.append(img_to_array(image_array) / 255.0)
                labels.append(label)
    return np.array(images), np.array(labels)

# Function to create the MobileNetV2 model
def create_mobilenetv2_model(image_shape, learning_rate):
    """Assemble and compile the MobileNetV2 binary classifier.

    Args:
        image_shape: Shape handed to MobileNetV2 as ``input_shape``.
        learning_rate: Learning rate given to the Adam optimizer.

    Returns:
        The compiled Keras ``Model`` (sigmoid output, binary cross-entropy
        loss, accuracy metric).
    """
    backbone = MobileNetV2(weights='imagenet', include_top=False, input_shape=image_shape)

    # Fine-tune only the last 20 backbone layers; all earlier ones are frozen.
    backbone.trainable = True
    for frozen_layer in backbone.layers[:-20]:
        frozen_layer.trainable = False

    # Classification head, applied in order on top of the backbone output.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(128, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for head_layer in head_layers:
        tensor = head_layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    optimizer = Adam(learning_rate=learning_rate, clipvalue=1.0)
    classifier.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    return classifier

# Learning rate warm-up function
def lr_warmup(epoch, lr, target_lr=None, warmup_epochs=5, warmup_lr=5e-4):
    """Learning-rate schedule for ``LearningRateScheduler``.

    Keras calls the schedule with the epoch index and the optimizer's
    *current* learning rate. Because the warm-up phase itself sets that rate
    to ``warmup_lr``, simply returning ``lr`` afterwards keeps training at
    ``warmup_lr``. Pass ``target_lr`` to switch to the intended rate once
    the warm-up ends.

    Args:
        epoch: Zero-based epoch index.
        lr: Current learning rate of the optimizer.
        target_lr: Rate to apply on the first epoch after the warm-up. With
            the default ``None`` the current rate is returned unchanged.
        warmup_epochs: Number of initial epochs that use ``warmup_lr``.
        warmup_lr: Learning rate used during the warm-up epochs.

    Returns:
        The learning rate to use for ``epoch``.
    """
    if epoch < warmup_epochs:
        return warmup_lr  # Start with a fixed LR for the first few epochs
    if target_lr is not None and epoch == warmup_epochs:
        # Applied only once, so later reductions made by other callbacks
        # are not overwritten on the following epochs.
        return target_lr
    return lr

# Dictionary to track performance: one list of per-fold values per metric and learning rate
results = {lr: {'accuracy': [], 'f1_score': [], 'precision': [], 'recall': [], 'auc_pr': [], 'val_loss': []} for lr in learning_rates}

# Length of the warm-up phase; mirrors the `epoch < 5` test in lr_warmup.
WARMUP_EPOCHS = 5


def make_lr_schedule(target_lr):
    """Build a schedule that hands over to ``target_lr`` after the warm-up.

    LearningRateScheduler passes the optimizer's current learning rate to the
    schedule. After the warm-up epochs that rate is the warm-up value, so
    using lr_warmup on its own would keep every run at the warm-up rate and
    the learning rate under test would never be applied.
    """
    def schedule(epoch, current_lr):
        if epoch == WARMUP_EPOCHS:
            # First epoch after the warm-up: switch to the rate being tested.
            return target_lr
        # Warm-up epochs get the warm-up rate; later epochs keep the current
        # rate, which lets ReduceLROnPlateau lower it.
        return lr_warmup(epoch, current_lr)
    return schedule


# Cross-validation loop
for lr in learning_rates:
    print(f"\nTesting Learning Rate: {lr}")
    for val_fold_index in range(5):
        print(f"\nProcessing Fold {val_fold_index + 1} as Validation Set")

        train_folds = [fold for i, fold in enumerate(fold_dirs) if i != val_fold_index]
        val_fold = fold_dirs[val_fold_index]

        # Load training data
        X_train, y_train = [], []
        for train_fold in train_folds:
            images, labels = load_images_and_labels(train_fold)
            X_train.append(images)
            y_train.append(labels)
        X_train, y_train = np.concatenate(X_train), np.concatenate(y_train)

        # Load validation data
        X_val, y_val = load_images_and_labels(val_fold)

        # Adjust batch size for Fold 5 to prevent freezing
        batch_size = 64 if val_fold_index != 4 else 32  # Reduce batch size only for fold 5

        # Train the model
        model = create_mobilenetv2_model(X_train.shape[1:], lr)
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
            ModelCheckpoint(os.path.join(model_save_dir, f'model_mobilenet_lr{lr}_fold{val_fold_index + 1}.keras'), save_best_only=True),
            LearningRateScheduler(make_lr_schedule(lr))
        ]

        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=100,
            batch_size=batch_size,
            callbacks=callbacks,
            verbose=2
        )

        # Save history
        history_path = os.path.join(history_save_dir, f'history_mobilenet_lr{lr}_fold{val_fold_index + 1}.pkl')
        with open(history_path, 'wb') as f:
            pickle.dump(history.history, f)

        # Evaluate on validation set. predict() returns one column per output
        # unit; flatten to 1-D so it lines up with y_val.
        val_probs = model.predict(X_val).ravel()
        val_preds = (val_probs > 0.5).astype(int)

        # Compute evaluation metrics
        val_f1 = f1_score(y_val, val_preds)
        val_precision = precision_score(y_val, val_preds)
        val_recall = recall_score(y_val, val_preds)
        precision_vals, recall_vals, _ = precision_recall_curve(y_val, val_probs)
        val_auc_pr = auc(recall_vals, precision_vals)
        min_val_loss = min(history.history['val_loss']) if 'val_loss' in history.history else None

        # Store results (accuracy is stored as a percentage)
        results[lr]['accuracy'].append(np.mean(val_preds == y_val) * 100)
        results[lr]['f1_score'].append(val_f1)
        results[lr]['precision'].append(val_precision)
        results[lr]['recall'].append(val_recall)
        results[lr]['auc_pr'].append(val_auc_pr)
        results[lr]['val_loss'].append(min_val_loss)

        print(f"Fold {val_fold_index + 1} - Accuracy: {results[lr]['accuracy'][-1]:.2f}%, F1 Score: {val_f1:.4f}, Precision: {val_precision:.4f}, Recall: {val_recall:.4f}, AUC-PR: {val_auc_pr:.4f}, Val Loss: {min_val_loss}")

        # Release the model and the image arrays before the next fold is loaded.
        del model, X_train, y_train, X_val, y_val
        tf.keras.backend.clear_session()

# Average every metric over the folds, one row per learning rate.
df_avg_results = pd.DataFrame.from_dict({lr: {metric: np.mean(results[lr][metric]) for metric in results[lr]} for lr in learning_rates}, orient='index')
df_avg_results.to_csv(os.path.join(history_save_dir, "mobilenet_learning_rate_results.csv"))
print(f"Results saved to {os.path.join(history_save_dir, 'mobilenet_learning_rate_results.csv')}")

Analyze MobileNetV2

In [None]:
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.cm import get_cmap

# === Settings ===
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_Histories_MobileNet'
learning_rates = [1e-3, 5e-4, 1e-4, 5e-5, 1e-5, 5e-6]
num_folds = 5

# === Containers ===
avg_train_acc = {}
avg_val_acc = {}
avg_train_loss = {}
avg_val_loss = {}

# === Pad histories to max length ===
def pad_histories(histories):
    """Stack per-fold metric histories into one NaN-padded 2-D float array.

    Folds can stop after different numbers of epochs, so shorter histories
    are right-padded with NaN up to the longest one. The result can then be
    averaged per epoch with ``np.nanmean``.

    Args:
        histories: Iterable of per-epoch value sequences (lists, tuples or
            1-D arrays), one per fold.

    Returns:
        Float array of shape ``(len(histories), longest_history)``; an empty
        array when ``histories`` is empty.
    """
    # Copy each history into a plain list so that `+` below always means
    # concatenation (for a NumPy array it would be element-wise addition).
    rows = [list(h) for h in histories]
    max_len = max((len(r) for r in rows), default=0)
    padded = np.array([r + [np.nan] * (max_len - len(r)) for r in rows], dtype=float)
    return padded

# === Load and process histories ===
required_keys = ['accuracy', 'val_accuracy', 'loss', 'val_loss']

for lr in learning_rates:
    # Format the rate exactly as the training cell did when it built the
    # file names (f'history_mobilenet_lr{lr}_fold...'), so the lookup cannot
    # drift from what was written (e.g. 5e-05 -> '5e-05', 0.001 -> '0.001').
    lr_str = f"{lr}"

    train_acc_all, val_acc_all = [], []
    train_loss_all, val_loss_all = [], []

    for fold in range(1, num_folds + 1):
        filename = f'history_mobilenet_lr{lr_str}_fold{fold}.pkl'
        filepath = os.path.join(history_save_dir, filename)

        if not os.path.exists(filepath):
            print(f"❌ Missing: {filename}")
            continue

        with open(filepath, 'rb') as f:
            history = pickle.load(f)

        if all(k in history for k in required_keys):
            train_acc_all.append(history['accuracy'])
            val_acc_all.append(history['val_accuracy'])
            train_loss_all.append(history['loss'])
            val_loss_all.append(history['val_loss'])
        else:
            # Report the skip instead of silently averaging over fewer folds.
            print(f"⚠️ Incomplete history skipped: {filename}")

    if train_acc_all:
        # Per-epoch mean over the folds; the NaN padding added for folds that
        # stopped earlier is ignored by nanmean.
        avg_train_acc[lr_str] = np.nanmean(pad_histories(train_acc_all), axis=0)
        avg_val_acc[lr_str] = np.nanmean(pad_histories(val_acc_all), axis=0)
        avg_train_loss[lr_str] = np.nanmean(pad_histories(train_loss_all), axis=0)
        avg_val_loss[lr_str] = np.nanmean(pad_histories(val_loss_all), axis=0)
        print(f"✅ Loaded LR={lr_str}")
    else:
        print(f"⚠️ No history data found for LR={lr_str}")

# === Plot Accuracy and Loss with Unique Colors per LR ===
fig, axs = plt.subplots(1, 2, figsize=(14, 5))
# pyplot.get_cmap replaces matplotlib.cm.get_cmap, which was deprecated in
# Matplotlib 3.7 and removed in 3.9. The `from matplotlib.cm import get_cmap`
# import at the top of this cell is no longer needed by this code.
cmap = plt.get_cmap("tab10")  # or 'Set1', 'tab20'

# Left panel: accuracy. Right panel: loss. Each learning rate gets one color;
# training curves are dashed, validation curves are solid.
panels = [
    (axs[0], avg_train_acc, avg_val_acc, 'Training and Validation Accuracy', 'Accuracy'),
    (axs[1], avg_train_loss, avg_val_loss, 'Training and Validation Loss', 'Loss'),
]
for ax, train_curves, val_curves, panel_title, y_label in panels:
    for idx, lr_str in enumerate(train_curves):
        color = cmap(idx % 10)  # tab10 has 10 colors; wrap around beyond that
        ax.plot(train_curves[lr_str], linestyle='--', color=color, label=f'Train LR={lr_str}')
        ax.plot(val_curves[lr_str], linestyle='-', color=color, label=f'Val LR={lr_str}')
    ax.set_title(panel_title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.grid(True)
    ax.legend()

fig.suptitle('MobileNetV2 - Accuracy & Loss per Epoch by Learning Rate', fontsize=16)
plt.tight_layout()
plt.show()

In [None]:
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score

# Directories
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_Experiments_MobileNet'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_Histories_MobileNet'

# Learning rates tested
learning_rates = [1e-3, 5e-4, 1e-4, 5e-5, 1e-5, 5e-6]
num_folds = 5  # Number of folds used in cross-validation

# Function to load validation images and labels
def load_images_and_labels(image_dir):
    """Load the JPEG images below ``image_dir`` with their binary labels.

    Args:
        image_dir: Directory expected to contain ``Good`` and/or ``Bad``
            sub-directories. A sub-directory that is missing is skipped.

    Returns:
        ``(images, labels)`` as NumPy arrays: images resized to 224x224 and
        divided by 255.0, labels 1 for ``Good`` and 0 for ``Bad``. Both are
        empty when no image is found.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_image_dir = os.path.join(image_dir, subdir)
        if not os.path.exists(full_image_dir):
            continue
        label = 1 if subdir == 'Good' else 0
        # Sorted for a reproducible order; os.listdir order is arbitrary.
        for fname in sorted(os.listdir(full_image_dir)):
            # Case-insensitive, like the fold-splitting step, so '*.JPG'
            # files in a fold are evaluated too.
            if fname.lower().endswith('.jpg'):
                image_path = os.path.join(full_image_dir, fname)
                image_array = load_img(image_path, target_size=(224, 224))
                images.append(img_to_array(image_array) / 255.0)
                labels.append(label)
    return np.array(images), np.array(labels)

# Dictionaries to store computed metrics: metric -> learning rate -> per-fold values
metrics = ['f1_score', 'precision', 'recall', 'auc', 'val_loss']
mobilenetv2_results = {metric: {lr: [] for lr in learning_rates} for metric in metrics}

# Loop through learning rates and folds
for lr in learning_rates:
    for fold in range(1, num_folds + 1):
        model_path = os.path.join(model_save_dir, f'model_mobilenet_lr{lr}_fold{fold}.keras')
        history_path = os.path.join(history_save_dir, f'history_mobilenet_lr{lr}_fold{fold}.pkl')
        val_fold_dir = os.path.join(base_fold_dir, f'Fold{fold}')

        if os.path.exists(model_path):
            print(f"Loading MobileNetV2 model for LR={lr}, Fold={fold}...")
            model = load_model(model_path)  # Load saved model

            # Load validation data
            X_val, y_val = load_images_and_labels(val_fold_dir)
            if len(X_val) == 0:
                # No `continue` here: the validation loss below is still read.
                print(f"Warning: No validation images found for LR={lr}, Fold={fold}")
            else:
                # Compute predictions; flatten the single output column to 1-D
                val_probs = model.predict(X_val).ravel()
                val_preds = (val_probs > 0.5).astype(int)  # Convert probabilities to binary

                # Compute metrics
                val_f1 = f1_score(y_val, val_preds)
                val_precision = precision_score(y_val, val_preds)
                val_recall = recall_score(y_val, val_preds)
                val_auc = roc_auc_score(y_val, val_probs)  # Use probabilities for AUC

                mobilenetv2_results['f1_score'][lr].append(val_f1)
                mobilenetv2_results['precision'][lr].append(val_precision)
                mobilenetv2_results['recall'][lr].append(val_recall)
                mobilenetv2_results['auc'][lr].append(val_auc)

                print(f"Fold {fold} - F1 Score: {val_f1:.4f}, Precision: {val_precision:.4f}, Recall: {val_recall:.4f}, AUC: {val_auc:.4f}")

        else:
            print(f"Warning: Model not found for LR={lr}, Fold={fold}")

        # Load Validation Loss from History File. Folds without a usable
        # value are left out rather than stored as None, because np.mean
        # cannot average a list that contains None.
        if os.path.exists(history_path):
            with open(history_path, 'rb') as f:
                history = pickle.load(f)

            # Get best (minimum) validation loss
            if 'val_loss' in history:
                min_val_loss = min(history['val_loss'])
                mobilenetv2_results['val_loss'][lr].append(min_val_loss)
            else:
                print(f"Warning: val_loss missing for LR={lr}, Fold={fold}")
        else:
            print(f"Warning: History file not found for LR={lr}, Fold={fold}")

# Compute the average metrics per learning rate (NaN when no fold contributed,
# which shows up as a gap in the plot)
avg_mobilenetv2_metrics = {
    metric: {
        lr: (float(np.mean(mobilenetv2_results[metric][lr])) if mobilenetv2_results[metric][lr] else np.nan)
        for lr in learning_rates
    }
    for metric in metrics
}

# Plot all metrics in one graph
def _metric_series(metric_name):
    """Return the averaged values of ``metric_name`` in learning-rate order."""
    return [avg_mobilenetv2_metrics[metric_name][rate] for rate in learning_rates]


fig, ax1 = plt.subplots(figsize=(10, 5))

# Left axis: F1 Score, Precision, Recall, and AUC on a log-scaled x axis
ax1.set_xlabel('Learning Rate')
ax1.set_xscale('log')
ax1.set_ylabel('Performance Metrics')

# (metric key, marker, color, legend label), drawn in this order
score_styles = [
    ('f1_score', 'o', 'g', 'F1 Score'),
    ('precision', 's', 'r', 'Precision'),
    ('recall', 'D', 'orange', 'Recall'),
    ('auc', '^', 'purple', 'AUC'),
]
for metric_name, marker_shape, line_color, legend_label in score_styles:
    ax1.plot(learning_rates, _metric_series(metric_name), marker=marker_shape, linestyle='-', color=line_color, label=legend_label)

ax1.tick_params(axis='y')

# Right axis (shares the x axis): Validation Loss
ax2 = ax1.twinx()
ax2.set_ylabel('Validation Loss', color='b')
ax2.plot(learning_rates, _metric_series('val_loss'), marker='x', linestyle='--', color='b', label='Validation Loss')
ax2.tick_params(axis='y', labelcolor='b')

# Title and Legend
fig.suptitle('MobileNetV2 Learning Rate Performance Metrics')
fig.tight_layout()
fig.legend(loc="upper right")
plt.grid(True)
plt.show()

In [None]:
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import f1_score, precision_score, recall_score, average_precision_score

# Directories
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_Experiments_MobileNet'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_Histories_MobileNet'

# Learning rates tested
learning_rates = [1e-3, 5e-4, 1e-4, 5e-5, 1e-5, 5e-6]
num_folds = 5  # Number of folds used in cross-validation

# Function to load validation images and labels
def load_images_and_labels(image_dir):
    """Collect the JPEG images of a fold and derive labels from the folder name.

    Args:
        image_dir: Directory expected to contain ``Good`` and/or ``Bad``
            sub-directories; absent ones are skipped.

    Returns:
        ``(images, labels)`` NumPy arrays. Each image is loaded at 224x224
        and divided by 255.0; ``Good`` maps to 1 and ``Bad`` to 0. Both
        arrays are empty if there is nothing to load.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_image_dir = os.path.join(image_dir, subdir)
        if not os.path.exists(full_image_dir):
            continue
        label = 1 if subdir == 'Good' else 0
        # os.listdir gives no ordering guarantee; sort for reproducibility.
        for fname in sorted(os.listdir(full_image_dir)):
            # Same case-insensitive extension test as the fold-splitting
            # step, so '*.JPG' files are not left out of the evaluation.
            if fname.lower().endswith('.jpg'):
                image_path = os.path.join(full_image_dir, fname)
                image_array = load_img(image_path, target_size=(224, 224))
                images.append(img_to_array(image_array) / 255.0)
                labels.append(label)
    return np.array(images), np.array(labels)

# Dictionaries to store computed metrics: metric -> learning rate -> per-fold values
metrics = ['f1_score', 'precision', 'recall', 'pr_auc', 'val_loss']
mobilenetv2_results = {metric: {lr: [] for lr in learning_rates} for metric in metrics}

# Loop through learning rates and folds
for lr in learning_rates:
    for fold in range(1, num_folds + 1):
        model_path = os.path.join(model_save_dir, f'model_mobilenet_lr{lr}_fold{fold}.keras')
        history_path = os.path.join(history_save_dir, f'history_mobilenet_lr{lr}_fold{fold}.pkl')
        val_fold_dir = os.path.join(base_fold_dir, f'Fold{fold}')

        if os.path.exists(model_path):
            print(f"Loading MobileNetV2 model for LR={lr}, Fold={fold}...")
            model = load_model(model_path)  # Load saved model

            # Load validation data
            X_val, y_val = load_images_and_labels(val_fold_dir)
            if len(X_val) == 0:
                # Fall through instead of `continue`, so the validation loss
                # of this fold is still collected below.
                print(f"Warning: No validation images found for LR={lr}, Fold={fold}")
            else:
                # Compute predictions; flatten the single output column to 1-D
                val_probs = model.predict(X_val).ravel()
                val_preds = (val_probs > 0.5).astype(int)  # Convert probabilities to binary

                # Compute metrics
                val_f1 = f1_score(y_val, val_preds)
                val_precision = precision_score(y_val, val_preds)
                val_recall = recall_score(y_val, val_preds)
                val_pr_auc = average_precision_score(y_val, val_probs)  # Precision-Recall AUC

                mobilenetv2_results['f1_score'][lr].append(val_f1)
                mobilenetv2_results['precision'][lr].append(val_precision)
                mobilenetv2_results['recall'][lr].append(val_recall)
                mobilenetv2_results['pr_auc'][lr].append(val_pr_auc)

                print(f"Fold {fold} - F1 Score: {val_f1:.4f}, Precision: {val_precision:.4f}, Recall: {val_recall:.4f}, PR AUC: {val_pr_auc:.4f}")

        else:
            print(f"Warning: Model not found for LR={lr}, Fold={fold}")

        # Load Validation Loss from History File. A fold without a usable
        # value contributes nothing; storing None would make np.mean fail.
        if os.path.exists(history_path):
            with open(history_path, 'rb') as f:
                history = pickle.load(f)

            # Get best (minimum) validation loss
            if 'val_loss' in history:
                min_val_loss = min(history['val_loss'])
                mobilenetv2_results['val_loss'][lr].append(min_val_loss)
            else:
                print(f"Warning: val_loss missing for LR={lr}, Fold={fold}")
        else:
            print(f"Warning: History file not found for LR={lr}, Fold={fold}")

# Compute the average metrics per learning rate (NaN when no fold contributed,
# which shows up as a gap in the plot)
avg_mobilenetv2_metrics = {
    metric: {
        lr: (float(np.mean(mobilenetv2_results[metric][lr])) if mobilenetv2_results[metric][lr] else np.nan)
        for lr in learning_rates
    }
    for metric in metrics
}

# Plot all metrics in one graph
fig, ax1 = plt.subplots(figsize=(10, 5))

# Left axis: F1 Score, Precision, Recall, and PR AUC on a log-scaled x axis
ax1.set_xlabel('Learning Rate')
ax1.set_xscale('log')
ax1.set_ylabel('Performance Metrics')

# Metric key -> keyword arguments for its line; drawn in insertion order.
line_styles = {
    'f1_score': dict(marker='o', linestyle='-', color='g', label='F1 Score'),
    'precision': dict(marker='s', linestyle='-', color='r', label='Precision'),
    'recall': dict(marker='D', linestyle='-', color='orange', label='Recall'),
    'pr_auc': dict(marker='^', linestyle='-', color='purple', label='PR AUC'),
}
for metric_key, style in line_styles.items():
    averaged = avg_mobilenetv2_metrics[metric_key]
    ax1.plot(learning_rates, [averaged[rate] for rate in learning_rates], **style)

ax1.tick_params(axis='y')

# Twin axis (shares the x axis) for Validation Loss
ax2 = ax1.twinx()
ax2.set_ylabel('Validation Loss', color='b')
avg_loss = avg_mobilenetv2_metrics['val_loss']
loss_values = [avg_loss[rate] for rate in learning_rates]
ax2.plot(learning_rates, loss_values, marker='x', linestyle='--', color='b', label='Validation Loss')
ax2.tick_params(axis='y', labelcolor='b')

# Title and Legend
fig.suptitle('MobileNetV2 Learning Rate Performance Metrics')
fig.tight_layout()
fig.legend(loc="upper right")
plt.grid(True)
plt.show()

Analyze metrics of the full run with one learning rate

In [None]:
import os
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import f1_score, precision_score, recall_score

# Directories
# Root folder holding the Fold1..FoldN sub-directories used as validation sets.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
# Folder the per-fold 'model_mobilenet_fold{n}.keras' files are read from.
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Final_MobileNet'
# Folder the per-fold 'history_mobilenet_fold{n}.pkl' files are read from.
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Final_Histories_MobileNet'

num_folds = 5  # Number of folds used in cross-validation

# Function to load validation images and labels
def load_images_and_labels(image_dir):
    """Load every JPEG found under image_dir/Good and image_dir/Bad.

    Args:
        image_dir: Directory expected to contain 'Good' and/or 'Bad'
            sub-directories. A sub-directory that does not exist is skipped.

    Returns:
        A tuple (images, labels) of NumPy arrays. Each image is resized to
        224x224 and divided by 255.0; labels are 1 for 'Good' and 0 for
        'Bad'. Both arrays are empty when no image is found.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_image_dir = os.path.join(image_dir, subdir)
        # isdir (not exists) so a stray file named 'Good'/'Bad' is skipped
        # instead of making os.listdir fail.
        if not os.path.isdir(full_image_dir):
            continue
        # os.listdir returns entries in arbitrary order; sort them so the
        # returned arrays are reproducible between runs and machines.
        for fname in sorted(os.listdir(full_image_dir)):
            # Match the extension case-insensitively, like the fold-splitting
            # code does, so '.JPG' files copied into a fold are not dropped.
            if fname.lower().endswith('.jpg'):
                image_path = os.path.join(full_image_dir, fname)
                image_array = load_img(image_path, target_size=(224, 224))
                images.append(img_to_array(image_array) / 255.0)
                labels.append(1 if subdir == 'Good' else 0)
    return np.array(images), np.array(labels)

# Dictionary to store computed metrics.
# Every list receives exactly one entry per fold (NaN when the value could not
# be computed) so all columns have the same length when the DataFrame is built.
metrics = ['f1_score', 'precision', 'recall', 'accuracy', 'val_loss']
mobilenet_results = {metric: [] for metric in metrics}

# Loop through folds
for fold in range(1, num_folds + 1):
    model_path = os.path.join(model_save_dir, f'model_mobilenet_fold{fold}.keras')
    history_path = os.path.join(history_save_dir, f'history_mobilenet_fold{fold}.pkl')
    val_fold_dir = os.path.join(base_fold_dir, f'Fold{fold}')

    # Start from NaN so a missing model or an empty fold still produces a row.
    val_f1 = val_precision = val_recall = val_accuracy = np.nan

    if os.path.exists(model_path):
        print(f"Loading model for Fold {fold}...")
        model = load_model(model_path)  # Load saved model

        # Load validation data
        X_val, y_val = load_images_and_labels(val_fold_dir)
        if len(X_val) == 0:
            print(f"Warning: No validation images found for Fold {fold}")
        else:
            # Compute predictions. predict() returns shape (N, 1); flatten to
            # (N,) so the comparison with y_val below is element-wise instead
            # of broadcasting to an N x N matrix.
            val_probs = model.predict(X_val)
            val_preds = (val_probs > 0.5).astype(int).flatten()

            # Compute metrics
            val_f1 = f1_score(y_val, val_preds)
            val_precision = precision_score(y_val, val_preds)
            val_recall = recall_score(y_val, val_preds)
            val_accuracy = (val_preds == y_val).mean()  # Accuracy

            print(f"Fold {fold} - F1 Score: {val_f1:.4f}, Precision: {val_precision:.4f}, Recall: {val_recall:.4f}, Accuracy: {val_accuracy:.4f}")
    else:
        print(f"Warning: Model not found for Fold {fold}")

    mobilenet_results['f1_score'].append(val_f1)
    mobilenet_results['precision'].append(val_precision)
    mobilenet_results['recall'].append(val_recall)
    mobilenet_results['accuracy'].append(val_accuracy)

    # Load Validation Loss from History File
    min_val_loss = np.nan
    if os.path.exists(history_path):
        # The history pickles are written by the training cells of this
        # notebook; do not point this at files from an untrusted source.
        with open(history_path, 'rb') as f:
            history = pickle.load(f)

        # Get best (minimum) validation loss
        if 'val_loss' in history:
            min_val_loss = min(history['val_loss'])
        else:
            print(f"Warning: val_loss missing for Fold {fold}")
    else:
        print(f"Warning: History file not found for Fold {fold}")
    mobilenet_results['val_loss'].append(min_val_loss)


def _mean_of_available(values):
    """Return the mean of the non-NaN entries of values, or None if there are none."""
    available = [value for value in values if not np.isnan(value)]
    return np.mean(available) if available else None


# Compute the average metrics across folds (folds without a value are ignored)
avg_mobilenet_metrics = {
    metric: _mean_of_available(mobilenet_results[metric])
    for metric in metrics
}

# Print results
import pandas as pd
df_results = pd.DataFrame.from_dict(mobilenet_results)
# DataFrame.mean skips NaN by default, so missing folds do not poison the average.
df_results.loc['Average'] = df_results.mean()

print("\nFinal Results Across All Folds:")
print(df_results)

## Take the 2 best learning rates and train with more patience (TODO)

In [None]:
import os
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score, precision_score, recall_score, precision_recall_curve, auc
import pandas as pd

# Prevent TensorFlow from freezing
tf.keras.backend.clear_session()
physical_devices = tf.config.list_physical_devices('GPU')
# Enable on-demand GPU memory allocation on every visible GPU: TensorFlow
# expects the memory-growth setting to be the same across GPUs, so setting it
# only on the first device is not enough on a multi-GPU machine.
# RuntimeError is raised when the GPUs have already been initialised.
try:
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    print(f"GPU Memory Growth Setting Error: {e}")

# Directories
# Root folder holding Fold1..Fold5; models and histories of this experiment
# are written to the two output folders below (created if missing).
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_Experiments_MobileNetBest2'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_Histories_MobileNetBest2'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# Folds
fold_dirs = [os.path.join(base_fold_dir, f'Fold{i}') for i in range(1, 6)]

# Learning rates to test
learning_rates = [1e-3, 1e-2]

# Function to load images and labels
def load_images_and_labels(image_dir):
    """Load every JPEG found under image_dir/Good and image_dir/Bad.

    Args:
        image_dir: Directory expected to contain 'Good' and/or 'Bad'
            sub-directories. A sub-directory that does not exist is skipped.

    Returns:
        A tuple (images, labels) of NumPy arrays. Each image is resized to
        224x224 and divided by 255.0; labels are 1 for 'Good' and 0 for
        'Bad'. Both arrays are empty when no image is found.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_image_dir = os.path.join(image_dir, subdir)
        # isdir (not exists) so a stray file named 'Good'/'Bad' is skipped
        # instead of making os.listdir fail.
        if not os.path.isdir(full_image_dir):
            continue
        # os.listdir returns entries in arbitrary order; sort them so the
        # returned arrays are reproducible between runs and machines.
        for fname in sorted(os.listdir(full_image_dir)):
            # Match the extension case-insensitively, like the fold-splitting
            # code does, so '.JPG' files copied into a fold are not dropped.
            if fname.lower().endswith('.jpg'):
                image_path = os.path.join(full_image_dir, fname)
                image_array = load_img(image_path, target_size=(224, 224))
                images.append(img_to_array(image_array) / 255.0)
                labels.append(1 if subdir == 'Good' else 0)
    return np.array(images), np.array(labels)

# Function to create the MobileNetV2 model
def create_mobilenetv2_model(image_shape, learning_rate):
    """Build and compile a MobileNetV2-based binary classifier.

    Args:
        image_shape: Input shape handed to MobileNetV2 as input_shape.
        learning_rate: Initial learning rate for the Adam optimizer.

    Returns:
        A compiled Keras Model with a single sigmoid output, trained with
        binary cross-entropy and reporting accuracy.
    """
    backbone = MobileNetV2(weights='imagenet', include_top=False, input_shape=image_shape)

    # Fine-tune only the final 20 backbone layers; everything before them
    # stays frozen.
    backbone.trainable = True
    layers_to_freeze = backbone.layers[:-20]
    for frozen_layer in layers_to_freeze:
        frozen_layer.trainable = False

    # Classification head: pooled features -> 128-unit L2-regularised dense
    # layer -> batch norm -> dropout -> one sigmoid unit.
    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(128, activation='relu', kernel_regularizer=l2(0.01))(pooled)
    hidden = BatchNormalization()(hidden)
    hidden = Dropout(0.4)(hidden)
    probability = Dense(1, activation='sigmoid')(hidden)

    classifier = Model(inputs=backbone.input, outputs=probability)
    # clipvalue=1.0 clips each gradient element to [-1, 1].
    optimizer = Adam(learning_rate=learning_rate, clipvalue=1.0)
    classifier.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    return classifier

# Learning rate warm-up function
def lr_warmup(epoch, lr, warmup_epochs=5, warmup_lr=5e-4, target_lr=None):
    """Learning-rate schedule with a fixed-rate warm-up phase.

    Intended for Keras' LearningRateScheduler, which calls the schedule with
    the epoch index and the optimizer's *current* learning rate.

    Args:
        epoch: Zero-based epoch index.
        lr: Learning rate currently set on the optimizer.
        warmup_epochs: Number of initial epochs that use warmup_lr.
        warmup_lr: Learning rate returned during the warm-up epochs.
        target_lr: Rate to switch to on the first epoch after warm-up. When
            None (the default, matching the original two-argument behaviour)
            the current rate is returned unchanged after warm-up.

    Returns:
        The learning rate to use for this epoch.
    """
    if epoch < warmup_epochs:
        return warmup_lr
    # NOTE: after warm-up `lr` is whatever the optimizer currently holds, i.e.
    # warmup_lr itself unless another callback changed it. Without target_lr
    # the rate the model was compiled with is therefore never restored.
    if target_lr is not None and epoch == warmup_epochs:
        return target_lr
    return lr

# Dictionary to track performance: one list per metric for each learning rate,
# with one entry appended per validation fold.
results = {lr: {'accuracy': [], 'f1_score': [], 'precision': [], 'recall': [], 'auc_pr': [], 'val_loss': []} for lr in learning_rates}

# Cross-validation loop
for lr in learning_rates:
    print(f"\nTesting Learning Rate: {lr}")

    # LearningRateScheduler hands the schedule the optimizer's *current* rate.
    # lr_warmup forces 5e-4 for epochs 0-4 and afterwards just echoes that
    # current rate, so on its own the rate under test would never be applied
    # and every learning rate would train identically. Switch to the rate
    # under test on the first post-warm-up epoch (5 = lr_warmup's threshold);
    # later epochs keep whatever ReduceLROnPlateau has set.
    def warmup_then_target(epoch, current_lr, target_lr=lr):
        if epoch == 5:
            return target_lr
        return lr_warmup(epoch, current_lr)

    for val_fold_index in range(len(fold_dirs)):
        print(f"\nProcessing Fold {val_fold_index + 1} as Validation Set")

        train_folds = [fold for i, fold in enumerate(fold_dirs) if i != val_fold_index]
        val_fold = fold_dirs[val_fold_index]

        # Load training data
        X_train, y_train = [], []
        for train_fold in train_folds:
            images, labels = load_images_and_labels(train_fold)
            X_train.append(images)
            y_train.append(labels)
        X_train, y_train = np.concatenate(X_train), np.concatenate(y_train)

        # Load validation data
        X_val, y_val = load_images_and_labels(val_fold)

        # Adjust batch size for Fold 5 to prevent freezing
        batch_size = 64 if val_fold_index != 4 else 32  # Reduce batch size only for fold 5

        # Train the model
        model = create_mobilenetv2_model(X_train.shape[1:], lr)
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-7),
            ModelCheckpoint(os.path.join(model_save_dir, f'model_mobilenet_lr{lr}_fold{val_fold_index + 1}.keras'), save_best_only=True),
            LearningRateScheduler(warmup_then_target)
        ]

        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=100,  # Upper bound only; EarlyStopping normally ends training sooner
            batch_size=batch_size,
            callbacks=callbacks,
            verbose=2
        )

        # Save history
        history_path = os.path.join(history_save_dir, f'history_mobilenet_lr{lr}_fold{val_fold_index + 1}.pkl')
        with open(history_path, 'wb') as f:
            pickle.dump(history.history, f)

        # Evaluate on validation set. predict() returns shape (N, 1); flatten
        # so probabilities and predictions line up with the 1-D labels.
        val_probs = model.predict(X_val).flatten()
        val_preds = (val_probs > 0.5).astype(int)

        # Compute evaluation metrics
        val_f1 = f1_score(y_val, val_preds)
        val_precision = precision_score(y_val, val_preds)
        val_recall = recall_score(y_val, val_preds)
        precision_vals, recall_vals, _ = precision_recall_curve(y_val, val_probs)
        val_auc_pr = auc(recall_vals, precision_vals)
        min_val_loss = min(history.history['val_loss']) if 'val_loss' in history.history else None

        # Store results (accuracy is stored as a percentage)
        results[lr]['accuracy'].append(np.mean(val_preds == y_val) * 100)
        results[lr]['f1_score'].append(val_f1)
        results[lr]['precision'].append(val_precision)
        results[lr]['recall'].append(val_recall)
        results[lr]['auc_pr'].append(val_auc_pr)
        results[lr]['val_loss'].append(min_val_loss)

        print(f"Fold {val_fold_index + 1} - Accuracy: {results[lr]['accuracy'][-1]:.2f}%, F1 Score: {val_f1:.4f}, Precision: {val_precision:.4f}, Recall: {val_recall:.4f}, AUC-PR: {val_auc_pr:.4f}, Val Loss: {min_val_loss}")

        # Release the model and reset Keras state before building the next one
        del model
        tf.keras.backend.clear_session()

# Average each metric over the folds, one row per learning rate, and save as CSV
df_avg_results = pd.DataFrame.from_dict({lr: {metric: np.mean(results[lr][metric]) for metric in results[lr]} for lr in learning_rates}, orient='index')
df_avg_results.to_csv(os.path.join(history_save_dir, "mobilenet_learning_rate_results.csv"))
print(f"Results saved to {os.path.join(history_save_dir, 'mobilenet_learning_rate_results.csv')}")

Analyze

In [None]:
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt

# === CONFIG ===
lr_to_plot = 1e-3
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_Histories_MobileNetBest2'
num_folds = 5

# === INIT PLOT STORAGE ===
# Per-fold curves, collected in fold order.
train_accuracies, val_accuracies = [], []
train_losses, val_losses = [], []

# === PLOT TRAINING CURVES ===
# One figure with two panels: accuracy on the left, loss on the right.
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))

for fold in range(1, num_folds + 1):
    history_file = os.path.join(history_save_dir, f'history_mobilenet_lr{lr_to_plot}_fold{fold}.pkl')
    if not os.path.exists(history_file):
        print(f"⚠️ Missing history file for Fold {fold} at LR {lr_to_plot}")
        continue

    with open(history_file, 'rb') as f:
        history = pickle.load(f)

    # Pull the four curves out of the pickled Keras history dict.
    train_acc, val_acc = history['accuracy'], history['val_accuracy']
    train_loss, val_loss = history['loss'], history['val_loss']

    train_accuracies.append(train_acc)
    val_accuracies.append(val_acc)
    train_losses.append(train_loss)
    val_losses.append(val_loss)

    # Training curves are dashed, validation curves solid.
    accuracy_ax.plot(train_acc, linestyle='--', label=f'Fold {fold} Train Acc')
    accuracy_ax.plot(val_acc, label=f'Fold {fold} Val Acc')
    loss_ax.plot(train_loss, linestyle='--', label=f'Fold {fold} Train Loss')
    loss_ax.plot(val_loss, label=f'Fold {fold} Val Loss')

# === Finalize both panels ===
panel_settings = [
    (accuracy_ax, f'Training vs Validation Accuracy (LR={lr_to_plot})', 'Accuracy'),
    (loss_ax, f'Training vs Validation Loss (LR={lr_to_plot})', 'Loss'),
]
for panel, panel_title, y_label in panel_settings:
    panel.set_title(panel_title)
    panel.set_xlabel('Epoch')
    panel.set_ylabel(y_label)
    panel.legend()
    panel.grid(True)

fig.tight_layout()
# y > 1 places the figure title above the panel titles.
fig.suptitle(f"Training Curves for MobileNetV2 (LR = {lr_to_plot})", fontsize=14, y=1.03)
plt.show()

## Reduce the patience used with the learning rate

In [None]:
import os
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc
import pandas as pd
import gc
import time

# Prevent TensorFlow from freezing
tf.keras.backend.clear_session()
physical_devices = tf.config.list_physical_devices('GPU')
# Enable on-demand GPU memory allocation on every visible GPU: TensorFlow
# expects the memory-growth setting to be the same across GPUs, so setting it
# only on the first device is not enough on a multi-GPU machine.
# RuntimeError is raised when the GPUs have already been initialised.
try:
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    print(f"GPU Memory Growth Setting Error: {e}")

# Directories
# Root folder holding Fold1..Fold5; models and histories of this run are
# written to the two output folders below (created if missing).
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Final_MobileNet2_Stable'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Final_Histories_MobileNet2_Stable'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# Folds
fold_dirs = [os.path.join(base_fold_dir, f'Fold{i}') for i in range(1, 6)]

# Function to load images and labels
def load_images_and_labels(image_dir):
    """Load every JPEG found under image_dir/Good and image_dir/Bad.

    Args:
        image_dir: Directory expected to contain 'Good' and/or 'Bad'
            sub-directories. A sub-directory that does not exist is skipped.

    Returns:
        A tuple (images, labels) of NumPy arrays. Each image is resized to
        224x224 and divided by 255.0; labels are 1 for 'Good' and 0 for
        'Bad'. Both arrays are empty when no image is found.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_image_dir = os.path.join(image_dir, subdir)
        # isdir (not exists) so a stray file named 'Good'/'Bad' is skipped
        # instead of making os.listdir fail.
        if not os.path.isdir(full_image_dir):
            continue
        # os.listdir returns entries in arbitrary order; sort them so the
        # returned arrays are reproducible between runs and machines.
        for fname in sorted(os.listdir(full_image_dir)):
            # Match the extension case-insensitively, like the fold-splitting
            # code does, so '.JPG' files copied into a fold are not dropped.
            if fname.lower().endswith('.jpg'):
                image_path = os.path.join(full_image_dir, fname)
                image_array = load_img(image_path, target_size=(224, 224))
                images.append(img_to_array(image_array) / 255.0)
                labels.append(1 if subdir == 'Good' else 0)
    return np.array(images), np.array(labels)

# Function to create the MobileNetV2 model
def create_mobilenetv2_model(image_shape):
    """Build and compile the MobileNetV2-based binary classifier of the final run.

    Args:
        image_shape: Input shape handed to MobileNetV2 as input_shape.

    Returns:
        A compiled Keras Model with a single sigmoid output, optimised with
        Adam at a learning rate of 1e-4 on binary cross-entropy and
        reporting accuracy.
    """
    backbone = MobileNetV2(weights='imagenet', include_top=False, input_shape=image_shape)

    # Fine-tune only the final 20 backbone layers; everything before them
    # stays frozen.
    backbone.trainable = True
    layers_to_freeze = backbone.layers[:-20]
    for frozen_layer in layers_to_freeze:
        frozen_layer.trainable = False

    # Classification head: pooled features -> 256-unit L2-regularised dense
    # layer -> batch norm -> dropout -> one sigmoid unit.
    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(pooled)
    hidden = BatchNormalization()(hidden)
    hidden = Dropout(0.4)(hidden)
    probability = Dense(1, activation='sigmoid')(hidden)

    classifier = Model(inputs=backbone.input, outputs=probability)
    # clipvalue=1.0 clips each gradient element to [-1, 1].
    optimizer = Adam(learning_rate=1e-4, clipvalue=1.0)
    classifier.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    return classifier

# Dictionary to track performance: one entry is appended per completed fold.
results = {'accuracy': [], 'f1_score': [], 'precision': [], 'recall': [], 'auc_pr': [], 'val_loss': []}

# If a single fold trains for longer than this many seconds (2 hours), the
# remaining folds are not started.
max_fold_seconds = 7200

# Cross-validation loop
for val_fold_index in range(len(fold_dirs)):
    print(f"\nProcessing Fold {val_fold_index + 1} as Validation Set")

    train_folds = [fold for i, fold in enumerate(fold_dirs) if i != val_fold_index]
    val_fold = fold_dirs[val_fold_index]

    # Load training data
    X_train, y_train = [], []
    for train_fold in train_folds:
        images, labels = load_images_and_labels(train_fold)
        X_train.append(images)
        y_train.append(labels)
    X_train, y_train = np.concatenate(X_train), np.concatenate(y_train)

    # Load validation data
    X_val, y_val = load_images_and_labels(val_fold)

    # Adjust batch size for Fold 5 to prevent freezing
    batch_size = 64 if val_fold_index != 4 else 32

    # Train the model
    model = create_mobilenetv2_model(X_train.shape[1:])
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True),
        # Must be shorter than the EarlyStopping patience: with equal values
        # the rate is only halved on the epoch training stops, so the
        # reduction never gets a chance to help.
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6),
        ModelCheckpoint(os.path.join(model_save_dir, f'model_mobilenet_fold{val_fold_index + 1}.keras'), save_best_only=True),
        TerminateOnNaN(),  # Stop training if NaN loss is encountered
    ]

    start_time = time.time()  # Track training time (including an OOM retry)
    try:
        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=100,
            batch_size=batch_size,
            callbacks=callbacks,
            verbose=2
        )
    except tf.errors.ResourceExhaustedError:
        print("Resource exhausted error caught. Reducing batch size and restarting training.")
        # NOTE(review): fit() is re-run on the same model instance, so any
        # weight updates made before the error are kept - confirm that is
        # acceptable or rebuild the model here.
        batch_size = batch_size // 2
        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=100,
            batch_size=batch_size,
            callbacks=callbacks,
            verbose=2
        )
    elapsed_time = time.time() - start_time  # Compute elapsed time

    # Save history
    history_path = os.path.join(history_save_dir, f'history_mobilenet_fold{val_fold_index + 1}.pkl')
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)

    # Compute final validation metrics. predict() returns shape (N, 1);
    # flatten so probabilities and predictions line up with the 1-D labels.
    y_pred_probs = model.predict(X_val).flatten()
    y_pred_labels = (y_pred_probs > 0.5).astype(int)

    # NOTE(review): zero_division=1 scores a fold with no positive
    # predictions as 1.0; the other evaluation cells use the default.
    f1 = f1_score(y_val, y_pred_labels, zero_division=1)
    precision = precision_score(y_val, y_pred_labels, zero_division=1)
    recall = recall_score(y_val, y_pred_labels, zero_division=1)
    accuracy = accuracy_score(y_val, y_pred_labels)

    precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
    auc_pr = auc(recall_vals, precision_vals)

    results['accuracy'].append(accuracy)
    results['f1_score'].append(f1)
    results['precision'].append(precision)
    results['recall'].append(recall)
    results['auc_pr'].append(auc_pr)
    results['val_loss'].append(min(history.history['val_loss']))

    print(f"Fold {val_fold_index + 1} Results: Accuracy={accuracy:.4f}, F1={f1:.4f}, Precision={precision:.4f}, Recall={recall:.4f}, AUC-PR={auc_pr:.4f}")

    # Free up memory
    del model, X_train, y_train, X_val, y_val
    gc.collect()
    tf.keras.backend.clear_session()

    # The time limit is checked only after the fold's history and metrics are
    # stored, so the work of the fold that just finished is not thrown away.
    if elapsed_time > max_fold_seconds:
        print(f"Fold {val_fold_index + 1}: Training took too long (>2 hours). Stopping early.")
        break

# Convert results to DataFrame
df_results = pd.DataFrame(results)

# Save results
df_results.to_csv(os.path.join(history_save_dir, "training_results.csv"), index=False)

# Print summary
print("\n### Final Results Per Fold ###")
print(df_results)

# Compute and print averages
print(f"\nAverage Accuracy: {df_results['accuracy'].mean():.4f}")
print(f"Average F1 Score: {df_results['f1_score'].mean():.4f}")
print(f"Average Precision: {df_results['precision'].mean():.4f}")
print(f"Average Recall: {df_results['recall'].mean():.4f}")
print(f"Average AUC-PR: {df_results['auc_pr'].mean():.4f}")
print(f"Average Validation Loss: {df_results['val_loss'].mean():.4f}")

print("Training complete.")

Analyze

In [None]:
import os
import pickle
import matplotlib.pyplot as plt

# === CONFIG ===
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Final_Histories_MobileNet2_Stable'
num_folds = 5

# === INIT PLOT STORAGE ===
train_accuracies, val_accuracies = [], []
train_losses, val_losses = [], []

# === PLOT TRAINING CURVES ===
# One figure with two panels: accuracy on the left, loss on the right.
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))

for fold in range(1, num_folds + 1):
    history_file = os.path.join(history_save_dir, f'history_mobilenet_fold{fold}.pkl')
    if not os.path.exists(history_file):
        print(f"⚠️ Missing history file for Fold {fold}")
        continue

    with open(history_file, 'rb') as f:
        history = pickle.load(f)

    # Missing keys fall back to empty curves instead of raising KeyError.
    train_acc, val_acc = history.get('accuracy', []), history.get('val_accuracy', [])
    train_loss, val_loss = history.get('loss', []), history.get('val_loss', [])

    # A fold without both accuracy curves is reported and left off the plot.
    if not (train_acc and val_acc):
        print(f"⚠️ Empty history for Fold {fold}")
        continue

    # Training curves are dashed, validation curves solid.
    accuracy_ax.plot(train_acc, linestyle='--', label=f'Fold {fold} Train Acc')
    accuracy_ax.plot(val_acc, label=f'Fold {fold} Val Acc')
    loss_ax.plot(train_loss, linestyle='--', label=f'Fold {fold} Train Loss')
    loss_ax.plot(val_loss, label=f'Fold {fold} Val Loss')

# === Finalize both panels ===
panel_settings = [
    (accuracy_ax, 'Training vs Validation Accuracy', 'Accuracy'),
    (loss_ax, 'Training vs Validation Loss', 'Loss'),
]
for panel, panel_title, y_label in panel_settings:
    panel.set_title(panel_title)
    panel.set_xlabel('Epoch')
    panel.set_ylabel(y_label)
    panel.legend()
    panel.grid(True)

fig.tight_layout()
# y > 1 places the figure title above the panel titles.
fig.suptitle("Training Curves for MobileNetV2 (Final Stable Run)", fontsize=14, y=1.03)
plt.show()

Confusion matrix

In [None]:
import os
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import confusion_matrix

# === CONFIG ===
# Folder the per-fold 'model_mobilenet_fold{n}.keras' files are loaded from.
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Final_MobileNet2_Stable'
# Test-set root; it is read through one sub-directory per entry of class_names.
test_image_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test'
# Order matters: it is used for the heatmap tick labels, so index == label.
class_names = ['Bad', 'Good']  # Label 0 = Bad, 1 = Good

# === Load Test Images and Labels ===
def load_test_images_and_labels(image_dir):
    """Load the JPEG test images of every class listed in class_names.

    Args:
        image_dir: Directory containing one sub-directory per class name.

    Returns:
        A tuple (images, labels, paths): images as a NumPy array of 224x224
        images divided by 255.0, labels as a NumPy array (1 for 'Good', 0
        otherwise), and paths as the list of the loaded files' paths, all in
        the same order.
    """
    images, labels, paths = [], [], []
    for subdir in class_names:
        full_dir = os.path.join(image_dir, subdir)
        label = int(subdir == 'Good')
        # Extension match is case-insensitive ('.jpg' and '.JPG' both load).
        jpg_names = [entry for entry in os.listdir(full_dir) if entry.lower().endswith('.jpg')]
        for entry in jpg_names:
            path = os.path.join(full_dir, entry)
            pixels = img_to_array(load_img(path, target_size=(224, 224)))
            images.append(pixels / 255.0)
            labels.append(label)
            paths.append(path)
    return np.array(images), np.array(labels), paths

# Load the held-out test set once; every fold's model is scored on the same data.
X_test, y_test, test_paths = load_test_images_and_labels(test_image_dir)

# === Loop Through Folds and Plot Confusion Matrices ===
for fold in range(1, 6):
    model_path = os.path.join(model_save_dir, f'model_mobilenet_fold{fold}.keras')

    if not os.path.exists(model_path):
        print(f"⚠️ Model not found for Fold {fold}")
        continue

    print(f"\n🔍 Generating Confusion Matrix for Fold {fold}")
    model = load_model(model_path)
    # predict() returns shape (N, 1); threshold at 0.5 and flatten to (N,).
    y_pred = (model.predict(X_test) > 0.5).astype(int).flatten()

    # === Compute Confusion Matrix ===
    # Pin the label order to [0, 1] so the matrix is always 2x2 and lines up
    # with class_names, even if a model predicts only one class.
    cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
    plt.figure(figsize=(4, 3))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.title(f'Confusion Matrix - Fold {fold}')
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.tight_layout()
    plt.show()

In [None]:
import os
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import confusion_matrix

# === CONFIG ===
# NOTE(review): this cell repeats the preceding confusion-matrix cell verbatim.
# Folder the per-fold 'model_mobilenet_fold{n}.keras' files are loaded from.
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Final_MobileNet2_Stable'
# Test-set root; it is read through one sub-directory per entry of class_names.
test_image_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test'
# Order matters: it is used for the heatmap tick labels, so index == label.
class_names = ['Bad', 'Good']  # Label 0 = Bad, 1 = Good

# === Load Test Images and Labels ===
def load_test_images_and_labels(image_dir):
    """Load the JPEG test images of every class listed in class_names.

    Args:
        image_dir: Directory containing one sub-directory per class name.

    Returns:
        A tuple (images, labels, paths): images as a NumPy array of 224x224
        images divided by 255.0, labels as a NumPy array (1 for 'Good', 0
        otherwise), and paths as the list of the loaded files' paths, all in
        the same order.
    """
    images, labels, paths = [], [], []
    for subdir in class_names:
        full_dir = os.path.join(image_dir, subdir)
        label = int(subdir == 'Good')
        # Extension match is case-insensitive ('.jpg' and '.JPG' both load).
        jpg_names = [entry for entry in os.listdir(full_dir) if entry.lower().endswith('.jpg')]
        for entry in jpg_names:
            path = os.path.join(full_dir, entry)
            pixels = img_to_array(load_img(path, target_size=(224, 224)))
            images.append(pixels / 255.0)
            labels.append(label)
            paths.append(path)
    return np.array(images), np.array(labels), paths

# Load the held-out test set once; every fold's model is scored on the same data.
X_test, y_test, test_paths = load_test_images_and_labels(test_image_dir)

# === Loop Through Folds and Plot Confusion Matrices ===
for fold in range(1, 6):
    model_path = os.path.join(model_save_dir, f'model_mobilenet_fold{fold}.keras')

    if not os.path.exists(model_path):
        print(f"⚠️ Model not found for Fold {fold}")
        continue

    print(f"\n🔍 Generating Confusion Matrix for Fold {fold}")
    model = load_model(model_path)
    # predict() returns shape (N, 1); threshold at 0.5 and flatten to (N,).
    y_pred = (model.predict(X_test) > 0.5).astype(int).flatten()

    # === Compute Confusion Matrix ===
    # Pin the label order to [0, 1] so the matrix is always 2x2 and lines up
    # with class_names, even if a model predicts only one class.
    cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
    plt.figure(figsize=(4, 3))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.title(f'Confusion Matrix - Fold {fold}')
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.tight_layout()
    plt.show()

In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, precision_recall_curve, auc

# === Loop Through Folds and Compute Metrics + Plot Confusion Matrix ===
# Relies on model_save_dir, class_names, X_test and y_test (plus the os,
# load_model, confusion_matrix, plt and sns imports) from the preceding
# confusion-matrix cell.
for fold in range(1, 6):
    model_path = os.path.join(model_save_dir, f'model_mobilenet_fold{fold}.keras')

    if not os.path.exists(model_path):
        print(f"⚠️ Model not found for Fold {fold}")
        continue

    print(f"\n🔍 Evaluation for Fold {fold}")
    model = load_model(model_path)
    # Flatten the (N, 1) sigmoid output so it lines up with the 1-D labels.
    y_pred_probs = model.predict(X_test).flatten()
    y_pred = (y_pred_probs > 0.5).astype(int)

    # === Metrics ===
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    # Area under the precision-recall curve, computed from the raw probabilities.
    prec_vals, rec_vals, _ = precision_recall_curve(y_test, y_pred_probs)
    auc_pr = auc(rec_vals, prec_vals)

    # === Print Metrics ===
    print(f"Accuracy     : {acc:.4f}")
    print(f"F1 Score     : {f1:.4f}")
    print(f"Precision    : {precision:.4f}")
    print(f"Recall       : {recall:.4f}")
    print(f"AUC-PR       : {auc_pr:.4f}")

    # === Confusion Matrix ===
    # Pin the label order to [0, 1] so the matrix is always 2x2 and lines up
    # with class_names, even if a model predicts only one class.
    cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
    plt.figure(figsize=(4, 3))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.title(f'Confusion Matrix - Fold {fold}')
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.tight_layout()
    plt.show()

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import cv2
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import img_to_array, load_img

import os  # used below, but this cell's own import list does not include it

# === CONFIG ===
model_path = '/Users/suzetteschulenburg/Desktop/MainUse/Final_MobileNet2_Stable/model_mobilenet_fold1.keras'
image_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test/Good'  # or 'Bad'
layer_names = ['block_16_project', 'block_13_expand']  # Example layers to visualize
# Take the first four JPEGs. The listing is sorted because os.listdir returns
# entries in arbitrary order, which would make "the first four" change between
# runs; the extension is matched case-insensitively like the test-set loader.
image_paths = [
    os.path.join(image_dir, fname)
    for fname in sorted(os.listdir(image_dir))
    if fname.lower().endswith('.jpg')
][:4]

# === Load model ===
model = load_model(model_path)

# === Grad-CAM Function ===
def get_gradcam_heatmap(img_array, model, layer_name):
    """Compute a Grad-CAM heatmap for one image at the given layer.

    Args:
        img_array: Model input; only the first item of the batch is used for
            the returned map.
        model: Keras model whose output column 0 is the score to explain.
        layer_name: Name of the layer whose activations are weighted.

    Returns:
        A NumPy array with the layer's spatial size, with values in [0, 1]
        (all zeros when the layer contributes no positive evidence).
    """
    # Auxiliary model exposing both the chosen layer's activations and the
    # final prediction in a single forward pass.
    grad_model = tf.keras.models.Model(
        [model.inputs], [model.get_layer(layer_name).output, model.output]
    )

    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_array)
        loss = predictions[:, 0]

    # Gradient of the score w.r.t. the activations, averaged over the batch
    # and spatial axes to get one weight per channel.
    grads = tape.gradient(loss, conv_outputs)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
    conv_outputs = conv_outputs[0]
    # Channel-weighted sum of the activation maps.
    heatmap = conv_outputs @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)
    # Keep positive evidence only, then scale to [0, 1]. The epsilon avoids a
    # 0/0 NaN map when nothing is positive, and normalising by the max of the
    # clipped map avoids dividing by a negative maximum.
    heatmap = tf.maximum(heatmap, 0)
    heatmap = heatmap / (tf.math.reduce_max(heatmap) + 1e-8)
    return heatmap.numpy()

# === Overlay Heatmap on Image ===
def overlay_heatmap(original_img, heatmap, alpha=0.4):
    """Blend a jet-coloured heatmap onto an image.

    Args:
        original_img: Image array; it is cast to uint8 before blending.
        heatmap: 2-D heatmap; it is resized to the image's width and height.
        alpha: Weight of the heatmap in the blend (the image gets 1 - alpha).

    Returns:
        The blended image as produced by cv2.addWeighted.
    """
    height, width = original_img.shape[0], original_img.shape[1]
    # cv2.resize takes the target size as (width, height).
    resized = cv2.resize(heatmap, (width, height))
    # The jet colormap returns RGBA; drop alpha and convert to 0-255 uint8.
    jet_rgba = plt.cm.jet(resized)
    heatmap_rgb = np.uint8(255 * jet_rgba[:, :, :3])
    base_image = original_img.astype('uint8')
    return cv2.addWeighted(base_image, 1 - alpha, heatmap_rgb, alpha, 0)

# === Visualize for each image and layer ===
# One figure per image: the original on the left, then one Grad-CAM overlay
# per entry of layer_names.
for i, img_path in enumerate(image_paths):
    img = load_img(img_path, target_size=(224, 224))
    img_array = img_to_array(img) / 255.0
    input_tensor = np.expand_dims(img_array, axis=0)  # add the batch axis

    fig, axes = plt.subplots(1, len(layer_names) + 1, figsize=(14, 4))
    original_panel = axes[0]
    original_panel.imshow(img_array)
    original_panel.set_title('Original')
    original_panel.axis('off')

    # The overlay helper expects a 0-255 uint8 image, so undo the 1/255 scaling.
    for panel, layer in zip(axes[1:], layer_names):
        heatmap = get_gradcam_heatmap(input_tensor, model, layer)
        overlayed_img = overlay_heatmap((img_array * 255).astype(np.uint8), heatmap)
        panel.imshow(overlayed_img)
        panel.set_title(f'Grad-CAM\n{layer}')
        panel.axis('off')

    plt.suptitle(f"Grad-CAM Visualization for {os.path.basename(img_path)}", fontsize=12)
    plt.tight_layout()
    plt.show()

Heat map

In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import cv2
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# === CONFIG ===
model_path = '/Users/suzetteschulenburg/Desktop/MainUse/Final_MobileNet2_Stable/model_mobilenet_fold1.keras'
image_path = '/Users/suzetteschulenburg/Desktop/MainUse2/Fold1/Bad/JH1633_IMG_9233_saturation.jpg'
target_size = (224, 224)
last_conv_layer_name = 'Conv_1'  # Last conv layer in MobileNetV2

# === LOAD MODEL AND IMAGE ===
model = load_model(model_path)

img = load_img(image_path, target_size=target_size)
img_array = img_to_array(img)
img_array = np.expand_dims(img_array, axis=0) / 255.0  # add batch axis, scale to [0, 1]

# === BUILD MODEL FOR GRAD-CAM ===
# Auxiliary model exposing both the last conv activations and the prediction.
grad_model = tf.keras.models.Model(
    [model.inputs], [model.get_layer(last_conv_layer_name).output, model.output]
)

with tf.GradientTape() as tape:
    conv_outputs, predictions = grad_model(img_array)
    # The models saved by the training cell end in a single sigmoid unit whose
    # output is P(Good), so an argmax over the outputs is always 0 and would
    # explain 'Good' even for images predicted 'Bad'. Explain the class that
    # was actually predicted: P(Good) above the 0.5 threshold, else 1 - P(Good).
    good_prob = predictions[:, 0]
    class_output = tf.where(good_prob > 0.5, good_prob, 1.0 - good_prob)

# === GRADIENTS AND FEATURE MAP ===
# One weight per channel: the gradient averaged over batch and spatial axes.
grads = tape.gradient(class_output, conv_outputs)
pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
conv_outputs = conv_outputs[0]

# === GENERATE HEATMAP ===
# Stay in TensorFlow ops until the explicit .numpy() call instead of relying
# on how NumPy and TensorFlow defer mixed ndarray/tensor arithmetic.
heatmap = conv_outputs @ pooled_grads[..., tf.newaxis]
heatmap = tf.squeeze(heatmap)
heatmap = tf.maximum(heatmap, 0)
heatmap = heatmap / (tf.math.reduce_max(heatmap) + 1e-8)  # Normalize
heatmap = heatmap.numpy()

# === LOAD ORIGINAL IMAGE WITH CV2 ===
# cv2 loads images as BGR; they are converted to RGB only for display below.
img_cv = cv2.imread(image_path)
img_cv = cv2.resize(img_cv, target_size)
heatmap = cv2.resize(heatmap, (img_cv.shape[1], img_cv.shape[0]))
heatmap_colored = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET)
overlay = cv2.addWeighted(img_cv, 0.6, heatmap_colored, 0.4, 0)

# === SHOW RESULT ===
plt.figure(figsize=(12, 4))

plt.subplot(1, 3, 1)
plt.imshow(cv2.cvtColor(img_cv, cv2.COLOR_BGR2RGB))
plt.title("Original Image")
plt.axis('off')

plt.subplot(1, 3, 2)
plt.imshow(heatmap, cmap='jet')
plt.title("Grad-CAM Heatmap")
plt.axis('off')

plt.subplot(1, 3, 3)
plt.imshow(cv2.cvtColor(overlay, cv2.COLOR_BGR2RGB))
plt.title("Overlay")
plt.axis('off')

plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import cv2
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# === CONFIG ===
model_path = '/Users/suzetteschulenburg/Desktop/MainUse/Final_MobileNet2_Stable/model_mobilenet_fold1.keras'
image_paths = [
    '/Users/suzetteschulenburg/Desktop/MainUse2/Fold1/Bad/JH1633_IMG_9234_flip.jpg',
    '/Users/suzetteschulenburg/Desktop/MainUse2/Fold1/Bad/E21169_IMG_8680.jpg',
    '/Users/suzetteschulenburg/Desktop/MainUse2/Fold1/Good/E1686_IMG_8413.jpg',
    '/Users/suzetteschulenburg/Desktop/MainUse2/Fold1/Good/JH1657_IMG_9769_sharp.jpg'
]
target_size = (224, 224)
last_conv_layer_name = 'Conv_1'

# === LOAD MODEL ===
model = load_model(model_path)

# === BUILD GRAD-CAM MODEL ===
# Built once: it depends only on the model, not on the image being explained.
grad_model = tf.keras.models.Model(
    [model.inputs], [model.get_layer(last_conv_layer_name).output, model.output]
)

# === PLOT SETUP ===
plt.figure(figsize=(20, 5))
n_images = len(image_paths)  # one subplot column per image

for i, image_path in enumerate(image_paths):
    # === LOAD AND PREPROCESS IMAGE ===
    img = load_img(image_path, target_size=target_size)
    img_array = img_to_array(img)
    img_array_exp = np.expand_dims(img_array, axis=0) / 255.0

    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_array_exp)
        pred_index = tf.argmax(predictions[0])
        class_output = predictions[:, pred_index]

    # === COMPUTE GRAD-CAM ===
    grads = tape.gradient(class_output, conv_outputs)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))  # one weight per channel
    conv_outputs = conv_outputs[0]
    heatmap = conv_outputs @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)
    # Rectify and normalise with tensor ops, then convert explicitly once.
    heatmap = tf.maximum(heatmap, 0)
    heatmap = (heatmap / (tf.math.reduce_max(heatmap) + 1e-8)).numpy()

    # === COMBINE WITH ORIGINAL IMAGE ===
    img_cv = cv2.imread(image_path)
    if img_cv is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img_cv = cv2.resize(img_cv, target_size)
    heatmap_resized = cv2.resize(heatmap, (img_cv.shape[1], img_cv.shape[0]))
    heatmap_colored = cv2.applyColorMap(np.uint8(255 * heatmap_resized), cv2.COLORMAP_JET)
    overlay = cv2.addWeighted(img_cv, 0.6, heatmap_colored, 0.4, 0)

    # === SHOW OVERLAY IMAGE ONLY ===
    plt.subplot(1, n_images, i + 1)
    plt.imshow(cv2.cvtColor(overlay, cv2.COLOR_BGR2RGB))
    plt.axis('off')

plt.tight_layout()
plt.show()

# YOLO

In [None]:
import sys
!{sys.executable} -m pip install --upgrade pip
!{sys.executable} -m pip install ultralytics

In [None]:
from ultralytics import YOLO
# Smoke test: this line is only reached if the ultralytics import above succeeded.
print("YOLO is ready! 🚀")

In [None]:
from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt

# Load YOLOv8s (small and fast)
model = YOLO('yolov8s.pt')

# Your image path
image_path = '/Users/suzetteschulenburg/Desktop/MainUse2/Fold3/Good/11?_IMG_8450_flip.jpg'

# Load image (OpenCV returns BGR, or None when the file cannot be read)
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # kept for later cells that use it

# Run detection. Ultralytics treats numpy inputs as BGR, so pass the OpenCV
# image directly rather than the RGB copy.
results = model(image)

# Plot detection results: plot() RETURNS the annotated image (BGR); it does not
# draw on the input, so the returned array is what must be displayed.
annotated_bgr = results[0].plot()
plt.imshow(cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB))
plt.title("YOLO Detection Output")
plt.axis("off")
plt.show()

In [None]:
# List every detection of the previous cell's first result: label and confidence.
detection = results[0]
label_lookup = detection.names
for box in detection.boxes:
    class_name = label_lookup[int(box.cls.item())]
    conf = box.conf.item()
    print(f"Detected: {class_name} (Confidence: {conf:.2f})")

In [None]:
from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt
import numpy as np

# Load model and image
model = YOLO('yolov8s.pt')
image_path = '/Users/suzetteschulenburg/Desktop/MainUse2/Fold1/Good/E1686_IMG_8412_saturation.jpg'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError(f"Could not read image: {image_path}")
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Run detection on the BGR image: Ultralytics treats numpy inputs as BGR.
results = model(image)

# Pull the detections out as plain numpy arrays
boxes = results[0].boxes
scores = boxes.conf.cpu().numpy()
class_ids = boxes.cls.cpu().numpy()
xyxy_boxes = boxes.xyxy.cpu().numpy()

# Get class names
names = results[0].names

# Find cow with the largest area instead of highest score
best_index = None
largest_area = 0

for i, class_id in enumerate(class_ids):
    class_name = names[int(class_id)]
    # NOTE(review): the stock COCO label set only contains 'cow'; the other
    # names never match unless a custom-trained model is loaded — confirm.
    if class_name in ['cow', 'animal', 'bull', 'cattle']:  # adapt as needed
        x1, y1, x2, y2 = map(int, xyxy_boxes[i])
        area = (x2 - x1) * (y2 - y1)

        if area > largest_area:
            largest_area = area
            best_index = i


# Proceed if a cow was found
if best_index is not None:
    x1, y1, x2, y2 = map(int, xyxy_boxes[best_index])

    # Remove bottom 30% of the box (the legs)
    height = y2 - y1
    y2 = y1 + int(height * 0.7)

    cropped = image_rgb[y1:y2, x1:x2]

    # Show and save
    plt.imshow(cropped)
    plt.title("Top Cow (No Legs)")
    plt.axis("off")
    plt.show()

    save_path = '/Users/suzetteschulenburg/Desktop/CroppedCow_Top1.jpg'
    # cv2.imwrite expects BGR, so convert the RGB crop back before writing.
    cv2.imwrite(save_path, cv2.cvtColor(cropped, cv2.COLOR_RGB2BGR))
    print(f"✅ Saved to: {save_path}")
else:
    print("❌ No cow detected.")

In [None]:
from rembg import remove
from PIL import Image
import io

# Serialise the RGB array (left over from the previous cell) to in-memory PNG bytes
png_buffer = io.BytesIO()
Image.fromarray(image_rgb).save(png_buffer, format="PNG")
img_bytes = png_buffer.getvalue()

# Strip the background with rembg and decode the returned bytes as a PIL image
output_image = Image.open(io.BytesIO(remove(img_bytes)))

# Open the result in the system image viewer (nothing is written to disk here)
output_image.show()

In [None]:
from ultralytics import YOLO
import cv2
import numpy as np
from PIL import Image
from rembg import remove
import io
import matplotlib.pyplot as plt

# === Load image and YOLO model ===
model = YOLO('yolov8s.pt')
image_path = '/Users/suzetteschulenburg/Desktop/MainUse2/Fold4/Bad/DVV1510_IMG_0610_aug0.jpg'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError(f"Could not read image: {image_path}")
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# === Run YOLO Detection ===
# Ultralytics treats numpy inputs as BGR, so pass the OpenCV image directly.
results = model(image)
boxes = results[0].boxes
scores = boxes.conf.cpu().numpy()
class_ids = boxes.cls.cpu().numpy()
xyxy_boxes = boxes.xyxy.cpu().numpy()
names = results[0].names

# === Select Largest Cow Only ===
best_index = None
largest_area = 0

for i, class_id in enumerate(class_ids):
    class_name = names[int(class_id)]
    if class_name in ['cow', 'animal', 'bull', 'cattle']:
        x1, y1, x2, y2 = map(int, xyxy_boxes[i])
        area = (x2 - x1) * (y2 - y1)
        if area > largest_area:
            largest_area = area
            best_index = i

if best_index is not None:
    # === Crop the cow with padding and leg removal ===
    x1, y1, x2, y2 = map(int, xyxy_boxes[best_index])

    h, w, _ = image_rgb.shape
    margin = 0.1  # 10% margin
    # Compute the padding once from the ORIGINAL box width; deriving the right
    # padding after x1 was already moved made it larger than the left one.
    pad_x = int((x2 - x1) * margin)
    x1 = max(0, x1 - pad_x)
    x2 = min(w, x2 + pad_x)
    y2 = y1 + int((y2 - y1) * 0.7)  # remove bottom 30%

    cropped = image_rgb[y1:y2, x1:x2]

    # === Convert to PNG and remove background ===
    image_pil = Image.fromarray(cropped)
    buffered = io.BytesIO()
    image_pil.save(buffered, format="PNG")
    img_bytes = buffered.getvalue()

    output_bytes = remove(img_bytes)
    output_image = Image.open(io.BytesIO(output_bytes))

    # === Show and Save ===
    output_image.show()
    # The success message used a save_path this cell never defined and no file
    # was written; define the target and actually save. PNG is used because it
    # can hold an alpha channel, should the rembg output carry one.
    save_path = '/Users/suzetteschulenburg/Desktop/CroppedCow_NoBG.png'
    output_image.save(save_path)
    print(f"✅ Saved cropped cow with background removed: {save_path}")
else:
    print("❌ No main cow detected.")

### Cropped cow and removed bg

In [None]:
from ultralytics import YOLO
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Load YOLOv8 segmentation model
model = YOLO('yolov8s-seg.pt')

# Load and preprocess image
image_path = '/Users/suzetteschulenburg/Desktop/MainUse2/Fold1/Good/E1686_IMG_8412_saturation.jpg'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError(f"Could not read image: {image_path}")
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # kept for cells that reuse it

# Run YOLO segmentation on the BGR image (Ultralytics treats numpy inputs as BGR)
results = model(image)

# Visualize detections: plot() returns the annotated BGR image, so display the
# returned array — showing image_rgb would show the untouched photo.
annotated_bgr = results[0].plot()
plt.imshow(cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.title("YOLOv8 Segmentation")
plt.show()

In [None]:
from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt
import numpy as np

# === Load model and image ===
model = YOLO("yolov8s-seg.pt")
image_path = "/Users/suzetteschulenburg/Desktop/MainUse2/Fold4/Bad/DVV1510_IMG_0612_aug0.jpg"
image = cv2.imread(image_path)
if image is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError(f"Could not read image: {image_path}")
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image_h, image_w = image_rgb.shape[:2]

# === Run detection ===
# Ultralytics treats numpy inputs as BGR, so pass the OpenCV image directly.
results = model(image)
masks = results[0].masks
boxes = results[0].boxes
names = results[0].names

if masks is not None and len(masks.data) > 0:
    # Select the largest cow (by bounding-box area)
    best_index = None
    largest_area = 0
    for i, cls in enumerate(boxes.cls.cpu().numpy()):
        class_name = names[int(cls)]
        if class_name in ['cow', 'bull', 'animal', 'cattle']:
            x1, y1, x2, y2 = map(int, boxes.xyxy[i].cpu().numpy())
            area = (x2 - x1) * (y2 - y1)
            if area > largest_area:
                best_index = i
                largest_area = area

    if best_index is not None:
        # === Resize mask to full image size ===
        raw_mask = masks.data[best_index].cpu().numpy()
        resized_mask = cv2.resize(raw_mask, (image_w, image_h), interpolation=cv2.INTER_NEAREST)
        mask_3ch = np.stack([resized_mask] * 3, axis=-1)

        # === Apply mask to original image ===
        # Pixels outside the mask become white. The fill is an explicit uint8 so
        # the result stays uint8 regardless of NumPy's scalar promotion rules.
        masked_image = np.where(mask_3ch > 0.5, image_rgb, np.uint8(255))

        # === Get and expand crop box ===
        x1, y1, x2, y2 = map(int, boxes.xyxy[best_index].cpu().numpy())
        margin = 0.1
        # Padding is computed once from the original width so both sides get
        # the same amount (the right side used the already-widened box before).
        pad_x = int((x2 - x1) * margin)
        x1 = max(0, x1 - pad_x)
        x2 = min(image_w, x2 + pad_x)
        # Keep the top 80% of the box, i.e. remove the bottom 20%.
        # NOTE(review): the other cropping cells use 0.7 (bottom 30%) — confirm
        # which ratio is intended here.
        y2 = y1 + int((y2 - y1) * 0.8)

        final_crop = masked_image[y1:y2, x1:x2]

        # === Show and save ===
        plt.imshow(final_crop)
        plt.axis("off")
        plt.title("Main Cow – Segmented and Cropped")
        plt.show()

        save_path = "/Users/suzetteschulenburg/Desktop/CroppedCow_Seg_NoLegs_Fixed.png"
        cv2.imwrite(save_path, cv2.cvtColor(final_crop, cv2.COLOR_RGB2BGR))
        print(f"✅ Saved to: {save_path}")
    else:
        print("❌ No main cow found.")
else:
    print("❌ No mask found.")

# YOLO and MobileNetV2

### Crop and remove Background Resize and Save

In [None]:
import os
import cv2
import numpy as np
from ultralytics import YOLO
from PIL import Image

# === Function to resize with padding ===
def resize_with_padding(image, desired_size=224):
    """Resize *image* to fit a square canvas, preserving aspect ratio.

    The longer side is scaled to ``desired_size`` and the remainder is filled
    with a white border, split as evenly as possible between the two sides.

    Args:
        image: Array whose first two axes are (height, width).
        desired_size: Side length of the square output in pixels.

    Returns:
        The padded image of size ``desired_size`` x ``desired_size``.

    Raises:
        ValueError: If the image has zero height or zero width.
    """
    height, width = image.shape[:2]
    if height == 0 or width == 0:
        # An empty crop would otherwise fail with an opaque error further down.
        raise ValueError("resize_with_padding() received an empty image")

    ratio = float(desired_size) / max(height, width)
    # Clamp to 1 px: for extreme aspect ratios int() truncates the short side
    # to 0, which cv2.resize rejects.
    new_h = max(1, int(height * ratio))
    new_w = max(1, int(width * ratio))

    # Resize image (cv2 takes the target size as (width, height))
    resized_image = cv2.resize(image, (new_w, new_h))

    # Centre the resized image on the square canvas
    delta_w = desired_size - new_w
    delta_h = desired_size - new_h
    top, bottom = delta_h // 2, delta_h - (delta_h // 2)
    left, right = delta_w // 2, delta_w - (delta_w // 2)

    color = [255, 255, 255]  # White background
    new_im = cv2.copyMakeBorder(resized_image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return new_im

# === Load YOLO segmentation model ===
model = YOLO("yolov8s-seg.pt")

# === Paths ===
input_base = "/Users/suzetteschulenburg/Desktop/MainUse2/Fold1"
output_base = "/Users/suzetteschulenburg/Desktop/MainUseProcessed/Fold1"
os.makedirs(output_base, exist_ok=True)

# === Subfolders to process ===
classes = ['Good', 'Bad']

for cls in classes:
    input_folder = os.path.join(input_base, cls)
    output_folder = os.path.join(output_base, cls)
    os.makedirs(output_folder, exist_ok=True)

    for fname in os.listdir(input_folder):
        # Case-insensitive extension check, matching the fold-splitting cell
        # at the top of the notebook (which accepts '.JPG' as well).
        stem, ext = os.path.splitext(fname)
        if ext.lower() != '.jpg':
            continue

        # === Load image ===
        image_path = os.path.join(input_folder, fname)
        image = cv2.imread(image_path)
        if image is None:
            print(f"⚠️ Could not read: {image_path}")
            continue

        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image_h, image_w = image_rgb.shape[:2]

        # === Run YOLO segmentation ===
        # Ultralytics treats numpy inputs as BGR, so pass the OpenCV image.
        results = model(image)
        masks = results[0].masks
        boxes = results[0].boxes
        names = results[0].names

        if masks is None or len(masks.data) == 0:
            print(f"❌ No cow mask found in: {fname}")
            continue

        # === Get largest cow (by bounding-box area) ===
        best_index = None
        largest_area = 0
        for i, cls_id in enumerate(boxes.cls.cpu().numpy()):
            name = names[int(cls_id)]
            if name in ['cow', 'bull', 'animal', 'cattle']:
                x1, y1, x2, y2 = map(int, boxes.xyxy[i].cpu().numpy())
                area = (x2 - x1) * (y2 - y1)
                if area > largest_area:
                    best_index = i
                    largest_area = area

        if best_index is None:
            print(f"❌ No valid cow class in: {fname}")
            continue

        # === Resize mask to image ===
        mask = masks.data[best_index].cpu().numpy()
        resized_mask = cv2.resize(mask, (image_w, image_h), interpolation=cv2.INTER_NEAREST)
        mask_3ch = np.stack([resized_mask] * 3, axis=-1)

        # === Apply mask to full image ===
        # Background becomes white; the explicit uint8 fill keeps the array
        # uint8 regardless of NumPy's scalar promotion rules.
        masked_image = np.where(mask_3ch > 0.5, image_rgb, np.uint8(255))

        # === Crop, remove bottom 30%, add margin ===
        x1, y1, x2, y2 = map(int, boxes.xyxy[best_index].cpu().numpy())
        margin = 0.1
        # Padding from the ORIGINAL box width so left and right margins match.
        pad_x = int((x2 - x1) * margin)
        x1 = max(0, x1 - pad_x)
        x2 = min(image_w, x2 + pad_x)
        y2 = y1 + int((y2 - y1) * 0.7)  # remove bottom 30%

        cropped = masked_image[y1:y2, x1:x2]
        if cropped.size == 0:
            # A degenerate box would crash the resize step; skip the file.
            print(f"❌ Empty crop for: {fname}")
            continue

        # === Resize with padding to 224x224 ===
        resized = resize_with_padding(cropped, desired_size=224)

        # === Save final image ===
        # Build the name from the stem; str.replace would also touch a '.jpg'
        # occurring in the middle of a file name.
        output_path = os.path.join(output_folder, f"{stem}_processed.jpg")
        cv2.imwrite(output_path, cv2.cvtColor(resized, cv2.COLOR_RGB2BGR))

        print(f"✅ Saved: {output_path}")

#### Crop Test set

In [None]:
import os
import cv2
import numpy as np
from ultralytics import YOLO
from PIL import Image

# === Letterbox helper: scale to fit, then pad to a square ===
def resize_with_padding(image, desired_size=224):
    """Scale *image* so its longer side equals ``desired_size`` and pad to a square.

    The aspect ratio is kept; the leftover space is filled with a white border
    distributed between the opposite sides (the extra pixel, if any, goes to
    the bottom/right).
    """
    src_h, src_w = image.shape[:2]
    scale = float(desired_size) / max((src_h, src_w))
    dst_h = int(src_h * scale)
    dst_w = int(src_w * scale)

    # cv2.resize takes the target size as (width, height)
    scaled = cv2.resize(image, (dst_w, dst_h))

    # Border widths needed to reach desired_size on each axis
    pad_vertical = desired_size - dst_h
    pad_horizontal = desired_size - dst_w
    top = pad_vertical // 2
    bottom = pad_vertical - top
    left = pad_horizontal // 2
    right = pad_horizontal - left

    white = [255, 255, 255]
    return cv2.copyMakeBorder(scaled, top, bottom, left, right, cv2.BORDER_CONSTANT, value=white)

# === Load YOLO segmentation model ===
model = YOLO("yolov8s-seg.pt")

# === Paths ===
input_base = "/Users/suzetteschulenburg/Desktop/MainUse/Test"
output_base = "/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test"
os.makedirs(output_base, exist_ok=True)

# === Subfolders to process ===
classes = ['Good', 'Bad']

for cls in classes:
    input_folder = os.path.join(input_base, cls)
    output_folder = os.path.join(output_base, cls)
    os.makedirs(output_folder, exist_ok=True)

    for fname in os.listdir(input_folder):
        # Accept '.jpg' in any letter case, like the fold-splitting cell at the
        # top of the notebook does.
        stem, ext = os.path.splitext(fname)
        if ext.lower() != '.jpg':
            continue

        # === Load image ===
        image_path = os.path.join(input_folder, fname)
        image = cv2.imread(image_path)
        if image is None:
            print(f"⚠️ Could not read: {image_path}")
            continue

        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image_h, image_w = image_rgb.shape[:2]

        # === Run YOLO segmentation ===
        # Numpy inputs are interpreted as BGR by Ultralytics; pass the OpenCV
        # image rather than the RGB copy.
        results = model(image)
        masks = results[0].masks
        boxes = results[0].boxes
        names = results[0].names

        if masks is None or len(masks.data) == 0:
            print(f"❌ No cow mask found in: {fname}")
            continue

        # === Get largest cow (by bounding-box area) ===
        best_index = None
        largest_area = 0
        for i, cls_id in enumerate(boxes.cls.cpu().numpy()):
            name = names[int(cls_id)]
            if name in ['cow', 'bull', 'animal', 'cattle']:
                x1, y1, x2, y2 = map(int, boxes.xyxy[i].cpu().numpy())
                area = (x2 - x1) * (y2 - y1)
                if area > largest_area:
                    best_index = i
                    largest_area = area

        if best_index is None:
            print(f"❌ No valid cow class in: {fname}")
            continue

        # === Resize mask to image ===
        mask = masks.data[best_index].cpu().numpy()
        resized_mask = cv2.resize(mask, (image_w, image_h), interpolation=cv2.INTER_NEAREST)
        mask_3ch = np.stack([resized_mask] * 3, axis=-1)

        # === Apply mask to full image ===
        # White background; explicit uint8 fill keeps the result uint8.
        masked_image = np.where(mask_3ch > 0.5, image_rgb, np.uint8(255))

        # === Crop, remove bottom 30%, add margin ===
        x1, y1, x2, y2 = map(int, boxes.xyxy[best_index].cpu().numpy())
        margin = 0.1
        # Same padding on both sides, derived from the original box width
        # (previously the right side was padded using the widened box).
        pad_x = int((x2 - x1) * margin)
        x1 = max(0, x1 - pad_x)
        x2 = min(image_w, x2 + pad_x)
        y2 = y1 + int((y2 - y1) * 0.7)  # remove bottom 30%

        cropped = masked_image[y1:y2, x1:x2]
        if cropped.size == 0:
            # Nothing left to resize for a degenerate box; skip the file.
            print(f"❌ Empty crop for: {fname}")
            continue

        # === Resize with padding to 224x224 ===
        resized = resize_with_padding(cropped, desired_size=224)

        # === Save final image ===
        # Name derived from the stem so only the real extension is replaced.
        output_path = os.path.join(output_folder, f"{stem}_processed.jpg")
        cv2.imwrite(output_path, cv2.cvtColor(resized, cv2.COLOR_RGB2BGR))

        print(f"✅ Saved: {output_path}")

## Compare MobileNetV2 with images not using YOLO

### Train

In [None]:
import os
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc
import time

# === Setup ===
# Drop any Keras state left over from earlier cells.
tf.keras.backend.clear_session()

# Ask TensorFlow to grow memory on the first GPU on demand (no-op without a GPU).
physical_devices = tf.config.experimental.list_physical_devices('GPU')
first_gpu = physical_devices[0] if physical_devices else None
if first_gpu is not None:
    try:
        tf.config.experimental.set_memory_growth(first_gpu, True)
    except RuntimeError as e:
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUse2'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_NOYOLO'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_NOYOLO'
for out_dir in (model_save_dir, history_save_dir):
    os.makedirs(out_dir, exist_ok=True)

# === Function to load images and labels ===
def load_images_and_labels(image_dir):
    """Load every JPEG below ``image_dir/Good`` and ``image_dir/Bad``.

    Images are resized to 224x224 and scaled to [0, 1]. A missing class
    sub-folder is skipped silently.

    Args:
        image_dir: Folder expected to contain 'Good' and/or 'Bad' sub-folders.

    Returns:
        Tuple ``(images, labels)`` of numpy arrays; label 1 means 'Good',
        0 means 'Bad'. Both are empty when no image was found.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_path = os.path.join(image_dir, subdir)
        if not os.path.exists(full_path):
            continue
        # sorted(): os.listdir returns entries in arbitrary order, which would
        # make the sample order differ between runs and machines.
        for fname in sorted(os.listdir(full_path)):
            # Case-insensitive match, consistent with the fold-splitting cell;
            # otherwise '.JPG' files placed in the folds are silently dropped.
            if fname.lower().endswith('.jpg'):
                image_path = os.path.join(full_path, fname)
                image_array = load_img(image_path, target_size=(224, 224))
                images.append(img_to_array(image_array) / 255.0)
                labels.append(1 if subdir == 'Good' else 0)
    return np.array(images), np.array(labels)

# === Load Train Folds (2, 3, 4, 5) ===
# Fold1 is held out as the validation set below.
train_fold_numbers = [2, 3, 4, 5]
fold_arrays = [
    load_images_and_labels(os.path.join(base_fold_dir, f'Fold{fold_num}'))
    for fold_num in train_fold_numbers
]

# Stack the per-fold arrays into one training set
X_train = np.concatenate([fold_images for fold_images, _ in fold_arrays])
y_train = np.concatenate([fold_labels for _, fold_labels in fold_arrays])

# === Load Validation Fold (Fold1) ===
val_dir = os.path.join(base_fold_dir, 'Fold1')
X_val, y_val = load_images_and_labels(val_dir)

# === Build Model (Fully Frozen) ===
def create_mobilenetv2_model(image_shape):
    """Return a compiled binary classifier on top of a frozen MobileNetV2.

    The ImageNet-pretrained backbone (without its top) is not trainable; only
    the new head — global average pooling, a 256-unit L2-regularised ReLU
    layer, batch normalisation, dropout and a single sigmoid unit — is.
    Compiled with Adam (lr 4e-4, gradient values clipped to 1.0) and binary
    cross-entropy.
    """
    backbone = MobileNetV2(input_shape=image_shape, include_top=False, weights='imagenet')
    backbone.trainable = False  # Freeze all layers

    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    optimizer = Adam(learning_rate=4e-4, clipvalue=1.0)
    classifier.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

# Input shape is taken from the loaded training images (everything but the batch axis)
model = create_mobilenetv2_model(X_train.shape[1:])

# === Callbacks ===
checkpoint_path = os.path.join(model_save_dir, 'model_fold2345_val1_frozen.keras')
# Stop after 25 epochs without val_loss improvement and roll back to the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True)
# Halve the learning rate after 15 stagnant epochs, never below 1e-7
lr_on_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=15, min_lr=1e-7)
best_checkpoint = ModelCheckpoint(checkpoint_path, save_best_only=True)
callbacks = [early_stopping, lr_on_plateau, best_checkpoint, TerminateOnNaN()]

# === Train ===
# epochs=1000 is an upper bound; early stopping is expected to end training.
start_time = time.time()
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=1000,
    batch_size=32,
    callbacks=callbacks,
    verbose=2,
)
elapsed_time = time.time() - start_time
print(f"⏱️ Training Time: {elapsed_time:.2f} seconds")

# === Save History ===
history_path = os.path.join(history_save_dir, 'history_fold2345_val1_frozen.pkl')
with open(history_path, 'wb') as f:
    pickle.dump(history.history, f)

# === Evaluate ===
# Probabilities from the sigmoid output, thresholded at 0.5 for hard labels
y_pred_probs = model.predict(X_val)
y_pred_labels = (y_pred_probs > 0.5).astype(int)

accuracy = accuracy_score(y_val, y_pred_labels)
f1 = f1_score(y_val, y_pred_labels, zero_division=1)
precision = precision_score(y_val, y_pred_labels, zero_division=1)
recall = recall_score(y_val, y_pred_labels, zero_division=1)

# Area under the precision-recall curve, computed from the raw probabilities
precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
auc_pr = auc(recall_vals, precision_vals)

# === Print Results ===
print(f"\n🔍 Evaluation on Fold1:")
print(f"Accuracy:  {accuracy:.4f}")
print(f"F1 Score:  {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"AUC-PR:    {auc_pr:.4f}")

print("✅ Training complete using Folds 2, 3, 4, 5 with Fold 1 as validation (All layers frozen).")

Heatmap

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import cv2

# === Paths ===
model_path = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_NOYOLO/model_fold2345_val1_frozen.keras'
image_path = '/Users/suzetteschulenburg/Desktop/MainUse2/Fold1/Good/E1686_IMG_8412_flip.jpg'  # replace with actual image path

# === Load model and image ===
model = load_model(model_path)

# 224x224 image scaled to [0, 1]; input_array adds the leading batch axis
img = load_img(image_path, target_size=(224, 224))
img_array = img_to_array(img) / 255.0
input_array = img_array[np.newaxis, ...]

# === Grad-CAM function ===
def get_gradcam_heatmap(model, img_array, last_conv_layer_name='Conv_1'):
    """Compute a Grad-CAM heatmap for output unit 0 of *model*.

    Args:
        model: Keras model containing a layer named ``last_conv_layer_name``.
        img_array: Batched input passed straight to the model.
        last_conv_layer_name: Layer whose activations are visualised.

    Returns:
        2-D numpy array with the spatial size of that layer's output,
        rectified and scaled so that its maximum is (at most) 1.
    """
    grad_model = tf.keras.models.Model(
        [model.inputs], [model.get_layer(last_conv_layer_name).output, model.output]
    )

    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_array)
        # Score being explained: the first (here: only used) output unit.
        loss = predictions[:, 0]

    grads = tape.gradient(loss, conv_outputs)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))  # one weight per channel

    conv_outputs = conv_outputs[0]
    heatmap = tf.reduce_sum(tf.multiply(pooled_grads, conv_outputs), axis=-1)

    # Rectify first, then normalise by the rectified maximum plus epsilon.
    # Dividing by the raw maximum produced NaN (max == 0) or sign-flipped
    # values (max < 0) when no location had a positive contribution.
    heatmap = tf.maximum(heatmap, 0)
    heatmap = heatmap / (tf.reduce_max(heatmap) + tf.keras.backend.epsilon())
    return heatmap.numpy()

# === Generate Grad-CAM ===
# Computes the heatmap only; it is overlaid and plotted further down in this cell.
heatmap = get_gradcam_heatmap(model, input_array, last_conv_layer_name='Conv_1')  # last conv layer in MobileNetV2

# Superimpose on original image
def superimpose_heatmap_transparent(img, heatmap, threshold=0.3):
    """Paint the strong-attention regions of *heatmap* onto *img*.

    Pixels whose (resized) heatmap value exceeds ``threshold`` are replaced by
    the JET colour of that value; all other pixels keep the image content.

    Args:
        img: Float image with values in [0, 1]; it is scaled by 255 on return.
            Assumed to be RGB, as produced by ``load_img`` at the call site.
        heatmap: 2-D map with values in [0, 1]; resized to the image size.
        threshold: Attention level above which the colour replaces the image.

    Returns:
        uint8 image with the same height and width as *img*.
    """
    heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
    mask = np.uint8(255 * heatmap)
    heatmap_color = cv2.applyColorMap(mask, cv2.COLORMAP_JET)
    # applyColorMap returns BGR; convert so the colours match the RGB image
    # (without this, hot regions were rendered blue and cold ones red).
    heatmap_color = cv2.cvtColor(heatmap_color, cv2.COLOR_BGR2RGB)

    # Boolean mask where attention is strong
    strong_attention = heatmap > threshold

    overlay = img.copy()
    overlay[strong_attention] = heatmap_color[strong_attention] / 255.0
    return np.uint8(overlay * 255)



# The helper defined in this cell is superimpose_heatmap_transparent; the
# previous call used the undefined name superimpose_heatmap.
overlay_img = superimpose_heatmap_transparent(img_array, heatmap)

# === Plot ===
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.imshow(img_array)
plt.title("Original Image")
plt.axis('off')

plt.subplot(1, 2, 2)
plt.imshow(overlay_img)
plt.title("Grad-CAM Overlay")
plt.axis('off')

plt.tight_layout()
plt.show()

With YOLO preprocessing (Grad-CAM on a cropped, background-removed image)

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import cv2

# === Paths ===
model_path = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4/model_fold2345_val1_frozen.keras'
image_path = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Fold1/Good/E1686_IMG_8412_flip_processed.jpg'  # Change if needed

# === Load model and image ===
model = load_model(model_path)

# 224x224 image scaled to [0, 1]; input_array adds the leading batch axis
img = load_img(image_path, target_size=(224, 224))
img_array = img_to_array(img) / 255.0
input_array = img_array[np.newaxis, ...]

# === Layer names to extract heatmaps from (listed shallow to deep) ===
layer_names = [
    'block_1_expand',     # shallow
    'block_3_expand',     # mid-shallow
    'block_6_expand',     # mid
    'block_13_expand',    # mid-deep
    'Conv_1',             # last conv layer
]

# === Grad-CAM function for multiple layers ===
def generate_multiple_gradcam_heatmaps(model, img_array, layer_names):
    """Compute one Grad-CAM heatmap per layer in *layer_names*.

    Args:
        model: Keras model containing every layer named in ``layer_names``.
        img_array: Batched input passed straight to the model.
        layer_names: Names of the layers whose activations are visualised.

    Returns:
        Dict mapping each layer name to a 2-D numpy heatmap (rectified and
        scaled so that its maximum is at most 1), in the order given.
    """
    heatmaps = {}
    for layer_name in layer_names:
        grad_model = tf.keras.models.Model(
            [model.inputs], [model.get_layer(layer_name).output, model.output]
        )

        with tf.GradientTape() as tape:
            conv_outputs, predictions = grad_model(img_array)
            # Score being explained: output unit 0.
            loss = predictions[:, 0]

        grads = tape.gradient(loss, conv_outputs)
        pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))  # one weight per channel

        conv_outputs = conv_outputs[0]
        heatmap = tf.reduce_sum(tf.multiply(pooled_grads, conv_outputs), axis=-1)

        # Rectify before taking the maximum so the normaliser is always
        # positive; the raw maximum could be zero or negative for layers with
        # no positive contribution.
        heatmap = tf.maximum(heatmap, 0)
        heatmap = heatmap / (tf.reduce_max(heatmap) + tf.keras.backend.epsilon())
        heatmaps[layer_name] = heatmap.numpy()
    return heatmaps

# === Generate heatmaps ===
heatmaps = generate_multiple_gradcam_heatmaps(model, input_array, layer_names)

# === Plot heatmaps on original image ===
# One panel per layer plus one for the original image, three panels per row.
n_panels = len(heatmaps) + 1
n_cols = 3
n_rows = -(-n_panels // n_cols)  # ceiling division

img_uint8 = np.uint8(img_array * 255)  # same for every panel, so computed once

plt.figure(figsize=(18, 10))
for i, (layer_name, heatmap) in enumerate(heatmaps.items()):
    resized = cv2.resize(heatmap, (img_array.shape[1], img_array.shape[0]))
    heatmap_color = cv2.applyColorMap(np.uint8(255 * resized), cv2.COLORMAP_JET)
    # applyColorMap returns BGR while img_uint8 is RGB (from load_img) and
    # plt.imshow expects RGB; convert before blending so colours are not swapped.
    heatmap_color = cv2.cvtColor(heatmap_color, cv2.COLOR_BGR2RGB)
    overlay = cv2.addWeighted(heatmap_color, 0.5, img_uint8, 0.5, 0)

    plt.subplot(n_rows, n_cols, i + 1)
    plt.imshow(overlay)
    plt.title(f'Grad-CAM: {layer_name}')
    plt.axis('off')

# Show original image for reference
plt.subplot(n_rows, n_cols, n_panels)
plt.imshow(img_array)
plt.title("Original Image")
plt.axis('off')

plt.tight_layout()
plt.show()

# MobileNetV2

### Experiment LR

Train with Fold 1 as validation, learning rate 1e-5

In [None]:
import os
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc
import time

# === Setup ===
# Start from a clean Keras state.
tf.keras.backend.clear_session()

# Enable on-demand memory growth for the first GPU, when one is present.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
    gpu0 = physical_devices[0]
    try:
        tf.config.experimental.set_memory_growth(gpu0, True)
    except RuntimeError as e:
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayers'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayers'

# Create both output folders up front
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Function to load images and labels ===
def load_images_and_labels(image_dir):
    """Read the '.jpg' files under ``image_dir/Good`` and ``image_dir/Bad``.

    Every image is loaded at 224x224 and divided by 255. Returns
    ``(images, labels)`` as numpy arrays with label 1 for 'Good' and 0 for
    'Bad'; a class folder that does not exist contributes nothing.
    """
    label_for = {'Good': 1, 'Bad': 0}
    images, labels = [], []
    for subdir in ('Good', 'Bad'):
        class_dir = os.path.join(image_dir, subdir)
        if os.path.exists(class_dir):
            for fname in os.listdir(class_dir):
                if not fname.endswith('.jpg'):
                    continue
                pil_image = load_img(os.path.join(class_dir, fname), target_size=(224, 224))
                images.append(img_to_array(pil_image) / 255.0)
                labels.append(label_for[subdir])
    return np.array(images), np.array(labels)

# === Load Train Folds (2–5) ===
fold_dirs = [os.path.join(base_fold_dir, f'Fold{fold_num}') for fold_num in range(2, 6)]
image_batches, label_batches = zip(*(load_images_and_labels(fold_dir) for fold_dir in fold_dirs))

# Merge the per-fold batches into single training arrays
X_train = np.concatenate(image_batches)
y_train = np.concatenate(label_batches)

# === Load Validation Fold (Fold1) ===
val_dir = os.path.join(base_fold_dir, 'Fold1')
X_val, y_val = load_images_and_labels(val_dir)

# === Build Model (Fully Frozen) ===
def create_mobilenetv2_model(image_shape):
    """Build and compile a frozen-MobileNetV2 binary classifier.

    The ImageNet backbone (no top) is frozen entirely. On top of it sit global
    average pooling, Dense(256, ReLU, L2 0.01), batch normalisation,
    Dropout(0.5) and a single sigmoid output. The model is compiled with Adam
    at learning rate 1e-5 (gradient values clipped to 1.0), binary
    cross-entropy loss and accuracy as metric.
    """
    backbone = MobileNetV2(input_shape=image_shape, include_top=False, weights='imagenet')

    # No backbone weights are updated during training
    backbone.trainable = False

    # Classification head
    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(pooled)
    hidden = BatchNormalization()(hidden)
    hidden = Dropout(0.5)(hidden)
    probability = Dense(1, activation='sigmoid')(hidden)

    classifier = Model(inputs=backbone.input, outputs=probability)
    classifier.compile(
        optimizer=Adam(learning_rate=1e-5, clipvalue=1.0),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Input shape comes from the training array (all axes except the batch axis)
model = create_mobilenetv2_model(X_train.shape[1:])

# === Callbacks ===
# NOTE(review): the file name says 'fold245' although folds 2–5 are used for
# training — confirm before renaming, other cells may load this path.
checkpoint_file = os.path.join(model_save_dir, 'model_fold245_val1_frozen.keras')
callbacks = [
    # Stop after 25 epochs without val_loss improvement; keep the best weights
    EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
    # Halve the learning rate after 15 stagnant epochs, floor at 1e-7
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=15, min_lr=1e-7),
    ModelCheckpoint(checkpoint_file, save_best_only=True),
    TerminateOnNaN(),
]

# === Train ===
fit_options = dict(
    validation_data=(X_val, y_val),
    epochs=1000,  # upper bound; early stopping is expected to end training
    batch_size=32,
    callbacks=callbacks,
    verbose=2,
)
start_time = time.time()
history = model.fit(X_train, y_train, **fit_options)
elapsed_time = time.time() - start_time
print(f"⏱️ Training Time: {elapsed_time:.2f} seconds")

# === Save History ===
history_file = os.path.join(history_save_dir, 'history_fold245_val1_frozen.pkl')
with open(history_file, 'wb') as f:
    pickle.dump(history.history, f)

# === Evaluate ===
# Sigmoid probabilities, thresholded at 0.5 to obtain hard labels
y_pred_probs = model.predict(X_val)
y_pred_labels = (y_pred_probs > 0.5).astype(int)

accuracy = accuracy_score(y_val, y_pred_labels)
f1 = f1_score(y_val, y_pred_labels, zero_division=1)
precision = precision_score(y_val, y_pred_labels, zero_division=1)
recall = recall_score(y_val, y_pred_labels, zero_division=1)

# Area under the precision-recall curve from the raw probabilities
precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
auc_pr = auc(recall_vals, precision_vals)

# === Print Results ===
print(f"\n🔍 Evaluation on Fold1:")
print(f"Accuracy:  {accuracy:.4f}")
print(f"F1 Score:  {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"AUC-PR:    {auc_pr:.4f}")

print("✅ Training complete using Folds 2–5 with Fold 1 as validation (All layers frozen).")

Train with learning rate 1e-6

In [None]:
import os
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc
import time

# === Setup ===
# Start from a clean Keras state so models from earlier cells do not linger.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        # Memory growth has to be configured the same way on every visible GPU,
        # so enable it on all of them instead of only the first device.
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # TensorFlow raises RuntimeError when the GPUs are already initialised.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus6'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayersMinus6'
# Create the output folders up front; exist_ok makes re-running the cell safe.
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Function to load images and labels ===
def load_images_and_labels(image_dir):
    """Load the JPEG images found under ``image_dir/Good`` and ``image_dir/Bad``.

    Every image is resized to 224x224 and its pixel values are divided by
    255.0.  Images from ``Good`` get label 1, images from ``Bad`` get label 0.
    A sub-directory that does not exist is skipped.

    The extension check is case-insensitive (``.jpg`` and ``.JPG`` both match)
    and file names are visited in sorted order so the sample order does not
    depend on the file system.

    Args:
        image_dir: Directory that contains the ``Good`` and ``Bad`` folders.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays; both are empty when no
        matching file is found.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_path = os.path.join(image_dir, subdir)
        if not os.path.exists(full_path):
            continue
        label = 1 if subdir == 'Good' else 0
        for fname in sorted(os.listdir(full_path)):
            if not fname.lower().endswith('.jpg'):
                continue
            image = load_img(os.path.join(full_path, fname), target_size=(224, 224))
            images.append(img_to_array(image) / 255.0)
            labels.append(label)
    return np.array(images), np.array(labels)

# === Load Train Folds (2–5) ===
# Collect the (images, labels) pair of each fold, then join them along axis 0.
train_folds = []
for fold_num in (2, 3, 4, 5):
    fold_dir = os.path.join(base_fold_dir, f'Fold{fold_num}')
    train_folds.append(load_images_and_labels(fold_dir))

X_train = np.concatenate([fold_images for fold_images, _ in train_folds])
y_train = np.concatenate([fold_labels for _, fold_labels in train_folds])

# === Load Validation Fold (Fold1) ===
val_dir = os.path.join(base_fold_dir, 'Fold1')
X_val, y_val = load_images_and_labels(val_dir)

# === Build Model (Fully Frozen) ===
def create_mobilenetv2_model(image_shape, learning_rate=1e-6):
    """Build and compile a binary classifier on a frozen MobileNetV2 backbone.

    Args:
        image_shape: Passed to MobileNetV2 as ``input_shape``.
        learning_rate: Learning rate for the Adam optimizer.  The default is
            the value this cell was originally hard-coded with, so calls that
            pass only ``image_shape`` behave as before.

    Returns:
        A compiled Keras ``Model`` with one sigmoid output, binary
        cross-entropy loss and the accuracy metric.
    """
    # NOTE(review): the Keras docs describe mobilenet_v2.preprocess_input as
    # scaling pixels to [-1, 1]; the loader in this cell divides by 255 instead.
    # Confirm that the [0, 1] scaling is intended for the ImageNet weights.
    base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=image_shape)

    # Freeze all layers
    base_model.trainable = False

    # Output structure: pooled features -> L2-regularised dense layer ->
    # batch normalisation -> dropout -> single sigmoid unit.
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    output = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=base_model.input, outputs=output)
    # clipvalue=1.0 clips each gradient element to [-1, 1] before the update.
    model.compile(optimizer=Adam(learning_rate=learning_rate, clipvalue=1.0),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Build the network for whatever per-sample shape the loaded images have.
model = create_mobilenetv2_model(X_train.shape[1:])

# === Callbacks ===
# Stop after 25 stagnant epochs (restoring the best weights), halve the LR after
# 15 stagnant epochs, keep only the best checkpoint, and abort on a NaN loss.
checkpoint_path = os.path.join(model_save_dir, 'model_fold245_val1_frozen.keras')
early_stopping = EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True)
lr_on_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=15, min_lr=1e-7)
best_checkpoint = ModelCheckpoint(checkpoint_path, save_best_only=True)
callbacks = [early_stopping, lr_on_plateau, best_checkpoint, TerminateOnNaN()]

# === Train ===
# epochs=1000 is only an upper bound; the callbacks above may end the run earlier.
start_time = time.time()
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=1000,
    verbose=2,
    callbacks=callbacks,
    validation_data=(X_val, y_val),
)
elapsed_time = time.time() - start_time
print(f"⏱️ Training Time: {elapsed_time:.2f} seconds")

# === Save History ===
# Only the plain metrics dict is pickled; the plotting cells read it back.
history_path = os.path.join(history_save_dir, 'history_fold245_val1_frozen.pkl')
with open(history_path, 'wb') as f:
    pickle.dump(history.history, f)

# === Evaluate ===
# Sigmoid outputs are thresholded at 0.5 to obtain hard class labels.
y_pred_probs = model.predict(X_val)
y_pred_labels = (y_pred_probs > 0.5).astype(int)

accuracy = accuracy_score(y_val, y_pred_labels)
precision = precision_score(y_val, y_pred_labels, zero_division=1)
recall = recall_score(y_val, y_pred_labels, zero_division=1)
f1 = f1_score(y_val, y_pred_labels, zero_division=1)

# Area under the precision-recall curve, computed from the raw probabilities.
precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
auc_pr = auc(recall_vals, precision_vals)

# === Print Results ===
print(f"\n🔍 Evaluation on Fold1:")
print(f"Accuracy:  {accuracy:.4f}")
print(f"F1 Score:  {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"AUC-PR:    {auc_pr:.4f}")

print("✅ Training complete using Folds 2–5 with Fold 1 as validation (All layers frozen).")

Train 5e-6

In [None]:
import os
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc
import time

# === Setup ===
# Start from a clean Keras state so models from earlier cells do not linger.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        # Memory growth has to be configured the same way on every visible GPU,
        # so enable it on all of them instead of only the first device.
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # TensorFlow raises RuntimeError when the GPUs are already initialised.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayers5Minus6'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayers5Minus6'
# Create the output folders up front; exist_ok makes re-running the cell safe.
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Function to load images and labels ===
def load_images_and_labels(image_dir):
    """Load the JPEG images found under ``image_dir/Good`` and ``image_dir/Bad``.

    Every image is resized to 224x224 and its pixel values are divided by
    255.0.  Images from ``Good`` get label 1, images from ``Bad`` get label 0.
    A sub-directory that does not exist is skipped.

    The extension check is case-insensitive (``.jpg`` and ``.JPG`` both match)
    and file names are visited in sorted order so the sample order does not
    depend on the file system.

    Args:
        image_dir: Directory that contains the ``Good`` and ``Bad`` folders.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays; both are empty when no
        matching file is found.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_path = os.path.join(image_dir, subdir)
        if not os.path.exists(full_path):
            continue
        label = 1 if subdir == 'Good' else 0
        for fname in sorted(os.listdir(full_path)):
            if not fname.lower().endswith('.jpg'):
                continue
            image = load_img(os.path.join(full_path, fname), target_size=(224, 224))
            images.append(img_to_array(image) / 255.0)
            labels.append(label)
    return np.array(images), np.array(labels)

# === Load Train Folds (2–5) ===
# Collect the (images, labels) pair of each fold, then join them along axis 0.
train_folds = []
for fold_num in (2, 3, 4, 5):
    fold_dir = os.path.join(base_fold_dir, f'Fold{fold_num}')
    train_folds.append(load_images_and_labels(fold_dir))

X_train = np.concatenate([fold_images for fold_images, _ in train_folds])
y_train = np.concatenate([fold_labels for _, fold_labels in train_folds])

# === Load Validation Fold (Fold1) ===
val_dir = os.path.join(base_fold_dir, 'Fold1')
X_val, y_val = load_images_and_labels(val_dir)

# === Build Model (Fully Frozen) ===
def create_mobilenetv2_model(image_shape, learning_rate=5e-6):
    """Build and compile a binary classifier on a frozen MobileNetV2 backbone.

    Args:
        image_shape: Passed to MobileNetV2 as ``input_shape``.
        learning_rate: Learning rate for the Adam optimizer.  The default is
            the value this cell was originally hard-coded with, so calls that
            pass only ``image_shape`` behave as before.

    Returns:
        A compiled Keras ``Model`` with one sigmoid output, binary
        cross-entropy loss and the accuracy metric.
    """
    # NOTE(review): the Keras docs describe mobilenet_v2.preprocess_input as
    # scaling pixels to [-1, 1]; the loader in this cell divides by 255 instead.
    # Confirm that the [0, 1] scaling is intended for the ImageNet weights.
    base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=image_shape)

    # Freeze all layers
    base_model.trainable = False

    # Output structure: pooled features -> L2-regularised dense layer ->
    # batch normalisation -> dropout -> single sigmoid unit.
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    output = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=base_model.input, outputs=output)
    # clipvalue=1.0 clips each gradient element to [-1, 1] before the update.
    model.compile(optimizer=Adam(learning_rate=learning_rate, clipvalue=1.0),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Build the network for whatever per-sample shape the loaded images have.
model = create_mobilenetv2_model(X_train.shape[1:])

# === Callbacks ===
# Stop after 25 stagnant epochs (restoring the best weights), halve the LR after
# 15 stagnant epochs, keep only the best checkpoint, and abort on a NaN loss.
checkpoint_path = os.path.join(model_save_dir, 'model_fold245_val1_frozen.keras')
early_stopping = EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True)
lr_on_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=15, min_lr=1e-7)
best_checkpoint = ModelCheckpoint(checkpoint_path, save_best_only=True)
callbacks = [early_stopping, lr_on_plateau, best_checkpoint, TerminateOnNaN()]

# === Train ===
# epochs=1000 is only an upper bound; the callbacks above may end the run earlier.
start_time = time.time()
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=1000,
    verbose=2,
    callbacks=callbacks,
    validation_data=(X_val, y_val),
)
elapsed_time = time.time() - start_time
print(f"⏱️ Training Time: {elapsed_time:.2f} seconds")

# === Save History ===
# Only the plain metrics dict is pickled; the plotting cells read it back.
history_path = os.path.join(history_save_dir, 'history_fold245_val1_frozen.pkl')
with open(history_path, 'wb') as f:
    pickle.dump(history.history, f)

# === Evaluate ===
# Sigmoid outputs are thresholded at 0.5 to obtain hard class labels.
y_pred_probs = model.predict(X_val)
y_pred_labels = (y_pred_probs > 0.5).astype(int)

accuracy = accuracy_score(y_val, y_pred_labels)
precision = precision_score(y_val, y_pred_labels, zero_division=1)
recall = recall_score(y_val, y_pred_labels, zero_division=1)
f1 = f1_score(y_val, y_pred_labels, zero_division=1)

# Area under the precision-recall curve, computed from the raw probabilities.
precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
auc_pr = auc(recall_vals, precision_vals)

# === Print Results ===
print(f"\n🔍 Evaluation on Fold1:")
print(f"Accuracy:  {accuracy:.4f}")
print(f"F1 Score:  {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"AUC-PR:    {auc_pr:.4f}")

print("✅ Training complete using Folds 2–5 with Fold 1 as validation (All layers frozen).")

Train 5e-5

In [None]:
import os
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc
import time

# === Setup ===
# Start from a clean Keras state so models from earlier cells do not linger.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        # Memory growth has to be configured the same way on every visible GPU,
        # so enable it on all of them instead of only the first device.
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # TensorFlow raises RuntimeError when the GPUs are already initialised.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayers5Minus5'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayers5Minus5'
# Create the output folders up front; exist_ok makes re-running the cell safe.
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Function to load images and labels ===
def load_images_and_labels(image_dir):
    """Load the JPEG images found under ``image_dir/Good`` and ``image_dir/Bad``.

    Every image is resized to 224x224 and its pixel values are divided by
    255.0.  Images from ``Good`` get label 1, images from ``Bad`` get label 0.
    A sub-directory that does not exist is skipped.

    The extension check is case-insensitive (``.jpg`` and ``.JPG`` both match)
    and file names are visited in sorted order so the sample order does not
    depend on the file system.

    Args:
        image_dir: Directory that contains the ``Good`` and ``Bad`` folders.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays; both are empty when no
        matching file is found.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_path = os.path.join(image_dir, subdir)
        if not os.path.exists(full_path):
            continue
        label = 1 if subdir == 'Good' else 0
        for fname in sorted(os.listdir(full_path)):
            if not fname.lower().endswith('.jpg'):
                continue
            image = load_img(os.path.join(full_path, fname), target_size=(224, 224))
            images.append(img_to_array(image) / 255.0)
            labels.append(label)
    return np.array(images), np.array(labels)

# === Load Train Folds (2–5) ===
# Collect the (images, labels) pair of each fold, then join them along axis 0.
train_folds = []
for fold_num in (2, 3, 4, 5):
    fold_dir = os.path.join(base_fold_dir, f'Fold{fold_num}')
    train_folds.append(load_images_and_labels(fold_dir))

X_train = np.concatenate([fold_images for fold_images, _ in train_folds])
y_train = np.concatenate([fold_labels for _, fold_labels in train_folds])

# === Load Validation Fold (Fold1) ===
val_dir = os.path.join(base_fold_dir, 'Fold1')
X_val, y_val = load_images_and_labels(val_dir)

# === Build Model (Fully Frozen) ===
def create_mobilenetv2_model(image_shape, learning_rate=5e-5):
    """Build and compile a binary classifier on a frozen MobileNetV2 backbone.

    Args:
        image_shape: Passed to MobileNetV2 as ``input_shape``.
        learning_rate: Learning rate for the Adam optimizer.  The default is
            the value this cell was originally hard-coded with, so calls that
            pass only ``image_shape`` behave as before.

    Returns:
        A compiled Keras ``Model`` with one sigmoid output, binary
        cross-entropy loss and the accuracy metric.
    """
    # NOTE(review): the Keras docs describe mobilenet_v2.preprocess_input as
    # scaling pixels to [-1, 1]; the loader in this cell divides by 255 instead.
    # Confirm that the [0, 1] scaling is intended for the ImageNet weights.
    base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=image_shape)

    # Freeze all layers
    base_model.trainable = False

    # Output structure: pooled features -> L2-regularised dense layer ->
    # batch normalisation -> dropout -> single sigmoid unit.
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    output = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=base_model.input, outputs=output)
    # clipvalue=1.0 clips each gradient element to [-1, 1] before the update.
    model.compile(optimizer=Adam(learning_rate=learning_rate, clipvalue=1.0),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Build the network for whatever per-sample shape the loaded images have.
model = create_mobilenetv2_model(X_train.shape[1:])

# === Callbacks ===
# Stop after 25 stagnant epochs (restoring the best weights), halve the LR after
# 15 stagnant epochs, keep only the best checkpoint, and abort on a NaN loss.
checkpoint_path = os.path.join(model_save_dir, 'model_fold245_val1_frozen.keras')
early_stopping = EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True)
lr_on_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=15, min_lr=1e-7)
best_checkpoint = ModelCheckpoint(checkpoint_path, save_best_only=True)
callbacks = [early_stopping, lr_on_plateau, best_checkpoint, TerminateOnNaN()]

# === Train ===
# epochs=1000 is only an upper bound; the callbacks above may end the run earlier.
start_time = time.time()
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=1000,
    verbose=2,
    callbacks=callbacks,
    validation_data=(X_val, y_val),
)
elapsed_time = time.time() - start_time
print(f"⏱️ Training Time: {elapsed_time:.2f} seconds")

# === Save History ===
# Only the plain metrics dict is pickled; the plotting cells read it back.
history_path = os.path.join(history_save_dir, 'history_fold245_val1_frozen.pkl')
with open(history_path, 'wb') as f:
    pickle.dump(history.history, f)

# === Evaluate ===
# Sigmoid outputs are thresholded at 0.5 to obtain hard class labels.
y_pred_probs = model.predict(X_val)
y_pred_labels = (y_pred_probs > 0.5).astype(int)

accuracy = accuracy_score(y_val, y_pred_labels)
precision = precision_score(y_val, y_pred_labels, zero_division=1)
recall = recall_score(y_val, y_pred_labels, zero_division=1)
f1 = f1_score(y_val, y_pred_labels, zero_division=1)

# Area under the precision-recall curve, computed from the raw probabilities.
precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
auc_pr = auc(recall_vals, precision_vals)

# === Print Results ===
print(f"\n🔍 Evaluation on Fold1:")
print(f"Accuracy:  {accuracy:.4f}")
print(f"F1 Score:  {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"AUC-PR:    {auc_pr:.4f}")

print("✅ Training complete using Folds 2–5 with Fold 1 as validation (All layers frozen).")

Train 1e-4

In [None]:
import os
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc
import time

# === Setup ===
# Start from a clean Keras state so models from earlier cells do not linger.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        # Memory growth has to be configured the same way on every visible GPU,
        # so enable it on all of them instead of only the first device.
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # TensorFlow raises RuntimeError when the GPUs are already initialised.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayersMinus4'
# Create the output folders up front; exist_ok makes re-running the cell safe.
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Function to load images and labels ===
def load_images_and_labels(image_dir):
    """Load the JPEG images found under ``image_dir/Good`` and ``image_dir/Bad``.

    Every image is resized to 224x224 and its pixel values are divided by
    255.0.  Images from ``Good`` get label 1, images from ``Bad`` get label 0.
    A sub-directory that does not exist is skipped.

    The extension check is case-insensitive (``.jpg`` and ``.JPG`` both match)
    and file names are visited in sorted order so the sample order does not
    depend on the file system.

    Args:
        image_dir: Directory that contains the ``Good`` and ``Bad`` folders.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays; both are empty when no
        matching file is found.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_path = os.path.join(image_dir, subdir)
        if not os.path.exists(full_path):
            continue
        label = 1 if subdir == 'Good' else 0
        for fname in sorted(os.listdir(full_path)):
            if not fname.lower().endswith('.jpg'):
                continue
            image = load_img(os.path.join(full_path, fname), target_size=(224, 224))
            images.append(img_to_array(image) / 255.0)
            labels.append(label)
    return np.array(images), np.array(labels)

# === Load Train Folds (2–5) ===
# Collect the (images, labels) pair of each fold, then join them along axis 0.
train_folds = []
for fold_num in (2, 3, 4, 5):
    fold_dir = os.path.join(base_fold_dir, f'Fold{fold_num}')
    train_folds.append(load_images_and_labels(fold_dir))

X_train = np.concatenate([fold_images for fold_images, _ in train_folds])
y_train = np.concatenate([fold_labels for _, fold_labels in train_folds])

# === Load Validation Fold (Fold1) ===
val_dir = os.path.join(base_fold_dir, 'Fold1')
X_val, y_val = load_images_and_labels(val_dir)

# === Build Model (Fully Frozen) ===
def create_mobilenetv2_model(image_shape, learning_rate=1e-4):
    """Build and compile a binary classifier on a frozen MobileNetV2 backbone.

    Args:
        image_shape: Passed to MobileNetV2 as ``input_shape``.
        learning_rate: Learning rate for the Adam optimizer.  The default is
            the value this cell was originally hard-coded with, so calls that
            pass only ``image_shape`` behave as before.

    Returns:
        A compiled Keras ``Model`` with one sigmoid output, binary
        cross-entropy loss and the accuracy metric.
    """
    # NOTE(review): the Keras docs describe mobilenet_v2.preprocess_input as
    # scaling pixels to [-1, 1]; the loader in this cell divides by 255 instead.
    # Confirm that the [0, 1] scaling is intended for the ImageNet weights.
    base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=image_shape)

    # Freeze all layers
    base_model.trainable = False

    # Output structure: pooled features -> L2-regularised dense layer ->
    # batch normalisation -> dropout -> single sigmoid unit.
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    output = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=base_model.input, outputs=output)
    # clipvalue=1.0 clips each gradient element to [-1, 1] before the update.
    model.compile(optimizer=Adam(learning_rate=learning_rate, clipvalue=1.0),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Build the network for whatever per-sample shape the loaded images have.
model = create_mobilenetv2_model(X_train.shape[1:])

# === Callbacks ===
# Stop after 25 stagnant epochs (restoring the best weights), halve the LR after
# 15 stagnant epochs, keep only the best checkpoint, and abort on a NaN loss.
checkpoint_path = os.path.join(model_save_dir, 'model_fold245_val1_frozen.keras')
early_stopping = EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True)
lr_on_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=15, min_lr=1e-7)
best_checkpoint = ModelCheckpoint(checkpoint_path, save_best_only=True)
callbacks = [early_stopping, lr_on_plateau, best_checkpoint, TerminateOnNaN()]

# === Train ===
# epochs=1000 is only an upper bound; the callbacks above may end the run earlier.
start_time = time.time()
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=1000,
    verbose=2,
    callbacks=callbacks,
    validation_data=(X_val, y_val),
)
elapsed_time = time.time() - start_time
print(f"⏱️ Training Time: {elapsed_time:.2f} seconds")

# === Save History ===
# Only the plain metrics dict is pickled; the plotting cells read it back.
history_path = os.path.join(history_save_dir, 'history_fold245_val1_frozen.pkl')
with open(history_path, 'wb') as f:
    pickle.dump(history.history, f)

# === Evaluate ===
# Sigmoid outputs are thresholded at 0.5 to obtain hard class labels.
y_pred_probs = model.predict(X_val)
y_pred_labels = (y_pred_probs > 0.5).astype(int)

accuracy = accuracy_score(y_val, y_pred_labels)
precision = precision_score(y_val, y_pred_labels, zero_division=1)
recall = recall_score(y_val, y_pred_labels, zero_division=1)
f1 = f1_score(y_val, y_pred_labels, zero_division=1)

# Area under the precision-recall curve, computed from the raw probabilities.
precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
auc_pr = auc(recall_vals, precision_vals)

# === Print Results ===
print(f"\n🔍 Evaluation on Fold1:")
print(f"Accuracy:  {accuracy:.4f}")
print(f"F1 Score:  {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"AUC-PR:    {auc_pr:.4f}")

print("✅ Training complete using Folds 2–5 with Fold 1 as validation (All layers frozen).")

Train 5e-4

In [None]:
import os
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc
import time

# === Setup ===
# Start from a clean Keras state so models from earlier cells do not linger.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        # Memory growth has to be configured the same way on every visible GPU,
        # so enable it on all of them instead of only the first device.
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # TensorFlow raises RuntimeError when the GPUs are already initialised.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayers5Minus4'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayers5Minus4'
# Create the output folders up front; exist_ok makes re-running the cell safe.
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Function to load images and labels ===
def load_images_and_labels(image_dir):
    """Load the JPEG images found under ``image_dir/Good`` and ``image_dir/Bad``.

    Every image is resized to 224x224 and its pixel values are divided by
    255.0.  Images from ``Good`` get label 1, images from ``Bad`` get label 0.
    A sub-directory that does not exist is skipped.

    The extension check is case-insensitive (``.jpg`` and ``.JPG`` both match)
    and file names are visited in sorted order so the sample order does not
    depend on the file system.

    Args:
        image_dir: Directory that contains the ``Good`` and ``Bad`` folders.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays; both are empty when no
        matching file is found.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_path = os.path.join(image_dir, subdir)
        if not os.path.exists(full_path):
            continue
        label = 1 if subdir == 'Good' else 0
        for fname in sorted(os.listdir(full_path)):
            if not fname.lower().endswith('.jpg'):
                continue
            image = load_img(os.path.join(full_path, fname), target_size=(224, 224))
            images.append(img_to_array(image) / 255.0)
            labels.append(label)
    return np.array(images), np.array(labels)

# === Load Train Folds (2–5) ===
# Collect the (images, labels) pair of each fold, then join them along axis 0.
train_folds = []
for fold_num in (2, 3, 4, 5):
    fold_dir = os.path.join(base_fold_dir, f'Fold{fold_num}')
    train_folds.append(load_images_and_labels(fold_dir))

X_train = np.concatenate([fold_images for fold_images, _ in train_folds])
y_train = np.concatenate([fold_labels for _, fold_labels in train_folds])

# === Load Validation Fold (Fold1) ===
val_dir = os.path.join(base_fold_dir, 'Fold1')
X_val, y_val = load_images_and_labels(val_dir)

# === Build Model (Fully Frozen) ===
def create_mobilenetv2_model(image_shape, learning_rate=5e-4):
    """Build and compile a binary classifier on a frozen MobileNetV2 backbone.

    Args:
        image_shape: Passed to MobileNetV2 as ``input_shape``.
        learning_rate: Learning rate for the Adam optimizer.  The default is
            the value this cell was originally hard-coded with, so calls that
            pass only ``image_shape`` behave as before.

    Returns:
        A compiled Keras ``Model`` with one sigmoid output, binary
        cross-entropy loss and the accuracy metric.
    """
    # NOTE(review): the Keras docs describe mobilenet_v2.preprocess_input as
    # scaling pixels to [-1, 1]; the loader in this cell divides by 255 instead.
    # Confirm that the [0, 1] scaling is intended for the ImageNet weights.
    base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=image_shape)

    # Freeze all layers
    base_model.trainable = False

    # Output structure: pooled features -> L2-regularised dense layer ->
    # batch normalisation -> dropout -> single sigmoid unit.
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    output = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=base_model.input, outputs=output)
    # clipvalue=1.0 clips each gradient element to [-1, 1] before the update.
    model.compile(optimizer=Adam(learning_rate=learning_rate, clipvalue=1.0),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Build the network for whatever per-sample shape the loaded images have.
model = create_mobilenetv2_model(X_train.shape[1:])

# === Callbacks ===
# Stop after 25 stagnant epochs (restoring the best weights), halve the LR after
# 15 stagnant epochs, keep only the best checkpoint, and abort on a NaN loss.
checkpoint_path = os.path.join(model_save_dir, 'model_fold245_val1_frozen.keras')
early_stopping = EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True)
lr_on_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=15, min_lr=1e-7)
best_checkpoint = ModelCheckpoint(checkpoint_path, save_best_only=True)
callbacks = [early_stopping, lr_on_plateau, best_checkpoint, TerminateOnNaN()]

# === Train ===
# epochs=1000 is only an upper bound; the callbacks above may end the run earlier.
start_time = time.time()
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=1000,
    verbose=2,
    callbacks=callbacks,
    validation_data=(X_val, y_val),
)
elapsed_time = time.time() - start_time
print(f"⏱️ Training Time: {elapsed_time:.2f} seconds")

# === Save History ===
# Only the plain metrics dict is pickled; the plotting cells read it back.
history_path = os.path.join(history_save_dir, 'history_fold245_val1_frozen.pkl')
with open(history_path, 'wb') as f:
    pickle.dump(history.history, f)

# === Evaluate ===
# Sigmoid outputs are thresholded at 0.5 to obtain hard class labels.
y_pred_probs = model.predict(X_val)
y_pred_labels = (y_pred_probs > 0.5).astype(int)

accuracy = accuracy_score(y_val, y_pred_labels)
precision = precision_score(y_val, y_pred_labels, zero_division=1)
recall = recall_score(y_val, y_pred_labels, zero_division=1)
f1 = f1_score(y_val, y_pred_labels, zero_division=1)

# Area under the precision-recall curve, computed from the raw probabilities.
precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
auc_pr = auc(recall_vals, precision_vals)

# === Print Results ===
print(f"\n🔍 Evaluation on Fold1:")
print(f"Accuracy:  {accuracy:.4f}")
print(f"F1 Score:  {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"AUC-PR:    {auc_pr:.4f}")

print("✅ Training complete using Folds 2–5 with Fold 1 as validation (All layers frozen).")

Analyze validation and training curves per learning rate

In [None]:
import os
import pickle
import matplotlib.pyplot as plt

# === Mapping of learning rates to their respective folders ===
history_folders = {
    '1e-06': '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayersMinus6',
    '5e-06': '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayers5Minus6',
    '1e-05': '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayers',
    '5e-05': '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayers5Minus5',
    '1e-04': '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayersMinus4',
    '5e-04': '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayers5Minus4',
}

# One colour per learning rate, paired with history_folders in order.
colors = ['orange', 'purple', 'blue', 'green', 'red', 'black']

# Number of leading epochs drawn for every run.  The same constant drives both
# the slices and the log message below so the two cannot disagree.
max_epochs = 200

# === Set up plot ===
# Left panel: accuracy curves; right panel: loss curves.
fig, axes = plt.subplots(1, 2, figsize=(20, 5))

print(f"📊 Plotting training and validation curves (up to epoch {max_epochs})...\n")

# === Loop through each learning rate/folder and plot ===
for (lr, folder), color in zip(history_folders.items(), colors):
    history_path = os.path.join(folder, 'history_fold245_val1_frozen.pkl')

    if os.path.exists(history_path):
        # The pickle is the history dict written by the training cells.
        with open(history_path, 'rb') as f:
            hist = pickle.load(f)

        # Limit to the first max_epochs epochs
        acc = hist['accuracy'][:max_epochs]
        val_acc = hist['val_accuracy'][:max_epochs]
        loss = hist['loss'][:max_epochs]
        val_loss = hist['val_loss'][:max_epochs]

        # Accuracy (solid = train, dashed = validation)
        axes[0].plot(acc, linestyle='-', color=color, alpha=0.6, label=f'Train Acc (LR={lr})')
        axes[0].plot(val_acc, linestyle='--', color=color, label=f'Val Acc (LR={lr})')

        # Loss (solid = train, dashed = validation)
        axes[1].plot(loss, linestyle='-', color=color, alpha=0.6, label=f'Train Loss (LR={lr})')
        axes[1].plot(val_loss, linestyle='--', color=color, label=f'Val Loss (LR={lr})')
    else:
        print(f"⚠️ Missing file for LR {lr}: {history_path}")

# === Final plot formatting ===
axes[0].set_title('Training and Validation Accuracy')
axes[0].set_xlabel('Epochs')
axes[0].set_ylabel('Accuracy')
axes[0].grid(True)
axes[0].legend()

axes[1].set_title('Training and Validation Loss')
axes[1].set_xlabel('Epochs')
axes[1].set_ylabel('Loss')
axes[1].grid(True)
axes[1].legend()

plt.tight_layout()
plt.show()

Graph over metrics

In [None]:
import os
import pickle
import matplotlib.pyplot as plt

# === Learning rates and history folders ===
history_folders = {
    '1e-06': '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayersMinus6',
    '5e-06': '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayers5Minus6',
    '1e-05': '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayers',
    '5e-05': '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayers5Minus5',
    '1e-04': '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayersMinus4',
    '5e-04': '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayers5Minus4',
}

learning_rates, best_val_accuracies, min_val_losses = [], [], []

# === Extract best metrics from history files ===
# Runs whose history file is missing are reported and left out of the plots.
for lr, folder in history_folders.items():
    history_path = os.path.join(folder, 'history_fold245_val1_frozen.pkl')
    if not os.path.exists(history_path):
        print(f"⚠️ Missing file for LR {lr}: {history_path}")
        continue

    with open(history_path, 'rb') as f:
        hist = pickle.load(f)

    # Best value over all recorded epochs; the two may come from different epochs.
    best_acc = max(hist['val_accuracy'])
    min_loss = min(hist['val_loss'])

    learning_rates.append(float(lr))
    best_val_accuracies.append(best_acc)
    min_val_losses.append(min_loss)
    print(f"✅ LR={lr}: Best Val Acc={best_acc:.4f}, Min Val Loss={min_loss:.4f}")

# === Plots: metric vs learning rate on a log x-axis ===
# Around a well-chosen learning rate the accuracy would typically peak
# (inverted U) while the loss would dip (U-shape).
panels = [
    (1, best_val_accuracies, 'Best Validation Accuracy', 'Validation Accuracy vs Learning Rate'),
    (2, min_val_losses, 'Minimum Validation Loss', 'Validation Loss vs Learning Rate'),
]

plt.figure(figsize=(12, 5))
for position, values, y_label, panel_title in panels:
    plt.subplot(1, 2, position)
    plt.plot(learning_rates, values, marker='o')
    plt.xscale('log')
    plt.xlabel('Learning Rate (log scale)')
    plt.ylabel(y_label)
    plt.title(panel_title)
    plt.grid(True)

plt.tight_layout()
plt.show()

In [None]:
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import make_interp_spline

# === History folders per LR ===
history_folders = {
    '1e-06': '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayersMinus6',
    '5e-06': '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayers5Minus6',
    '1e-05': '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayers',
    '5e-05': '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayers5Minus5',
    '1e-04': '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayersMinus4',
    '5e-04': '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayers5Minus4',
}

# === Collect metrics ===
learning_rates = []
best_val_accuracies = []
min_val_losses = []

for lr_str, folder in history_folders.items():
    history_path = os.path.join(folder, 'history_fold245_val1_frozen.pkl')
    if os.path.exists(history_path):
        with open(history_path, 'rb') as f:
            hist = pickle.load(f)
        # Compute each extreme once and reuse it for the list and the message.
        best_acc = max(hist['val_accuracy'])
        min_loss = min(hist['val_loss'])
        best_val_accuracies.append(best_acc)
        min_val_losses.append(min_loss)
        learning_rates.append(float(lr_str))
        print(f"✅ LR={lr_str}: Val Acc={best_acc:.4f}, Val Loss={min_loss:.4f}")
    else:
        print(f"⚠️ Missing file for LR={lr_str}: {history_path}")

# === Convert to log scale for smoothing ===
# A quadratic spline (k=2) needs at least three data points.  When history
# files are missing and fewer remain, fall back to the raw points instead of
# failing inside x.min() or make_interp_spline.
if len(learning_rates) >= 3:
    x = np.log10(learning_rates)
    x_smooth = np.linspace(x.min(), x.max(), 300)
    lr_smooth = 10**x_smooth
    acc_smooth = make_interp_spline(x, best_val_accuracies, k=2)(x_smooth)
    loss_smooth = make_interp_spline(x, min_val_losses, k=2)(x_smooth)
else:
    print(f"⚠️ Only {len(learning_rates)} history file(s) found; plotting raw points without smoothing.")
    lr_smooth = learning_rates
    acc_smooth = best_val_accuracies
    loss_smooth = min_val_losses

# === Plot smoothed dual-axis graph ===
fig, ax1 = plt.subplots(figsize=(10, 6))

# Accuracy (left axis): smoothed curve plus the measured points.
ax1.set_xlabel('Learning Rate (log scale)')
ax1.set_ylabel('Validation Accuracy', color='blue')
ax1.plot(lr_smooth, acc_smooth, color='blue')
ax1.scatter(learning_rates, best_val_accuracies, color='blue')
ax1.set_xscale('log')
ax1.tick_params(axis='y', labelcolor='blue')

# Loss (right axis): shares the x-axis with the accuracy plot.
ax2 = ax1.twinx()
ax2.set_ylabel('Validation Loss', color='red')
ax2.plot(lr_smooth, loss_smooth, color='red')
ax2.scatter(learning_rates, min_val_losses, color='red')
ax2.tick_params(axis='y', labelcolor='red')

plt.title('Validation Accuracy and Loss vs Learning Rate')
plt.grid(True)
fig.tight_layout()
plt.show()

### Create better line graph per LR

Train

In [None]:
import os
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc
import time

# === GPU Setup ===
# Start from a clean Keras state so models from earlier cells do not linger.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        # Memory growth has to be configured the same way on every visible GPU,
        # so enable it on all of them instead of only the first device.
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # TensorFlow raises RuntimeError when the GPUs are already initialised.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
# Input folds plus the base output folders for models and training histories.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
base_model_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_LRExperiment_5e6_to_5e5'
base_history_dir = '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_LRExperiment_5e6_to_5e5'

# === Function to load images and labels ===
def load_images_and_labels(image_dir):
    """Load the JPEG images of one fold together with their binary labels.

    Images are read from ``image_dir/Good`` and ``image_dir/Bad``, resized to
    224x224 by ``load_img`` and divided by 255. A class sub-folder that does
    not exist (or is not a directory) is skipped.

    Args:
        image_dir: Fold directory containing ``Good`` and/or ``Bad``.

    Returns:
        ``(images, labels)`` as NumPy arrays; the label is 1 for ``Good`` and
        0 for ``Bad``. Both arrays are empty when no image was found.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_path = os.path.join(image_dir, subdir)
        if not os.path.isdir(full_path):
            continue
        # os.listdir order is arbitrary; sorting keeps the sample order
        # reproducible between runs and machines.
        for fname in sorted(os.listdir(full_path)):
            # Case-insensitive match, consistent with the fold-splitting code,
            # so files named *.JPG are not silently dropped.
            if fname.lower().endswith('.jpg'):
                image_path = os.path.join(full_path, fname)
                image_array = load_img(image_path, target_size=(224, 224))
                images.append(img_to_array(image_array) / 255.0)
                labels.append(1 if subdir == 'Good' else 0)
    return np.array(images), np.array(labels)

# === Training data: folds 2 through 5 ===
# Load every training fold, regroup the (images, labels) pairs into one tuple
# of image arrays and one tuple of label arrays, then join each along axis 0.
X_train, y_train = zip(*(
    load_images_and_labels(os.path.join(base_fold_dir, f'Fold{fold_num}'))
    for fold_num in range(2, 6)
))
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

# === Validation data: fold 1 is held out ===
val_dir = os.path.join(base_fold_dir, 'Fold1')
X_val, y_val = load_images_and_labels(val_dir)

# === Build Model ===
def create_mobilenetv2_model(image_shape, learning_rate):
    """Build and compile a binary classifier on top of a frozen MobileNetV2.

    Args:
        image_shape: Input shape passed to MobileNetV2 as ``input_shape``.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` with a single sigmoid output unit, trained
        with binary cross-entropy and reporting accuracy.
    """
    backbone = MobileNetV2(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # only the new head below is trained

    # Classification head, applied in order to the backbone's feature map.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    optimizer = Adam(learning_rate=learning_rate, clipvalue=1.0)
    classifier.compile(optimizer=optimizer,
                       loss='binary_crossentropy',
                       metrics=['accuracy'])
    return classifier

# === Learning rates to test ===
learning_rates = [
    5e-5,   # 0.00005
    7e-5,   # 0.00007
    1e-4,   # 0.0001
    2e-4,   # 0.0002
    3e-4,   # 0.0003
    4e-4,   # 0.0004
    5e-4,   # 0.0005
    7e-4,   # 0.0007
    1e-3    # 0.001
]


# === Training Loop ===
# Train one frozen-backbone model per learning rate, save its best checkpoint
# and pickled history under "LR_<rate>", then report metrics on the validation fold.
for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    # A brand-new model is built on every iteration. Clearing Keras' global
    # state first keeps memory from growing with each model created in the loop.
    tf.keras.backend.clear_session()
    model = create_mobilenetv2_model(X_train.shape[1:], learning_rate=lr)

    # Folder names keep one significant digit (e.g. "LR_5e-05"); every rate
    # in the list above is distinct at that precision.
    model_dir = os.path.join(base_model_dir, f"LR_{lr:.0e}")
    history_dir = os.path.join(base_history_dir, f"LR_{lr:.0e}")
    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(history_dir, exist_ok=True)

    callbacks = [
        EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=15, min_lr=1e-7),
        ModelCheckpoint(os.path.join(model_dir, 'model_fold245_val1_frozen.keras'), save_best_only=True),
        TerminateOnNaN()
    ]

    start_time = time.time()
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=50,
        batch_size=32,
        callbacks=callbacks,
        verbose=2
    )
    elapsed_time = time.time() - start_time
    print(f"⏱️ Training Time: {elapsed_time:.2f} seconds")

    # Save training history
    with open(os.path.join(history_dir, 'history_fold245_val1_frozen.pkl'), 'wb') as f:
        pickle.dump(history.history, f)

    # Evaluation
    # predict() returns shape (N, 1); flatten to 1-D so the scores line up
    # with y_val, as the later evaluation cells do.
    y_pred_probs = model.predict(X_val).flatten()
    y_pred_labels = (y_pred_probs > 0.5).astype(int)

    f1 = f1_score(y_val, y_pred_labels, zero_division=1)
    precision = precision_score(y_val, y_pred_labels, zero_division=1)
    recall = recall_score(y_val, y_pred_labels, zero_division=1)
    accuracy = accuracy_score(y_val, y_pred_labels)
    precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
    auc_pr = auc(recall_vals, precision_vals)

    # Print Results
    print(f"📊 Results for LR={lr:.0e}")
    print(f"Accuracy:  {accuracy:.4f}")
    print(f"F1 Score:  {f1:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"AUC-PR:    {auc_pr:.4f}")

In [None]:
import matplotlib.pyplot as plt
from scipy.interpolate import make_interp_spline

# === Reload learning rates and set base history path ===
learning_rates = [
    5e-5, 7e-5, 1e-4, 2e-4, 3e-4, 4e-4, 5e-4, 7e-4, 1e-3
]
history_base_dir = '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_LRExperiment_5e6_to_5e5'

# === Gather metrics ===
# One slot per learning rate; None marks a run whose history file is missing.
val_accuracies = []
val_losses = []

for lr in learning_rates:
    path = os.path.join(history_base_dir, f"LR_{lr:.0e}", 'history_fold245_val1_frozen.pkl')
    if os.path.exists(path):
        # NOTE: pickle.load can execute arbitrary code; only open history
        # files that were written by the training cell.
        with open(path, 'rb') as f:
            hist = pickle.load(f)
        val_accuracies.append(max(hist['val_accuracy']))
        val_losses.append(min(hist['val_loss']))
        print(f"✅ LR={lr:.0e}: Val Acc={val_accuracies[-1]:.4f}, Val Loss={val_losses[-1]:.4f}")
    else:
        print(f"⚠️ Missing history file: {path}")
        val_accuracies.append(None)
        val_losses.append(None)

# === Filter missing values ===
# Drop missing runs and non-finite metrics: the spline fit below rejects
# NaN/inf input, which a diverged run can produce.
filtered_lrs, filtered_accs, filtered_losses = [], [], []
for lr, acc, loss in zip(learning_rates, val_accuracies, val_losses):
    if acc is None or loss is None:
        continue
    if not (np.isfinite(acc) and np.isfinite(loss)):
        print(f"⚠️ Skipping LR={lr:.0e}: non-finite validation metric")
        continue
    filtered_lrs.append(lr)
    filtered_accs.append(acc)
    filtered_losses.append(loss)

if not filtered_lrs:
    raise FileNotFoundError(f"No usable history files found under {history_base_dir}")

# === Sort for interpolation ===
sorted_idx = np.argsort(filtered_lrs)
filtered_lrs = np.array(filtered_lrs)[sorted_idx]
filtered_accs = np.array(filtered_accs)[sorted_idx]
filtered_losses = np.array(filtered_losses)[sorted_idx]

# === Interpolate smooth curves ===
# Interpolate in log10(LR) space. A degree-k spline needs at least k + 1
# points, so lower the degree when only a few runs are available.
x = np.log10(filtered_lrs)
spline_degree = min(2, len(x) - 1)
if spline_degree >= 1:
    x_smooth = np.linspace(x.min(), x.max(), 300)
    acc_smooth = make_interp_spline(x, filtered_accs, k=spline_degree)(x_smooth)
    loss_smooth = make_interp_spline(x, filtered_losses, k=spline_degree)(x_smooth)
else:
    # A single run: nothing to interpolate, the curve is just that point.
    x_smooth, acc_smooth, loss_smooth = x, filtered_accs, filtered_losses

# === Plot ===
fig, ax1 = plt.subplots(figsize=(10, 6))

# Accuracy (left axis)
ax1.set_xlabel('Learning Rate (log scale)')
ax1.set_ylabel('Best Validation Accuracy', color='blue')
ax1.plot(10**x_smooth, acc_smooth, color='blue')
ax1.scatter(filtered_lrs, filtered_accs, color='blue')
ax1.set_xscale('log')
# Put one tick on every learning rate that was actually evaluated.
ax1.set_xticks(filtered_lrs)
ax1.set_xticklabels([f"{lr:.0e}" for lr in filtered_lrs], rotation=45)
ax1.tick_params(axis='y', labelcolor='blue')

# Loss (right axis)
ax2 = ax1.twinx()
ax2.set_ylabel('Minimum Validation Loss', color='red')
ax2.plot(10**x_smooth, loss_smooth, color='red')
ax2.scatter(filtered_lrs, filtered_losses, color='red')
ax2.tick_params(axis='y', labelcolor='red')

plt.title('MobileNetV2 Validation Accuracy and Loss vs Learning Rate')
plt.grid(True)
fig.tight_layout()
plt.show()

### More LR

In [None]:
import os
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc
import time

# === GPU Setup ===
# Start from a clean Keras state in case earlier cells left models around.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
# TensorFlow requires the memory-growth flag to be the same on all GPUs, so
# set it on every device rather than only on the first one. The loop is a
# no-op when no GPU is visible.
for gpu in physical_devices:
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Raised when the GPU has already been initialised in this session.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
# Pre-split fold images (Fold1..Fold5, each with Good/ and Bad/ sub-folders).
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
# Output roots; the training loop below adds one "LR_<rate>" sub-folder per run.
base_model_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_LRExperiment_5e6_to_5e5'
base_history_dir = '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_LRExperiment_5e6_to_5e5'

# === Function to load images and labels ===
def load_images_and_labels(image_dir):
    """Read the ``Good`` and ``Bad`` JPEGs of a fold into NumPy arrays.

    Every image is resized to 224x224 by ``load_img`` and divided by 255.
    Class sub-folders that are absent (or are not directories) are skipped.

    Args:
        image_dir: Fold directory containing ``Good`` and/or ``Bad``.

    Returns:
        ``(images, labels)`` as NumPy arrays, with label 1 for ``Good`` and
        0 for ``Bad``. Both are empty when nothing was found.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_path = os.path.join(image_dir, subdir)
        if not os.path.isdir(full_path):
            continue
        # Sort the listing: os.listdir gives no ordering guarantee, and a
        # fixed order makes runs reproducible.
        for fname in sorted(os.listdir(full_path)):
            # Accept .jpg in any letter case, as the fold-splitting code does,
            # so *.JPG files placed in a fold are actually loaded.
            if fname.lower().endswith('.jpg'):
                image_path = os.path.join(full_path, fname)
                image_array = load_img(image_path, target_size=(224, 224))
                images.append(img_to_array(image_array) / 255.0)
                labels.append(1 if subdir == 'Good' else 0)
    return np.array(images), np.array(labels)

# === Training data: folds 2 through 5 ===
# Each fold yields an (images, labels) pair; zip(*...) regroups the pairs
# into all image arrays and all label arrays before concatenation.
X_train, y_train = zip(*(
    load_images_and_labels(os.path.join(base_fold_dir, f'Fold{fold_num}'))
    for fold_num in range(2, 6)
))
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

# === Validation data: fold 1 is held out ===
val_dir = os.path.join(base_fold_dir, 'Fold1')
X_val, y_val = load_images_and_labels(val_dir)

# === Build Model ===
def create_mobilenetv2_model(image_shape, learning_rate):
    """Return a compiled MobileNetV2-based binary classifier.

    The ImageNet-pretrained backbone is frozen; a pooled dense head with
    batch normalisation and dropout feeds a single sigmoid unit.

    Args:
        image_shape: Input shape passed to MobileNetV2 as ``input_shape``.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled Keras ``Model``.
    """
    feature_extractor = MobileNetV2(weights='imagenet', include_top=False, input_shape=image_shape)
    feature_extractor.trainable = False  # keep the pretrained weights fixed

    pooled = GlobalAveragePooling2D()(feature_extractor.output)
    hidden = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(pooled)
    normalised = BatchNormalization()(hidden)
    regularised = Dropout(0.5)(normalised)
    probability = Dense(1, activation='sigmoid')(regularised)

    net = Model(inputs=feature_extractor.input, outputs=probability)
    net.compile(
        optimizer=Adam(learning_rate=learning_rate, clipvalue=1.0),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return net

# === Learning rates to test ===
learning_rates = [
    2e-3,   # 0.002
    3e-3,   # 0.003
    5e-3,   # 0.005
    7e-3,   # 0.007
    1e-2,   # 0.01
    2e-2,   # 0.02
    5e-2,   # 0.05
    1e-1    # 0.1
]


# === Training Loop ===
# Train one frozen-backbone model per learning rate, save its best checkpoint
# and pickled history under "LR_<rate>", then report metrics on the validation fold.
for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    # A new model is created on each pass. Reset Keras' global state first so
    # memory does not accumulate across the models built in this loop.
    tf.keras.backend.clear_session()
    model = create_mobilenetv2_model(X_train.shape[1:], learning_rate=lr)

    # Folder names keep one significant digit (e.g. "LR_2e-03"); every rate
    # in the list above is distinct at that precision.
    model_dir = os.path.join(base_model_dir, f"LR_{lr:.0e}")
    history_dir = os.path.join(base_history_dir, f"LR_{lr:.0e}")
    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(history_dir, exist_ok=True)

    callbacks = [
        EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=15, min_lr=1e-7),
        ModelCheckpoint(os.path.join(model_dir, 'model_fold245_val1_frozen.keras'), save_best_only=True),
        TerminateOnNaN()
    ]

    start_time = time.time()
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=50,
        batch_size=32,
        callbacks=callbacks,
        verbose=2
    )
    elapsed_time = time.time() - start_time
    print(f"⏱️ Training Time: {elapsed_time:.2f} seconds")

    # Save training history
    with open(os.path.join(history_dir, 'history_fold245_val1_frozen.pkl'), 'wb') as f:
        pickle.dump(history.history, f)

    # Evaluation
    # predict() returns shape (N, 1); flatten to 1-D so the scores line up
    # with y_val, as the later evaluation cells do.
    y_pred_probs = model.predict(X_val).flatten()
    y_pred_labels = (y_pred_probs > 0.5).astype(int)

    f1 = f1_score(y_val, y_pred_labels, zero_division=1)
    precision = precision_score(y_val, y_pred_labels, zero_division=1)
    recall = recall_score(y_val, y_pred_labels, zero_division=1)
    accuracy = accuracy_score(y_val, y_pred_labels)
    precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
    auc_pr = auc(recall_vals, precision_vals)

    # Print Results
    print(f"📊 Results for LR={lr:.0e}")
    print(f"Accuracy:  {accuracy:.4f}")
    print(f"F1 Score:  {f1:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"AUC-PR:    {auc_pr:.4f}")

In [None]:
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import make_interp_spline

plt.rcParams.update({
    'font.size': 14,          # Base font size
    'axes.titlesize': 16,     # Title font size
    'axes.labelsize': 14,     # Axis label font size
    'xtick.labelsize': 12,    # X-tick label font size
    'ytick.labelsize': 12,    # Y-tick label font size
    'legend.fontsize': 12     # Legend font size (if you add one)
})


# === Reload learning rates and set base history path ===
learning_rates = [
    5e-5, 7e-5, 1e-4, 2e-4, 3e-4, 4e-4, 5e-4, 7e-4, 1e-3, 2e-3,
    3e-3, 5e-3, 7e-3, 1e-2, 2e-2, 5e-2, 1e-1
]
history_base_dir = '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_LRExperiment_5e6_to_5e5'

# === Gather metrics ===
# One slot per learning rate; None marks a run whose history file is missing.
val_accuracies = []
val_losses = []

for lr in learning_rates:
    path = os.path.join(history_base_dir, f"LR_{lr:.0e}", 'history_fold245_val1_frozen.pkl')
    if os.path.exists(path):
        # NOTE: pickle.load can execute arbitrary code; only open history
        # files that were written by the training cell.
        with open(path, 'rb') as f:
            hist = pickle.load(f)
        val_accuracies.append(max(hist['val_accuracy']))
        # Mean over all recorded epochs (not the minimum).
        val_losses.append(np.mean(hist['val_loss']))
        print(f"✅ LR={lr:.0e}: Val Acc={val_accuracies[-1]:.4f}, Avg Val Loss={val_losses[-1]:.4f}")
    else:
        print(f"⚠️ Missing history file: {path}")
        val_accuracies.append(None)
        val_losses.append(None)

# === Filter missing values ===
# Drop missing runs and non-finite metrics: the spline fit below rejects
# NaN/inf input, which a diverged high-learning-rate run can produce.
filtered_lrs, filtered_accs, filtered_losses = [], [], []
for lr, acc, loss in zip(learning_rates, val_accuracies, val_losses):
    if acc is None or loss is None:
        continue
    if not (np.isfinite(acc) and np.isfinite(loss)):
        print(f"⚠️ Skipping LR={lr:.0e}: non-finite validation metric")
        continue
    filtered_lrs.append(lr)
    filtered_accs.append(acc)
    filtered_losses.append(loss)

if not filtered_lrs:
    raise FileNotFoundError(f"No usable history files found under {history_base_dir}")

# === Sort for interpolation ===
sorted_idx = np.argsort(filtered_lrs)
filtered_lrs = np.array(filtered_lrs)[sorted_idx]
filtered_accs = np.array(filtered_accs)[sorted_idx]
filtered_losses = np.array(filtered_losses)[sorted_idx]

# === Interpolate smooth curves ===
# Interpolate in log10(LR) space. A degree-k spline needs at least k + 1
# points, so lower the degree when only a few runs are available.
x = np.log10(filtered_lrs)
spline_degree = min(2, len(x) - 1)
if spline_degree >= 1:
    x_smooth = np.linspace(x.min(), x.max(), 300)
    acc_smooth = make_interp_spline(x, filtered_accs, k=spline_degree)(x_smooth)
    loss_smooth = make_interp_spline(x, filtered_losses, k=spline_degree)(x_smooth)
else:
    # A single run: nothing to interpolate, the curve is just that point.
    x_smooth, acc_smooth, loss_smooth = x, filtered_accs, filtered_losses

# === Plot ===
fig, ax1 = plt.subplots(figsize=(10, 6))

# Accuracy (left axis)
ax1.set_xlabel('Learning Rate')
ax1.set_ylabel('Best Validation Accuracy', color='blue')
ax1.plot(10**x_smooth, acc_smooth, color='blue')
ax1.scatter(filtered_lrs, filtered_accs, color='blue')
ax1.set_xscale('log')
# Put one tick on every learning rate that was actually evaluated.
ax1.set_xticks(filtered_lrs)
ax1.set_xticklabels([f"{lr:.0e}" for lr in filtered_lrs], rotation=45)
ax1.tick_params(axis='y', labelcolor='blue')

# Loss (right axis)
ax2 = ax1.twinx()
ax2.set_ylabel('Average Validation Loss', color='red')
ax2.plot(10**x_smooth, loss_smooth, color='red')
ax2.scatter(filtered_lrs, filtered_losses, color='red')
ax2.tick_params(axis='y', labelcolor='red')

plt.title('Validation Accuracy and Average Loss vs Learning Rate (MobileNetV2)')
plt.grid(True)
fig.tight_layout()
plt.show()

In [None]:
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import make_interp_spline

plt.rcParams.update({
    'font.size': 14,          # Base font size
    'axes.titlesize': 16,     # Title font size
    'axes.labelsize': 14,     # Axis label font size
    'xtick.labelsize': 12,    # X-tick label font size
    'ytick.labelsize': 12,    # Y-tick label font size
    'legend.fontsize': 12     # Legend font size (if you add one)
})


# === Reload learning rates and set base history path ===
learning_rates = [
    5e-5, 7e-5, 1e-4, 2e-4, 3e-4, 4e-4, 5e-4, 7e-4, 1e-3, 2e-3,
    3e-3, 5e-3, 7e-3, 1e-2, 2e-2, 5e-2, 1e-1
]
history_base_dir = '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_LRExperiment_5e6_to_5e5'

# === Gather metrics ===
# Parallel lists aligned with learning_rates; None stands for a missing run.
val_accuracies = []
val_losses = []

for lr in learning_rates:
    path = os.path.join(history_base_dir, f"LR_{lr:.0e}", 'history_fold245_val1_frozen.pkl')
    if os.path.exists(path):
        # NOTE: pickle.load can execute arbitrary code; only open history
        # files that were written by the training cell.
        with open(path, 'rb') as f:
            hist = pickle.load(f)
        val_accuracies.append(max(hist['val_accuracy']))
        # Mean over all recorded epochs (not the minimum).
        val_losses.append(np.mean(hist['val_loss']))
        print(f"✅ LR={lr:.0e}: Val Acc={val_accuracies[-1]:.4f}, Avg Val Loss={val_losses[-1]:.4f}")
    else:
        print(f"⚠️ Missing history file: {path}")
        val_accuracies.append(None)
        val_losses.append(None)

# === Filter missing values ===
# Keep only runs with finite metrics: make_interp_spline refuses NaN/inf,
# which a diverged high-learning-rate run can produce.
filtered_lrs, filtered_accs, filtered_losses = [], [], []
for lr, acc, loss in zip(learning_rates, val_accuracies, val_losses):
    if acc is None or loss is None:
        continue
    if not (np.isfinite(acc) and np.isfinite(loss)):
        print(f"⚠️ Skipping LR={lr:.0e}: non-finite validation metric")
        continue
    filtered_lrs.append(lr)
    filtered_accs.append(acc)
    filtered_losses.append(loss)

if not filtered_lrs:
    raise FileNotFoundError(f"No usable history files found under {history_base_dir}")

# === Sort for interpolation ===
sorted_idx = np.argsort(filtered_lrs)
filtered_lrs = np.array(filtered_lrs)[sorted_idx]
filtered_accs = np.array(filtered_accs)[sorted_idx]
filtered_losses = np.array(filtered_losses)[sorted_idx]

# === Interpolate smooth curves ===
# Work in log10(LR) space. The spline degree is capped at (points - 1)
# because a degree-k spline needs at least k + 1 data points.
x = np.log10(filtered_lrs)
spline_degree = min(2, len(x) - 1)
if spline_degree >= 1:
    x_smooth = np.linspace(x.min(), x.max(), 300)
    acc_smooth = make_interp_spline(x, filtered_accs, k=spline_degree)(x_smooth)
    loss_smooth = make_interp_spline(x, filtered_losses, k=spline_degree)(x_smooth)
else:
    # Only one run available: plot it as is.
    x_smooth, acc_smooth, loss_smooth = x, filtered_accs, filtered_losses

# === Plot ===
fig, ax1 = plt.subplots(figsize=(10, 6))

# Accuracy (left axis)
ax1.set_xlabel('Learning Rate')
ax1.set_ylabel('Best Validation Accuracy', color='blue')
ax1.plot(10**x_smooth, acc_smooth, color='blue')
ax1.scatter(filtered_lrs, filtered_accs, color='blue')
ax1.set_xscale('log')
# Put one tick on every learning rate that was actually evaluated.
ax1.set_xticks(filtered_lrs)
ax1.set_xticklabels([f"{lr:.0e}" for lr in filtered_lrs], rotation=45)
ax1.tick_params(axis='y', labelcolor='blue')

# Loss (right axis)
ax2 = ax1.twinx()
ax2.set_ylabel('Average Validation Loss', color='red')
ax2.plot(10**x_smooth, loss_smooth, color='red')
ax2.scatter(filtered_lrs, filtered_losses, color='red')
ax2.tick_params(axis='y', labelcolor='red')

plt.title('Validation Accuracy and Average Loss vs Learning Rate (MobileNetV2)')
plt.grid(True)
fig.tight_layout()
plt.show()

In [None]:
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import make_interp_spline
from matplotlib.ticker import LogLocator, LogFormatterSciNotation

# === Directory containing all history files ===
history_dir = '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_LRExperiment_5e6_to_5e5'

learning_rates = [
    5e-5, 7e-5, 1e-4, 2e-4, 3e-4, 4e-4, 5e-4, 7e-4, 1e-3, 2e-3,
    3e-3, 5e-3, 7e-3, 1e-2, 2e-2, 5e-2, 1e-1
]

# === Collect best val acc and average val loss from each file ===
# One slot per learning rate; None marks a run whose history file is missing.
val_accuracies = []
val_losses = []

for lr in learning_rates:
    path = os.path.join(history_dir, f"LR_{lr:.0e}", 'history_fold245_val1_frozen.pkl')
    if os.path.exists(path):
        # NOTE: pickle.load can execute arbitrary code; only open history
        # files that were written by the training cell.
        with open(path, 'rb') as f:
            hist = pickle.load(f)
        val_accuracies.append(max(hist['val_accuracy']))
        # Mean over all recorded epochs (not the minimum).
        val_losses.append(np.mean(hist['val_loss']))
        print(f"✅ LR={lr:.0e}: Val Acc={val_accuracies[-1]:.4f}, Avg Val Loss={val_losses[-1]:.4f}")
    else:
        print(f"⚠️ Missing history file: {path}")
        val_accuracies.append(None)
        val_losses.append(None)

# === Filter out missing entries ===
# Drop missing runs and non-finite metrics: the spline fit below rejects
# NaN/inf input, which a diverged high-learning-rate run can produce.
filtered_lrs, filtered_accs, filtered_losses = [], [], []
for lr, acc, loss in zip(learning_rates, val_accuracies, val_losses):
    if acc is None or loss is None:
        continue
    if not (np.isfinite(acc) and np.isfinite(loss)):
        print(f"⚠️ Skipping LR={lr:.0e}: non-finite validation metric")
        continue
    filtered_lrs.append(lr)
    filtered_accs.append(acc)
    filtered_losses.append(loss)

if not filtered_lrs:
    raise FileNotFoundError(f"No usable history files found under {history_dir}")

# === Sort for log-scale smoothing ===
sorted_idx = np.argsort(filtered_lrs)
filtered_lrs = np.array(filtered_lrs)[sorted_idx]
filtered_accs = np.array(filtered_accs)[sorted_idx]
filtered_losses = np.array(filtered_losses)[sorted_idx]

# === Smooth curves ===
# Interpolate in log10(LR) space. A degree-k spline needs at least k + 1
# points, so lower the degree when only a few runs are available.
x = np.log10(filtered_lrs)
spline_degree = min(2, len(x) - 1)
if spline_degree >= 1:
    x_smooth = np.linspace(x.min(), x.max(), 300)
    acc_smooth = make_interp_spline(x, filtered_accs, k=spline_degree)(x_smooth)
    loss_smooth = make_interp_spline(x, filtered_losses, k=spline_degree)(x_smooth)
else:
    # A single run: nothing to interpolate, the curve is just that point.
    x_smooth, acc_smooth, loss_smooth = x, filtered_accs, filtered_losses

# === Plot ===
fig, ax1 = plt.subplots(figsize=(12, 6))

# Accuracy (left axis)
ax1.set_xlabel('Learning Rate')
ax1.set_ylabel('Best Validation Accuracy', color='blue')
ax1.plot(10**x_smooth, acc_smooth, color='blue', label='Validation Accuracy')
ax1.scatter(filtered_lrs, filtered_accs, color='blue')
ax1.set_xscale('log')
# Major ticks on powers of ten, minor ticks in between, scientific labels.
ax1.xaxis.set_major_locator(LogLocator(base=10.0, numticks=10))
ax1.xaxis.set_minor_locator(LogLocator(base=10.0, subs='auto', numticks=50))
ax1.xaxis.set_major_formatter(LogFormatterSciNotation())
ax1.tick_params(axis='x', rotation=45)
ax1.tick_params(axis='y', labelcolor='blue')

# Loss (right axis)
ax2 = ax1.twinx()
ax2.set_ylabel('Average Validation Loss', color='red')
ax2.plot(10**x_smooth, loss_smooth, color='red', label='Avg Val Loss')
ax2.scatter(filtered_lrs, filtered_losses, color='red')
ax2.tick_params(axis='y', labelcolor='red')

plt.title('Validation Accuracy and Average Loss vs Learning Rate (MobileNetV2)')
plt.grid(True, which='both', axis='x')
fig.tight_layout()
plt.show()

In [None]:
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import make_interp_spline
from matplotlib.ticker import LogLocator, LogFormatterSciNotation

plt.rcParams.update({
    'font.size': 16,          # Base font size
    'axes.titlesize': 16,     # Title font size
    'axes.labelsize': 16,     # Axis label font size
    'xtick.labelsize': 16,    # X-tick label font size
    'ytick.labelsize': 16,    # Y-tick label font size
    'legend.fontsize': 16     # Legend font size (if you add one)
})


# === Directory containing all history files ===
history_dir = '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_LRExperiment_5e6_to_5e5'

learning_rates = [
    5e-5, 7e-5, 1e-4, 2e-4, 3e-4, 4e-4, 5e-4, 7e-4, 1e-3, 2e-3,
    3e-3, 5e-3, 7e-3, 1e-2, 2e-2, 5e-2, 1e-1
]

# === Collect best val acc and average val loss from each file ===
# Parallel lists aligned with learning_rates; None stands for a missing run.
val_accuracies = []
val_losses = []

for lr in learning_rates:
    path = os.path.join(history_dir, f"LR_{lr:.0e}", 'history_fold245_val1_frozen.pkl')
    if os.path.exists(path):
        # NOTE: pickle.load can execute arbitrary code; only open history
        # files that were written by the training cell.
        with open(path, 'rb') as f:
            hist = pickle.load(f)
        val_accuracies.append(max(hist['val_accuracy']))
        # Mean over all recorded epochs (not the minimum).
        val_losses.append(np.mean(hist['val_loss']))
        print(f"✅ LR={lr:.0e}: Val Acc={val_accuracies[-1]:.4f}, Avg Val Loss={val_losses[-1]:.4f}")
    else:
        print(f"⚠️ Missing history file: {path}")
        val_accuracies.append(None)
        val_losses.append(None)

# === Filter out missing entries ===
# Keep only runs with finite metrics: make_interp_spline refuses NaN/inf,
# which a diverged high-learning-rate run can produce.
filtered_lrs, filtered_accs, filtered_losses = [], [], []
for lr, acc, loss in zip(learning_rates, val_accuracies, val_losses):
    if acc is None or loss is None:
        continue
    if not (np.isfinite(acc) and np.isfinite(loss)):
        print(f"⚠️ Skipping LR={lr:.0e}: non-finite validation metric")
        continue
    filtered_lrs.append(lr)
    filtered_accs.append(acc)
    filtered_losses.append(loss)

if not filtered_lrs:
    raise FileNotFoundError(f"No usable history files found under {history_dir}")

# === Sort for log-scale smoothing ===
sorted_idx = np.argsort(filtered_lrs)
filtered_lrs = np.array(filtered_lrs)[sorted_idx]
filtered_accs = np.array(filtered_accs)[sorted_idx]
filtered_losses = np.array(filtered_losses)[sorted_idx]

# === Smooth curves ===
# Work in log10(LR) space. The spline degree is capped at (points - 1)
# because a degree-k spline needs at least k + 1 data points.
x = np.log10(filtered_lrs)
spline_degree = min(2, len(x) - 1)
if spline_degree >= 1:
    x_smooth = np.linspace(x.min(), x.max(), 300)
    acc_smooth = make_interp_spline(x, filtered_accs, k=spline_degree)(x_smooth)
    loss_smooth = make_interp_spline(x, filtered_losses, k=spline_degree)(x_smooth)
else:
    # Only one run available: plot it as is.
    x_smooth, acc_smooth, loss_smooth = x, filtered_accs, filtered_losses

# === Plot ===
fig, ax1 = plt.subplots(figsize=(12, 6))

# Accuracy (left axis)
ax1.set_xlabel('Learning Rate')
ax1.set_ylabel('Best Validation Accuracy', color='blue')
ax1.plot(10**x_smooth, acc_smooth, color='blue', label='Validation Accuracy')
ax1.scatter(filtered_lrs, filtered_accs, color='blue')
ax1.set_xscale('log')
# Major ticks on powers of ten, minor ticks in between, scientific labels.
ax1.xaxis.set_major_locator(LogLocator(base=10.0, numticks=10))
ax1.xaxis.set_minor_locator(LogLocator(base=10.0, subs='auto', numticks=50))
ax1.xaxis.set_major_formatter(LogFormatterSciNotation())
ax1.tick_params(axis='x', rotation=45)
ax1.tick_params(axis='y', labelcolor='blue')

# Loss (right axis)
ax2 = ax1.twinx()
ax2.set_ylabel('Average Validation Loss', color='red')
ax2.plot(10**x_smooth, loss_smooth, color='red', label='Avg Val Loss')
ax2.scatter(filtered_lrs, filtered_losses, color='red')
ax2.tick_params(axis='y', labelcolor='red')

plt.title('Validation Accuracy and Average Loss vs Learning Rate (MobileNetV2)')
plt.grid(True, which='both', axis='x')
fig.tight_layout()
plt.show()

### Get metrics

In [None]:
import os
import numpy as np
import pickle
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# === Mapping: LR → history + model directories ===
# Every run lives under the same root and differs only in the folder suffix,
# so the full paths are derived from a small learning-rate → suffix table.
_runs_root = '/Users/suzetteschulenburg/Desktop/MainUse'
_run_suffix_by_lr = {
    '1e-06': 'AllLayersMinus6',
    '5e-06': 'AllLayers5Minus6',
    '1e-05': 'AllLayers',
    '5e-05': 'AllLayers5Minus5',
    '1e-04': 'AllLayersMinus4',
    '5e-04': 'AllLayers5Minus4',
}
setup = {
    lr_label: {
        'history': f'{_runs_root}/History_MobileNet2_Frozen_{suffix}/history_fold245_val1_frozen.pkl',
        'model': f'{_runs_root}/MobileNet2_Frozen_{suffix}/model_fold245_val1_frozen.keras',
    }
    for lr_label, suffix in _run_suffix_by_lr.items()
}

# === Validation Data Loader (Fold 1) ===
def load_validation_data(val_dir):
    """Load the validation fold's JPEG images and binary labels.

    Images are read from ``val_dir/Good`` and ``val_dir/Bad``, resized to
    224x224 by ``load_img`` and divided by 255. A class sub-folder that does
    not exist (or is not a directory) is skipped.

    Args:
        val_dir: Fold directory containing ``Good`` and/or ``Bad``.

    Returns:
        ``(X_val, y_val)`` as NumPy arrays; the label is 1 for ``Good`` and
        0 for ``Bad``. Both arrays are empty when no image was found.
    """
    X_val, y_val = [], []
    for label in ['Good', 'Bad']:
        path = os.path.join(val_dir, label)
        if not os.path.isdir(path):
            continue
        # Sorted listing keeps the sample order reproducible between runs.
        for fname in sorted(os.listdir(path)):
            # Case-insensitive match, consistent with the fold-splitting code,
            # so files named *.JPG are not silently dropped.
            if fname.lower().endswith('.jpg'):
                img = load_img(os.path.join(path, fname), target_size=(224, 224))
                arr = img_to_array(img) / 255.0
                X_val.append(arr)
                y_val.append(1 if label == 'Good' else 0)
    return np.array(X_val), np.array(y_val)

val_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Fold1'
X_val, y_val = load_validation_data(val_dir)

# === Evaluate ===
# For every configured run: summarise its recorded validation-loss curve,
# score fold 1 with the saved model and print one report. A run that fails
# for any reason is reported and skipped so the remaining runs still print.
print("📊 Evaluation Metrics per Learning Rate:\n")
for lr, paths in setup.items():
    try:
        # Validation-loss curve recorded during training
        with open(paths['history'], 'rb') as f:
            hist = pickle.load(f)
        loss_curve = hist['val_loss']
        avg_val_loss, min_val_loss = np.mean(loss_curve), min(loss_curve)

        # Predictions of the saved model on the validation fold
        model = load_model(paths['model'])
        y_probs = model.predict(X_val).flatten()
        y_pred = (y_probs > 0.5).astype(int)

        # Threshold-based metrics plus area under the precision-recall curve
        acc = accuracy_score(y_val, y_pred)
        f1 = f1_score(y_val, y_pred, zero_division=1)
        precision = precision_score(y_val, y_pred, zero_division=1)
        recall = recall_score(y_val, y_pred, zero_division=1)
        prec_vals, rec_vals, _ = precision_recall_curve(y_val, y_probs)
        auc_pr = auc(rec_vals, prec_vals)

        # One report per run, emitted in a single call
        print(
            f"🟢 LR {lr}\n"
            f"   Avg Val Loss : {avg_val_loss:.4f}\n"
            f"   Min Val Loss : {min_val_loss:.4f}\n"
            f"   Accuracy      : {acc:.4f}\n"
            f"   F1 Score      : {f1:.4f}\n"
            f"   Precision     : {precision:.4f}\n"
            f"   Recall        : {recall:.4f}\n"
            f"   AUC-PR        : {auc_pr:.4f}\n"
        )

    except Exception as e:
        print(f"⚠️ Error evaluating LR {lr}: {e}")

Metrics on graph

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# === Learning Rate Setup ===
# Run folders share one root and naming scheme; only the suffix changes with
# the learning rate, so the path table is assembled from (label, suffix) pairs.
_main_use_dir = '/Users/suzetteschulenburg/Desktop/MainUse'
setup = {}
for _lr_label, _suffix in [
    ('1e-06', 'AllLayersMinus6'),
    ('5e-06', 'AllLayers5Minus6'),
    ('1e-05', 'AllLayers'),
    ('5e-05', 'AllLayers5Minus5'),
    ('1e-04', 'AllLayersMinus4'),
    ('5e-04', 'AllLayers5Minus4'),
]:
    setup[_lr_label] = {
        'history': f'{_main_use_dir}/History_MobileNet2_Frozen_{_suffix}/history_fold245_val1_frozen.pkl',
        'model': f'{_main_use_dir}/MobileNet2_Frozen_{_suffix}/model_fold245_val1_frozen.keras',
    }

# === Load Validation Data ===
def load_validation_data(val_dir):
    """Read the ``Good`` and ``Bad`` JPEGs of the validation fold.

    Every image is resized to 224x224 by ``load_img`` and divided by 255.
    Class sub-folders that are absent (or are not directories) are skipped.

    Args:
        val_dir: Fold directory containing ``Good`` and/or ``Bad``.

    Returns:
        ``(X_val, y_val)`` as NumPy arrays, with label 1 for ``Good`` and
        0 for ``Bad``. Both are empty when nothing was found.
    """
    X_val, y_val = [], []
    for label in ['Good', 'Bad']:
        path = os.path.join(val_dir, label)
        if not os.path.isdir(path):
            continue
        # os.listdir has no ordering guarantee; sort for reproducibility.
        for fname in sorted(os.listdir(path)):
            # Accept .jpg in any letter case, as the fold-splitting code does,
            # so *.JPG files placed in the fold are actually loaded.
            if fname.lower().endswith('.jpg'):
                img = load_img(os.path.join(path, fname), target_size=(224, 224))
                arr = img_to_array(img) / 255.0
                X_val.append(arr)
                y_val.append(1 if label == 'Good' else 0)
    return np.array(X_val), np.array(y_val)

val_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Fold1'
X_val, y_val = load_validation_data(val_dir)

# === Metrics Storage ===
# Parallel lists: index i of every list belongs to learning_rates[i].
learning_rates = []
accuracies, f1_scores, precisions, recalls, aucs, val_losses = [], [], [], [], [], []

# === Evaluate Each LR ===
# A run is recorded only when every step succeeded, so the lists stay aligned;
# a failing run is reported and left out.
for lr, paths in setup.items():
    try:
        with open(paths['history'], 'rb') as f:
            hist = pickle.load(f)
        avg_val_loss = np.mean(hist['val_loss'])

        model = load_model(paths['model'])
        y_probs = model.predict(X_val).flatten()
        y_pred = (y_probs > 0.5).astype(int)

        acc = accuracy_score(y_val, y_pred)
        f1 = f1_score(y_val, y_pred, zero_division=1)
        precision = precision_score(y_val, y_pred, zero_division=1)
        recall = recall_score(y_val, y_pred, zero_division=1)
        prec_vals, rec_vals, _ = precision_recall_curve(y_val, y_probs)
        auc_pr = auc(rec_vals, prec_vals)
    except Exception as e:
        print(f"⚠️ Error evaluating LR {lr}: {e}")
        continue

    # Store this run's results, one value per list.
    run_results = (lr, acc, f1, precision, recall, auc_pr, avg_val_loss)
    result_lists = (learning_rates, accuracies, f1_scores, precisions, recalls, aucs, val_losses)
    for target, value in zip(result_lists, run_results):
        target.append(value)

# === Plotting ===
# learning_rates holds the string keys of `setup`, so matplotlib draws them
# as evenly spaced categories rather than on a numeric scale.
fig, ax1 = plt.subplots(figsize=(12, 6))

# Left axis: Accuracy, F1, Precision, Recall, AUC
ax1.plot(learning_rates, accuracies, marker='o', label='Accuracy')
ax1.plot(learning_rates, f1_scores, marker='o', label='F1 Score')
ax1.plot(learning_rates, precisions, marker='o', label='Precision')
ax1.plot(learning_rates, recalls, marker='o', label='Recall')
ax1.plot(learning_rates, aucs, marker='o', label='AUC-PR')
ax1.set_xlabel('Learning Rate')
ax1.set_ylabel('Score')
ax1.set_title('Validation Metrics per Learning Rate')
ax1.grid(True)

# Right axis: Validation Loss
ax2 = ax1.twinx()
ax2.plot(learning_rates, val_losses, marker='o', color='black', linestyle='--', label='Avg Val Loss')
ax2.set_ylabel('Avg Validation Loss', color='black')
ax2.tick_params(axis='y', labelcolor='black')

# Combine legends
# A single legend covers both axes. The earlier separate ax1.legend() call
# was dropped because this call replaced it anyway.
lines_1, labels_1 = ax1.get_legend_handles_labels()
lines_2, labels_2 = ax2.get_legend_handles_labels()
ax1.legend(lines_1 + lines_2, labels_1 + labels_2, loc='center right')

plt.tight_layout()
plt.show()

Heatmap

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tf_keras_vis.utils.model_modifiers import ReplaceToLinear
from tf_keras_vis.gradcam_plus_plus import GradcamPlusPlus

# === Paths ===
# model_path: saved Keras model that the Grad-CAM++ heatmaps are computed for.
# val_dir:    fold directory (with Good/ and Bad/ subfolders) whose images are visualised.
# NOTE(review): the file name 'model_fold245_val1_frozen.keras' differs from the
# 'model_fold2345_val1_frozen.keras' checkpoint written by the Fold 1 training
# cell — confirm this is the intended model and not a typo.
model_path = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4/model_fold245_val1_frozen.keras'
val_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Fold1'

# === Load model ===
model = load_model(model_path)

# === Load images ===
def load_images_and_labels(image_dir):
    """Load the JPEG images of one fold with their labels and file paths.

    Args:
        image_dir: Directory containing a ``Good`` and a ``Bad`` subfolder.

    Returns:
        Tuple ``(images, labels, paths)``: ``images`` is a NumPy array of the
        images resized to 224x224 and divided by 255, ``labels`` is a NumPy
        array with 1 for ``Good`` and 0 for ``Bad``, and ``paths`` is the list
        of source file paths in the same order.

    Raises:
        FileNotFoundError: From ``os.listdir`` if a class subfolder is missing.
    """
    images, labels, paths = [], [], []
    for subdir in ['Good', 'Bad']:
        label = 1 if subdir == 'Good' else 0
        folder = os.path.join(image_dir, subdir)
        # os.listdir returns entries in arbitrary order; sorting makes the
        # sample order (and therefore "the first N images") reproducible.
        for fname in sorted(os.listdir(folder)):
            # Case-insensitive match, consistent with the fold-splitting step,
            # so files named *.JPG are not silently dropped.
            if not fname.lower().endswith('.jpg'):
                continue
            path = os.path.join(folder, fname)
            img = load_img(path, target_size=(224, 224))
            images.append(img_to_array(img) / 255.0)
            labels.append(label)
            paths.append(path)
    return np.array(images), np.array(labels), paths

X_val, y_val, val_paths = load_images_and_labels(val_dir)
# flatten() gives a 1-D score vector; scores above 0.5 are labelled 1 (Good).
y_probs = model.predict(X_val).flatten()
y_preds = (y_probs > 0.5).astype(int)

# === Select the first 5 images with their predictions ===
# Each entry is (image array, true label, predicted label, file path).
num_samples = 5
selected_samples = list(zip(
    X_val[:num_samples],
    y_val[:num_samples],
    y_preds[:num_samples],
    val_paths[:num_samples],
))

# === Grad-CAM++ setup ===
def score_function(output):
    """Return column 0 of the model output as the score Grad-CAM++ explains.

    The model ends in a single sigmoid unit, so column 0 is its only output.
    """
    return output[:, 0]

# ReplaceToLinear is passed as the model modifier so the CAM is computed with
# the final activation replaced by a linear one.
# NOTE(review): presumably applied to a copy of `model` — confirm in tf-keras-vis.
model_modifier = ReplaceToLinear()
gradcam = GradcamPlusPlus(model, model_modifier=model_modifier)

# === Plot function ===
def plot_gradcam_plus(samples):
    """Show Grad-CAM++ heatmaps blended over the given images.

    Args:
        samples: Iterable of ``(img, true, pred, path)`` tuples where ``img``
            is the RGB array fed to the model (already divided by 255),
            ``true``/``pred`` are 0/1 labels and ``path`` is the source file.

    The grid has 5 columns and as many rows as needed (at least 2, which keeps
    the layout used for up to 10 samples), so larger sample sets no longer
    overflow a fixed 2x5 grid.
    """
    samples = list(samples)
    n_cols = 5
    # Ceil-division for the row count; never fewer than the original two rows.
    n_rows = max(2, -(-len(samples) // n_cols))
    plt.figure(figsize=(20, 2.5 * n_rows))
    for i, (img, true, pred, path) in enumerate(samples):
        cam = gradcam(score_function, np.expand_dims(img, axis=0), penultimate_layer='Conv_1')[0]
        # assumes the CAM is normalised to [0, 1] — TODO confirm against tf-keras-vis
        heatmap = np.uint8(255 * cam)
        heatmap_colored = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
        original = (img * 255).astype(np.uint8)
        # OpenCV colour maps are BGR: blend in BGR, convert back to RGB for matplotlib.
        overlay = cv2.addWeighted(cv2.cvtColor(original, cv2.COLOR_RGB2BGR), 0.6, heatmap_colored, 0.4, 0)

        plt.subplot(n_rows, n_cols, i + 1)
        plt.imshow(cv2.cvtColor(overlay, cv2.COLOR_BGR2RGB))
        fname = os.path.basename(path)
        plt.title(f"{fname}\nTrue: {'Good' if true else 'Bad'} | Pred: {'Good' if pred else 'Bad'}", fontsize=8)
        plt.axis('off')
    plt.tight_layout()
    plt.show()

# === Display Grad-CAM++ overlays ===
plot_gradcam_plus(selected_samples)

## Take the best learning rate (4e-4) and train all 5 folds

In [None]:
from tensorflow.keras.models import load_model

# Load the model
# This is the checkpoint file name that the Fold 1 training cell writes via
# ModelCheckpoint, so the file must already exist on disk when this cell runs.
model = load_model('/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4/model_fold2345_val1_frozen.keras')

# Show the model architecture
model.summary()

Fold 1

In [None]:
import os
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc
import time

# === Setup ===
# Reset Keras global state left over from previously executed cells.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        # Only the first GPU is configured for on-demand memory allocation.
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except RuntimeError as e:
        # Reported rather than raised so the cell keeps running.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
# base_fold_dir holds Fold1..Fold5; models and training histories are written
# to the two output directories, which are created if missing.
# NOTE(review): the directory names say 'AllLayersMinus4' while the model built
# in this cell freezes the whole backbone — confirm the naming is intended.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayersMinus4'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Function to load images and labels ===
def load_images_and_labels(image_dir):
    """Load the JPEG images of one fold together with their binary labels.

    Args:
        image_dir: Fold directory expected to contain ``Good`` and/or ``Bad``
            subfolders. A missing subfolder is skipped.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. Images are resized to
        224x224 and divided by 255; labels are 1 for ``Good`` and 0 for ``Bad``.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_path = os.path.join(image_dir, subdir)
        if not os.path.exists(full_path):
            continue
        label = 1 if subdir == 'Good' else 0
        # Sorted so the sample order does not depend on the filesystem.
        for fname in sorted(os.listdir(full_path)):
            # Case-insensitive match, consistent with the fold-splitting step,
            # so files named *.JPG are not silently dropped.
            if not fname.lower().endswith('.jpg'):
                continue
            image = load_img(os.path.join(full_path, fname), target_size=(224, 224))
            images.append(img_to_array(image) / 255.0)
            labels.append(label)
    return np.array(images), np.array(labels)

# === Load Train Folds (2, 3, 4, 5) ===
# Each fold is loaded separately and the per-fold arrays are concatenated.
# NOTE(review): a fold directory without images yields empty arrays from the
# loader, which would make np.concatenate fail on mismatched dimensions.
X_train, y_train = [], []
for fold_num in [2, 3, 4, 5]:  # Fold1 will be validation
    fold_dir = os.path.join(base_fold_dir, f'Fold{fold_num}')
    images, labels = load_images_and_labels(fold_dir)
    X_train.append(images)
    y_train.append(labels)

X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

# === Load Validation Fold (Fold1) ===
val_dir = os.path.join(base_fold_dir, 'Fold1')
X_val, y_val = load_images_and_labels(val_dir)

# === Build Model (Fully Frozen) ===
def create_mobilenetv2_model(image_shape):
    """Build and compile a binary classifier on a frozen MobileNetV2 backbone.

    Args:
        image_shape: Shape of a single input image, passed to MobileNetV2 as
            ``input_shape``.

    Returns:
        A compiled Keras ``Model`` with one sigmoid output unit.
    """
    backbone = MobileNetV2(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # ImageNet weights stay fixed; only the head is trained

    # Classification head: pooled features -> regularised dense layer -> sigmoid.
    features = GlobalAveragePooling2D()(backbone.output)
    features = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(features)
    features = BatchNormalization()(features)
    features = Dropout(0.5)(features)
    prediction = Dense(1, activation='sigmoid')(features)

    classifier = Model(inputs=backbone.input, outputs=prediction)
    optimizer = Adam(learning_rate=4e-4, clipvalue=1.0)
    classifier.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

# The input shape is taken from the loaded data (images are loaded at 224x224).
model = create_mobilenetv2_model(X_train.shape[1:])

# === Callbacks ===
# - EarlyStopping: stop after 25 epochs without val_loss improvement and
#   restore the best weights.
# - ReduceLROnPlateau: halve the learning rate after 15 stagnant epochs.
# - ModelCheckpoint: keep only the best model on disk (no monitor is given;
#   presumably the Keras default 'val_loss' — confirm).
# - TerminateOnNaN: abort training if the loss becomes NaN.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=15, min_lr=1e-7),
    ModelCheckpoint(os.path.join(model_save_dir, 'model_fold2345_val1_frozen.keras'), save_best_only=True),
    TerminateOnNaN()
]

# === Train ===
# epochs=1000 is only an upper bound; EarlyStopping is expected to end the run.
start_time = time.time()
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=1000,
    batch_size=32,
    callbacks=callbacks,
    verbose=2
)
elapsed_time = time.time() - start_time
print(f"⏱️ Training Time: {elapsed_time:.2f} seconds")

# === Save History ===
# Only the plain per-epoch metrics dict is pickled, not the History object.
with open(os.path.join(history_save_dir, 'history_fold2345_val1_frozen.pkl'), 'wb') as f:
    pickle.dump(history.history, f)

# === Evaluate ===
# Sigmoid outputs above 0.5 are labelled 1 (Good), otherwise 0 (Bad).
y_pred_probs = model.predict(X_val)
y_pred_labels = (y_pred_probs > 0.5).astype(int)

# zero_division=1: a metric whose denominator is zero is reported as 1.
f1 = f1_score(y_val, y_pred_labels, zero_division=1)
precision = precision_score(y_val, y_pred_labels, zero_division=1)
recall = recall_score(y_val, y_pred_labels, zero_division=1)
accuracy = accuracy_score(y_val, y_pred_labels)

# Area under the precision-recall curve (recall on x, precision on y).
precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
auc_pr = auc(recall_vals, precision_vals)

# === Print Results ===
print(f"\n🔍 Evaluation on Fold1:")
print(f"Accuracy:  {accuracy:.4f}")
print(f"F1 Score:  {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"AUC-PR:    {auc_pr:.4f}")

print("✅ Training complete using Folds 2, 3, 4, 5 with Fold 1 as validation (All layers frozen).")

Fold 2

In [None]:
import os
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc
import time

# === Setup ===
# Reset Keras global state left over from previously executed cells.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        # Only the first GPU is configured for on-demand memory allocation.
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except RuntimeError as e:
        # Reported rather than raised so the cell keeps running.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
# base_fold_dir holds Fold1..Fold5; models and training histories are written
# to the two output directories, which are created if missing.
# NOTE(review): the directory names say 'AllLayersMinus4' while the model built
# in this cell freezes the whole backbone — confirm the naming is intended.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayersMinus4'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Function to load images and labels ===
def load_images_and_labels(image_dir):
    """Load the JPEG images of one fold together with their binary labels.

    Args:
        image_dir: Fold directory expected to contain ``Good`` and/or ``Bad``
            subfolders. A missing subfolder is skipped.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. Images are resized to
        224x224 and divided by 255; labels are 1 for ``Good`` and 0 for ``Bad``.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_path = os.path.join(image_dir, subdir)
        if not os.path.exists(full_path):
            continue
        label = 1 if subdir == 'Good' else 0
        # Sorted so the sample order does not depend on the filesystem.
        for fname in sorted(os.listdir(full_path)):
            # Case-insensitive match, consistent with the fold-splitting step,
            # so files named *.JPG are not silently dropped.
            if not fname.lower().endswith('.jpg'):
                continue
            image = load_img(os.path.join(full_path, fname), target_size=(224, 224))
            images.append(img_to_array(image) / 255.0)
            labels.append(label)
    return np.array(images), np.array(labels)

# === Load Train Folds (1, 3, 4, 5) ===
# Each fold is loaded separately and the per-fold arrays are concatenated.
# NOTE(review): a fold directory without images yields empty arrays from the
# loader, which would make np.concatenate fail on mismatched dimensions.
X_train, y_train = [], []
for fold_num in [1, 3, 4, 5]:  # Fold2 is held out for validation
    fold_dir = os.path.join(base_fold_dir, f'Fold{fold_num}')
    images, labels = load_images_and_labels(fold_dir)
    X_train.append(images)
    y_train.append(labels)

X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

# === Load Validation Fold (Fold2) ===
val_dir = os.path.join(base_fold_dir, 'Fold2')
X_val, y_val = load_images_and_labels(val_dir)

# === Build Model (Fully Frozen) ===
def create_mobilenetv2_model(image_shape):
    """Build and compile a binary classifier on a frozen MobileNetV2 backbone.

    Args:
        image_shape: Shape of a single input image, passed to MobileNetV2 as
            ``input_shape``.

    Returns:
        A compiled Keras ``Model`` with one sigmoid output unit.
    """
    backbone = MobileNetV2(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # ImageNet weights stay fixed; only the head is trained

    # Classification head: pooled features -> regularised dense layer -> sigmoid.
    features = GlobalAveragePooling2D()(backbone.output)
    features = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(features)
    features = BatchNormalization()(features)
    features = Dropout(0.5)(features)
    prediction = Dense(1, activation='sigmoid')(features)

    classifier = Model(inputs=backbone.input, outputs=prediction)
    optimizer = Adam(learning_rate=4e-4, clipvalue=1.0)
    classifier.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

# The input shape is taken from the loaded data (images are loaded at 224x224).
model = create_mobilenetv2_model(X_train.shape[1:])

# === Callbacks ===
# - EarlyStopping: stop after 25 epochs without val_loss improvement and
#   restore the best weights.
# - ReduceLROnPlateau: halve the learning rate after 15 stagnant epochs.
# - ModelCheckpoint: keep only the best model on disk (no monitor is given;
#   presumably the Keras default 'val_loss' — confirm).
# - TerminateOnNaN: abort training if the loss becomes NaN.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=15, min_lr=1e-7),
    ModelCheckpoint(os.path.join(model_save_dir, 'model_fold1345_val2_frozen.keras'), save_best_only=True),
    TerminateOnNaN()
]

# === Train ===
# epochs=1000 is only an upper bound; EarlyStopping is expected to end the run.
start_time = time.time()
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=1000,
    batch_size=32,
    callbacks=callbacks,
    verbose=2
)
elapsed_time = time.time() - start_time
print(f"⏱️ Training Time: {elapsed_time:.2f} seconds")

# === Save History ===
# Only the plain per-epoch metrics dict is pickled, not the History object.
with open(os.path.join(history_save_dir, 'history_fold1345_val2_frozen.pkl'), 'wb') as f:
    pickle.dump(history.history, f)

# === Evaluate ===
# Sigmoid outputs above 0.5 are labelled 1 (Good), otherwise 0 (Bad).
y_pred_probs = model.predict(X_val)
y_pred_labels = (y_pred_probs > 0.5).astype(int)

# zero_division=1: a metric whose denominator is zero is reported as 1.
f1 = f1_score(y_val, y_pred_labels, zero_division=1)
precision = precision_score(y_val, y_pred_labels, zero_division=1)
recall = recall_score(y_val, y_pred_labels, zero_division=1)
accuracy = accuracy_score(y_val, y_pred_labels)

# Area under the precision-recall curve (recall on x, precision on y).
precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
auc_pr = auc(recall_vals, precision_vals)

# === Print Results ===
print(f"\n🔍 Evaluation on Fold2:")
print(f"Accuracy:  {accuracy:.4f}")
print(f"F1 Score:  {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"AUC-PR:    {auc_pr:.4f}")

print("✅ Training complete using Folds 1, 3, 4, 5 with Fold 2 as validation (All layers frozen).")

Fold 3

In [None]:
import os
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc
import time

# === Setup ===
# Reset Keras global state left over from previously executed cells.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        # Only the first GPU is configured for on-demand memory allocation.
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except RuntimeError as e:
        # Reported rather than raised so the cell keeps running.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
# base_fold_dir holds Fold1..Fold5; models and training histories are written
# to the two output directories, which are created if missing.
# NOTE(review): the directory names say 'AllLayersMinus4' while the model built
# in this cell freezes the whole backbone — confirm the naming is intended.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayersMinus4'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Function to load images and labels ===
def load_images_and_labels(image_dir):
    """Load the JPEG images of one fold together with their binary labels.

    Args:
        image_dir: Fold directory expected to contain ``Good`` and/or ``Bad``
            subfolders. A missing subfolder is skipped.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. Images are resized to
        224x224 and divided by 255; labels are 1 for ``Good`` and 0 for ``Bad``.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_path = os.path.join(image_dir, subdir)
        if not os.path.exists(full_path):
            continue
        label = 1 if subdir == 'Good' else 0
        # Sorted so the sample order does not depend on the filesystem.
        for fname in sorted(os.listdir(full_path)):
            # Case-insensitive match, consistent with the fold-splitting step,
            # so files named *.JPG are not silently dropped.
            if not fname.lower().endswith('.jpg'):
                continue
            image = load_img(os.path.join(full_path, fname), target_size=(224, 224))
            images.append(img_to_array(image) / 255.0)
            labels.append(label)
    return np.array(images), np.array(labels)

# === Load Train Folds (1, 2, 4, 5) ===
# Each fold is loaded separately and the per-fold arrays are concatenated.
# NOTE(review): a fold directory without images yields empty arrays from the
# loader, which would make np.concatenate fail on mismatched dimensions.
X_train, y_train = [], []
for fold_num in [1, 2, 4, 5]:  # Fold3 is held out for validation
    fold_dir = os.path.join(base_fold_dir, f'Fold{fold_num}')
    images, labels = load_images_and_labels(fold_dir)
    X_train.append(images)
    y_train.append(labels)

X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

# === Load Validation Fold (Fold3) ===
val_dir = os.path.join(base_fold_dir, 'Fold3')
X_val, y_val = load_images_and_labels(val_dir)

# === Build Model (Fully Frozen) ===
def create_mobilenetv2_model(image_shape):
    """Build and compile a binary classifier on a frozen MobileNetV2 backbone.

    Args:
        image_shape: Shape of a single input image, passed to MobileNetV2 as
            ``input_shape``.

    Returns:
        A compiled Keras ``Model`` with one sigmoid output unit.
    """
    backbone = MobileNetV2(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # ImageNet weights stay fixed; only the head is trained

    # Classification head: pooled features -> regularised dense layer -> sigmoid.
    features = GlobalAveragePooling2D()(backbone.output)
    features = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(features)
    features = BatchNormalization()(features)
    features = Dropout(0.5)(features)
    prediction = Dense(1, activation='sigmoid')(features)

    classifier = Model(inputs=backbone.input, outputs=prediction)
    optimizer = Adam(learning_rate=4e-4, clipvalue=1.0)
    classifier.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

# The input shape is taken from the loaded data (images are loaded at 224x224).
model = create_mobilenetv2_model(X_train.shape[1:])

# === Callbacks ===
# - EarlyStopping: stop after 25 epochs without val_loss improvement and
#   restore the best weights.
# - ReduceLROnPlateau: halve the learning rate after 15 stagnant epochs.
# - ModelCheckpoint: keep only the best model on disk (no monitor is given;
#   presumably the Keras default 'val_loss' — confirm).
# - TerminateOnNaN: abort training if the loss becomes NaN.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=15, min_lr=1e-7),
    ModelCheckpoint(os.path.join(model_save_dir, 'model_fold1245_val3_frozen.keras'), save_best_only=True),
    TerminateOnNaN()
]

# === Train ===
# epochs=1000 is only an upper bound; EarlyStopping is expected to end the run.
start_time = time.time()
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=1000,
    batch_size=32,
    callbacks=callbacks,
    verbose=2
)
elapsed_time = time.time() - start_time
print(f"⏱️ Training Time: {elapsed_time:.2f} seconds")

# === Save History ===
# Only the plain per-epoch metrics dict is pickled, not the History object.
with open(os.path.join(history_save_dir, 'history_fold1245_val3_frozen.pkl'), 'wb') as f:
    pickle.dump(history.history, f)

# === Evaluate ===
# Sigmoid outputs above 0.5 are labelled 1 (Good), otherwise 0 (Bad).
y_pred_probs = model.predict(X_val)
y_pred_labels = (y_pred_probs > 0.5).astype(int)

# zero_division=1: a metric whose denominator is zero is reported as 1.
f1 = f1_score(y_val, y_pred_labels, zero_division=1)
precision = precision_score(y_val, y_pred_labels, zero_division=1)
recall = recall_score(y_val, y_pred_labels, zero_division=1)
accuracy = accuracy_score(y_val, y_pred_labels)

# Area under the precision-recall curve (recall on x, precision on y).
precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
auc_pr = auc(recall_vals, precision_vals)

# === Print Results ===
print(f"\n🔍 Evaluation on Fold3:")
print(f"Accuracy:  {accuracy:.4f}")
print(f"F1 Score:  {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"AUC-PR:    {auc_pr:.4f}")

print("✅ Training complete using Folds 1, 2, 4, 5 with Fold 3 as validation (All layers frozen).")

Fold 4

In [None]:
import os
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc
import time

# === Setup ===
# Reset Keras global state left over from previously executed cells.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        # Only the first GPU is configured for on-demand memory allocation.
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except RuntimeError as e:
        # Reported rather than raised so the cell keeps running.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
# base_fold_dir holds Fold1..Fold5; models and training histories are written
# to the two output directories, which are created if missing.
# NOTE(review): the directory names say 'AllLayersMinus4' while the model built
# in this cell freezes the whole backbone — confirm the naming is intended.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayersMinus4'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Function to load images and labels ===
def load_images_and_labels(image_dir):
    """Load the JPEG images of one fold together with their binary labels.

    Args:
        image_dir: Fold directory expected to contain ``Good`` and/or ``Bad``
            subfolders. A missing subfolder is skipped.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. Images are resized to
        224x224 and divided by 255; labels are 1 for ``Good`` and 0 for ``Bad``.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_path = os.path.join(image_dir, subdir)
        if not os.path.exists(full_path):
            continue
        label = 1 if subdir == 'Good' else 0
        # Sorted so the sample order does not depend on the filesystem.
        for fname in sorted(os.listdir(full_path)):
            # Case-insensitive match, consistent with the fold-splitting step,
            # so files named *.JPG are not silently dropped.
            if not fname.lower().endswith('.jpg'):
                continue
            image = load_img(os.path.join(full_path, fname), target_size=(224, 224))
            images.append(img_to_array(image) / 255.0)
            labels.append(label)
    return np.array(images), np.array(labels)

# === Load Train Folds (1, 2, 3, 5) ===
# Each fold is loaded separately and the per-fold arrays are concatenated.
# NOTE(review): a fold directory without images yields empty arrays from the
# loader, which would make np.concatenate fail on mismatched dimensions.
X_train, y_train = [], []
for fold_num in [1, 2, 3, 5]:  # Fold4 is held out for validation
    fold_dir = os.path.join(base_fold_dir, f'Fold{fold_num}')
    images, labels = load_images_and_labels(fold_dir)
    X_train.append(images)
    y_train.append(labels)

X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

# === Load Validation Fold (Fold4) ===
val_dir = os.path.join(base_fold_dir, 'Fold4')
X_val, y_val = load_images_and_labels(val_dir)

# === Build Model (Fully Frozen) ===
def create_mobilenetv2_model(image_shape):
    """Build and compile a binary classifier on a frozen MobileNetV2 backbone.

    Args:
        image_shape: Shape of a single input image, passed to MobileNetV2 as
            ``input_shape``.

    Returns:
        A compiled Keras ``Model`` with one sigmoid output unit.
    """
    backbone = MobileNetV2(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # ImageNet weights stay fixed; only the head is trained

    # Classification head: pooled features -> regularised dense layer -> sigmoid.
    features = GlobalAveragePooling2D()(backbone.output)
    features = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(features)
    features = BatchNormalization()(features)
    features = Dropout(0.5)(features)
    prediction = Dense(1, activation='sigmoid')(features)

    classifier = Model(inputs=backbone.input, outputs=prediction)
    optimizer = Adam(learning_rate=4e-4, clipvalue=1.0)
    classifier.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

# The input shape is taken from the loaded data (images are loaded at 224x224).
model = create_mobilenetv2_model(X_train.shape[1:])

# === Callbacks ===
# - EarlyStopping: stop after 25 epochs without val_loss improvement and
#   restore the best weights.
# - ReduceLROnPlateau: halve the learning rate after 15 stagnant epochs.
# - ModelCheckpoint: keep only the best model on disk (no monitor is given;
#   presumably the Keras default 'val_loss' — confirm).
# - TerminateOnNaN: abort training if the loss becomes NaN.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=15, min_lr=1e-7),
    ModelCheckpoint(os.path.join(model_save_dir, 'model_fold1235_val4_frozen.keras'), save_best_only=True),
    TerminateOnNaN()
]

# === Train ===
# epochs=1000 is only an upper bound; EarlyStopping is expected to end the run.
start_time = time.time()
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=1000,
    batch_size=32,
    callbacks=callbacks,
    verbose=2
)
elapsed_time = time.time() - start_time
print(f"⏱️ Training Time: {elapsed_time:.2f} seconds")

# === Save History ===
# Only the plain per-epoch metrics dict is pickled, not the History object.
with open(os.path.join(history_save_dir, 'history_fold1235_val4_frozen.pkl'), 'wb') as f:
    pickle.dump(history.history, f)

# === Evaluate ===
# Sigmoid outputs above 0.5 are labelled 1 (Good), otherwise 0 (Bad).
y_pred_probs = model.predict(X_val)
y_pred_labels = (y_pred_probs > 0.5).astype(int)

# zero_division=1: a metric whose denominator is zero is reported as 1.
f1 = f1_score(y_val, y_pred_labels, zero_division=1)
precision = precision_score(y_val, y_pred_labels, zero_division=1)
recall = recall_score(y_val, y_pred_labels, zero_division=1)
accuracy = accuracy_score(y_val, y_pred_labels)

# Area under the precision-recall curve (recall on x, precision on y).
precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
auc_pr = auc(recall_vals, precision_vals)

# === Print Results ===
print(f"\n🔍 Evaluation on Fold4:")
print(f"Accuracy:  {accuracy:.4f}")
print(f"F1 Score:  {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"AUC-PR:    {auc_pr:.4f}")

print("✅ Training complete using Folds 1, 2, 3, 5 with Fold 4 as validation (All layers frozen).")

Fold 5

In [None]:
import os
import numpy as np
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc
import time

# === Setup ===
# Reset Keras global state left over from previously executed cells.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        # Only the first GPU is configured for on-demand memory allocation.
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except RuntimeError as e:
        # Reported rather than raised so the cell keeps running.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
# base_fold_dir holds Fold1..Fold5; models and training histories are written
# to the two output directories, which are created if missing.
# NOTE(review): the directory names say 'AllLayersMinus4' while the model built
# in this cell freezes the whole backbone — confirm the naming is intended.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayersMinus4'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Function to load images and labels ===
def load_images_and_labels(image_dir):
    """Load the JPEG images of one fold together with their binary labels.

    Args:
        image_dir: Fold directory expected to contain ``Good`` and/or ``Bad``
            subfolders. A missing subfolder is skipped.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. Images are resized to
        224x224 and divided by 255; labels are 1 for ``Good`` and 0 for ``Bad``.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_path = os.path.join(image_dir, subdir)
        if not os.path.exists(full_path):
            continue
        label = 1 if subdir == 'Good' else 0
        # Sorted so the sample order does not depend on the filesystem.
        for fname in sorted(os.listdir(full_path)):
            # Case-insensitive match, consistent with the fold-splitting step,
            # so files named *.JPG are not silently dropped.
            if not fname.lower().endswith('.jpg'):
                continue
            image = load_img(os.path.join(full_path, fname), target_size=(224, 224))
            images.append(img_to_array(image) / 255.0)
            labels.append(label)
    return np.array(images), np.array(labels)

# === Load Train Folds (1, 2, 3, 4) ===
# Each fold is loaded separately and the per-fold arrays are concatenated.
# NOTE(review): a fold directory without images yields empty arrays from the
# loader, which would make np.concatenate fail on mismatched dimensions.
X_train, y_train = [], []
for fold_num in [1, 2, 3, 4]:  # Fold5 is held out for validation
    fold_dir = os.path.join(base_fold_dir, f'Fold{fold_num}')
    images, labels = load_images_and_labels(fold_dir)
    X_train.append(images)
    y_train.append(labels)

X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

# === Load Validation Fold (Fold5) ===
val_dir = os.path.join(base_fold_dir, 'Fold5')
X_val, y_val = load_images_and_labels(val_dir)

# === Build Model (Fully Frozen) ===
def create_mobilenetv2_model(image_shape):
    """Build and compile a binary classifier on a frozen MobileNetV2 backbone.

    Args:
        image_shape: Shape of a single input image, passed to MobileNetV2 as
            ``input_shape``.

    Returns:
        A compiled Keras ``Model`` with one sigmoid output unit.
    """
    backbone = MobileNetV2(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # ImageNet weights stay fixed; only the head is trained

    # Classification head: pooled features -> regularised dense layer -> sigmoid.
    features = GlobalAveragePooling2D()(backbone.output)
    features = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(features)
    features = BatchNormalization()(features)
    features = Dropout(0.5)(features)
    prediction = Dense(1, activation='sigmoid')(features)

    classifier = Model(inputs=backbone.input, outputs=prediction)
    optimizer = Adam(learning_rate=4e-4, clipvalue=1.0)
    classifier.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

# The input shape is taken from the loaded data (images are loaded at 224x224).
model = create_mobilenetv2_model(X_train.shape[1:])

# === Callbacks ===
# - EarlyStopping: stop after 25 epochs without val_loss improvement and
#   restore the best weights.
# - ReduceLROnPlateau: halve the learning rate after 15 stagnant epochs.
# - ModelCheckpoint: keep only the best model on disk (no monitor is given;
#   presumably the Keras default 'val_loss' — confirm).
# - TerminateOnNaN: abort training if the loss becomes NaN.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=15, min_lr=1e-7),
    ModelCheckpoint(os.path.join(model_save_dir, 'model_fold1234_val5_frozen.keras'), save_best_only=True),
    TerminateOnNaN()
]

# === Train ===
# epochs=1000 is only an upper bound; EarlyStopping is expected to end the run.
start_time = time.time()
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=1000,
    batch_size=32,
    callbacks=callbacks,
    verbose=2
)
elapsed_time = time.time() - start_time
print(f"⏱️ Training Time: {elapsed_time:.2f} seconds")

# === Save History ===
# Only the plain per-epoch metrics dict is pickled, not the History object.
with open(os.path.join(history_save_dir, 'history_fold1234_val5_frozen.pkl'), 'wb') as f:
    pickle.dump(history.history, f)

# === Evaluate ===
# Sigmoid outputs above 0.5 are labelled 1 (Good), otherwise 0 (Bad).
y_pred_probs = model.predict(X_val)
y_pred_labels = (y_pred_probs > 0.5).astype(int)

# zero_division=1: a metric whose denominator is zero is reported as 1.
f1 = f1_score(y_val, y_pred_labels, zero_division=1)
precision = precision_score(y_val, y_pred_labels, zero_division=1)
recall = recall_score(y_val, y_pred_labels, zero_division=1)
accuracy = accuracy_score(y_val, y_pred_labels)

# Area under the precision-recall curve (recall on x, precision on y).
precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
auc_pr = auc(recall_vals, precision_vals)

# === Print Results ===
print(f"\n🔍 Evaluation on Fold5:")
print(f"Accuracy:  {accuracy:.4f}")
print(f"F1 Score:  {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"AUC-PR:    {auc_pr:.4f}")

print("✅ Training complete using Folds 1, 2, 3, 4 with Fold 5 as validation (All layers frozen).")

### Analyze

In [None]:
import os
import pickle
import matplotlib.pyplot as plt

# === Directory containing histories ===
history_dir = '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayersMinus4'

# === History filenames for each fold ===
# Keyed by the fold used for validation; the file name lists the training folds.
history_files = {
    'Fold 1': 'history_fold2345_val1_frozen.pkl',
    'Fold 2': 'history_fold1345_val2_frozen.pkl',
    'Fold 3': 'history_fold1245_val3_frozen.pkl',
    'Fold 4': 'history_fold1235_val4_frozen.pkl',
    'Fold 5': 'history_fold1234_val5_frozen.pkl'
}

# === Define consistent colors per fold ===
fold_colors = {
    'Fold 1': 'blue',
    'Fold 2': 'green',
    'Fold 3': 'orange',
    'Fold 4': 'red',
    'Fold 5': 'purple'
}

# === Setup side-by-side plots ===
fig, axes = plt.subplots(1, 2, figsize=(16, 6))  # One row, two columns

# === Accuracy (left) and Loss (right) curves ===
# Each history file is read once and drawn on both axes: solid = training,
# dashed = validation, one colour per fold.
for fold, filename in history_files.items():
    path = os.path.join(history_dir, filename)
    color = fold_colors[fold]
    if not os.path.exists(path):
        # Make a missing fold visible instead of silently omitting its curves.
        print(f"❌ History file not found for {fold}: {path}")
        continue
    # The pickles are this notebook's own training output; do not point this
    # at untrusted files, since unpickling can execute arbitrary code.
    with open(path, 'rb') as f:
        hist = pickle.load(f)
    for ax, metric in zip(axes, ('accuracy', 'loss')):
        ax.plot(hist[metric], color=color, linestyle='-', label=f'{fold} Train')
        ax.plot(hist[f'val_{metric}'], color=color, linestyle='--', label=f'{fold} Val')

# === Axis decoration ===
for ax, metric_name in zip(axes, ('Accuracy', 'Loss')):
    ax.set_title(f'Training and Validation {metric_name} per Fold')
    ax.set_xlabel('Epochs')
    ax.set_ylabel(metric_name)
    ax.legend()
    ax.grid(True)

# === Final Layout ===
# rect leaves the top 5% of the figure free for the suptitle.
plt.suptitle('MobileNetV2: Accuracy and Loss per Fold', fontsize=14)
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

In [None]:
import os
import pickle
import matplotlib.pyplot as plt

# Global matplotlib defaults: these apply to every figure drawn after this
# cell has run, not only to the plot produced below.
plt.rcParams.update({
    'font.size': 16,          # Base font size
    'axes.titlesize': 16,     # Title font size
    'axes.labelsize': 16,     # Axis label font size
    'xtick.labelsize': 16,    # X-tick label font size
    'ytick.labelsize': 16,    # Y-tick label font size
    'legend.fontsize': 16     # Legend font size (if you add one)
})


# === Directory where history files are saved ===
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/History_MobileNet2_Frozen_AllLayersMinus4'

# === Fold numbers ===
# Each number identifies the fold that was held out for validation.
folds = [1, 2, 3, 4, 5]

def load_history(fold):
    """Return the pickled training history of one fold, or ``None``.

    Args:
        fold: Validation fold number (1-5).

    Returns:
        The unpickled history object, or ``None`` (after printing a message)
        when the fold number is unknown or the file does not exist.
    """
    history_names = (
        (1, 'history_fold2345_val1_frozen.pkl'),
        (2, 'history_fold1345_val2_frozen.pkl'),
        (3, 'history_fold1245_val3_frozen.pkl'),
        (4, 'history_fold1235_val4_frozen.pkl'),
        (5, 'history_fold1234_val5_frozen.pkl'),
    )
    # Equality-based lookup so any value comparing equal to 1..5 is accepted.
    filename = next((name for number, name in history_names if fold == number), None)
    if filename is None:
        print(f"❌ Invalid fold: {fold}")
        return None

    file_path = os.path.join(history_save_dir, filename)
    if not os.path.exists(file_path):
        print(f"❌ History file not found for Fold {fold}: {file_path}")
        return None

    with open(file_path, 'rb') as f:
        return pickle.load(f)

def plot_train_val_graphs(fold_histories):
    """Plot loss (left) and accuracy (right) curves of all folds in one figure.

    Args:
        fold_histories: Mapping of fold number to a history dict with the keys
            ``loss``, ``val_loss``, ``accuracy`` and ``val_accuracy``.

    Training curves are blue and solid, validation curves red and dashed; the
    folds are not distinguished by colour.
    """
    def draw_panel(position, train_key, val_key, tag, title, y_label, legend_entries):
        # One subplot: every fold's train/val curve for a single metric.
        plt.subplot(1, 2, position)
        for fold, history in fold_histories.items():
            first_fold = fold == 1
            plt.plot(history[train_key], color='blue', alpha=0.6,
                     label=f'Fold {fold} Train {tag}' if first_fold else "")
            plt.plot(history[val_key], linestyle='--', color='red', alpha=0.6,
                     label=f'Fold {fold} Val {tag}' if first_fold else "")
        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        # Explicit entries label the first two lines drawn on this subplot.
        plt.legend(legend_entries)
        plt.grid(alpha=0.3)

    plt.figure(figsize=(14, 6))
    draw_panel(1, 'loss', 'val_loss', 'Loss',
               'Training and Validation Loss Across Folds (MobileNetV2)',
               'Loss', ['Train Loss', 'Val Loss'])
    draw_panel(2, 'accuracy', 'val_accuracy', 'Acc',
               'Training and Validation Accuracy Across Folds (MobileNetV2)',
               'Accuracy', ['Train Accuracy', 'Val Accuracy'])

    plt.tight_layout()
    plt.show()

# === Load all histories ===
# Maps fold number -> history dict; folds whose history could not be loaded
# (load_history returned None) or is empty are left out.
# NOTE: `history` here is a plain dict and rebinds the name that the training
# cells use for the Keras History object.
fold_histories = {}
for fold in folds:
    history = load_history(fold)
    if history:
        fold_histories[fold] = history

# Only plot when at least one history was found.
if fold_histories:
    plot_train_val_graphs(fold_histories)
else:
    print("🚫 No valid history files found.")

In [None]:
import numpy as np

# Final-epoch values of every fold, collected for the summary at the bottom.
train_accuracies, val_accuracies = [], []
train_losses, val_losses = [], []

for fold, hist in fold_histories.items():
    # The last entry of each history list is taken as the fold's final value.
    final = {key: hist[key][-1] for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')}

    # Accuracies are stored as percentages, losses as-is.
    train_accuracies.append(final['accuracy'] * 100)
    val_accuracies.append(final['val_accuracy'] * 100)
    train_losses.append(final['loss'])
    val_losses.append(final['val_loss'])

    print(f"📁 Fold {fold}:")
    print(f"   ✅ Train Accuracy: {train_accuracies[-1]:.2f}%")
    print(f"   ✅ Train Loss:     {train_losses[-1]:.4f}")
    print(f"   ✅ Val Accuracy:   {val_accuracies[-1]:.2f}%")
    print(f"   ✅ Val Loss:       {val_losses[-1]:.4f}\n")

# Mean ± standard deviation (np.std) across the folds collected above.
print("📈 Averages:")
print(
    f"🔹 Train Accuracy: {np.mean(train_accuracies):.2f}% ± {np.std(train_accuracies):.2f}",
    f"🔹 Train Loss:     {np.mean(train_losses):.4f} ± {np.std(train_losses):.4f}",
    f"🔹 Val Accuracy:   {np.mean(val_accuracies):.2f}% ± {np.std(val_accuracies):.2f}",
    f"🔹 Val Loss:       {np.mean(val_losses):.4f} ± {np.std(val_losses):.4f}",
    sep="\n",
)

Validation metrics

In [None]:
import os
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, precision_recall_curve, auc

# === Directories ===
# Folder the per-fold model files listed in `folds` are loaded from.
model_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
# Root folder holding one sub-folder per fold (joined with each 'val' entry below).
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'

# === Map folds to model and validation set
# Key: fold number.
#   'model' - file name of the saved model inside model_dir
#   'val'   - sub-folder of base_fold_dir that this model is evaluated on
folds = {
    1: {'model': 'model_fold2345_val1_frozen.keras', 'val': 'Fold1'},
    2: {'model': 'model_fold1345_val2_frozen.keras', 'val': 'Fold2'},
    3: {'model': 'model_fold1245_val3_frozen.keras', 'val': 'Fold3'},
    4: {'model': 'model_fold1235_val4_frozen.keras', 'val': 'Fold4'},
    5: {'model': 'model_fold1234_val5_frozen.keras', 'val': 'Fold5'}
}

# === Load images and labels ===
def load_images_and_labels(image_dir):
    """Read the labelled ``.jpg`` images stored under ``image_dir``.

    Images are taken from the ``Good`` sub-folder (label 1) and then the
    ``Bad`` sub-folder (label 0); a sub-folder that does not exist is
    skipped. Each image is loaded at 224x224 and divided by 255.0.

    Returns:
        ``(images, labels)`` as two NumPy arrays built from the collected
        lists, in directory-listing order.
    """
    pixel_arrays = []
    class_ids = []
    for class_name, class_id in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if os.path.exists(class_dir):
            for file_name in os.listdir(class_dir):
                # Extension check is case-insensitive; anything but .jpg is ignored.
                if not file_name.lower().endswith('.jpg'):
                    continue
                picture = load_img(os.path.join(class_dir, file_name), target_size=(224, 224))
                pixel_arrays.append(img_to_array(picture) / 255.0)
                class_ids.append(class_id)
    return np.array(pixel_arrays), np.array(class_ids)

# === Evaluate each fold ===
# For every fold: load its model, score that fold's validation images at a
# 0.5 decision threshold and keep the metrics for the averages printed below.
metrics = []
for fold, paths in folds.items():
    print(f"\n📂 Evaluating Fold {fold}")

    model_path = os.path.join(model_dir, paths['model'])
    val_dir = os.path.join(base_fold_dir, paths['val'])

    if not os.path.exists(model_path):
        print(f"⚠️ Model not found for Fold {fold}, skipping.")
        continue

    model = load_model(model_path)
    X_val, y_val = load_images_and_labels(val_dir)

    # An empty validation folder would make predict() fail; skip it explicitly.
    if len(X_val) == 0:
        print(f"⚠️ No validation images found for Fold {fold} in {val_dir}, skipping.")
        continue

    # Predict
    y_probs = model.predict(X_val, verbose=0).flatten()
    y_preds = (y_probs > 0.5).astype(int)

    # Metrics
    acc = accuracy_score(y_val, y_preds)
    f1 = f1_score(y_val, y_preds)
    prec = precision_score(y_val, y_preds)
    rec = recall_score(y_val, y_preds)
    # Area under the precision-recall curve (recall on the x axis).
    prec_vals, rec_vals, _ = precision_recall_curve(y_val, y_probs)
    auc_pr = auc(rec_vals, prec_vals)

    metrics.append({
        'Fold': fold,
        'Accuracy': acc,
        'F1 Score': f1,
        'Precision': prec,
        'Recall': rec,
        'AUC-PR': auc_pr
    })

    print(f"  Accuracy : {acc:.4f}")
    print(f"  F1 Score : {f1:.4f}")
    print(f"  Precision: {prec:.4f}")
    print(f"  Recall   : {rec:.4f}")
    print(f"  AUC-PR   : {auc_pr:.4f}")
    print("-" * 40)

# === Print average metrics ===
print("\n📊 Average Validation Metrics Across Folds:")
if metrics:
    avg = {k: np.mean([m[k] for m in metrics]) for k in metrics[0] if k != 'Fold'}
    for k, v in avg.items():
        print(f"{k}: {v:.4f}")
else:
    # Every fold was skipped: metrics[0] would raise IndexError.
    print("🚫 No folds were evaluated; nothing to average.")

In [None]:
import os
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, precision_recall_curve, auc

# === Directories ===
# Folder the per-fold model files listed in `folds` are loaded from.
model_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
# Root folder holding one sub-folder per fold (joined with each 'val' entry below).
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'

# === Map folds to model and validation set
# Key: fold number.
#   'model' - file name of the saved model inside model_dir
#   'val'   - sub-folder of base_fold_dir that this model is evaluated on
folds = {
    1: {'model': 'model_fold2345_val1_frozen.keras', 'val': 'Fold1'},
    2: {'model': 'model_fold1345_val2_frozen.keras', 'val': 'Fold2'},
    3: {'model': 'model_fold1245_val3_frozen.keras', 'val': 'Fold3'},
    4: {'model': 'model_fold1235_val4_frozen.keras', 'val': 'Fold4'},
    5: {'model': 'model_fold1234_val5_frozen.keras', 'val': 'Fold5'}
}

# === Load images and labels ===
def load_images_and_labels(image_dir):
    """Collect the ``.jpg`` images and class labels found under ``image_dir``.

    ``Good`` is scanned first (label 1), then ``Bad`` (label 0). Missing
    class folders are skipped. Every image is loaded at 224x224 and divided
    by 255.0.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays, ordered as the files
        were listed.
    """
    loaded = []
    targets = []
    for class_name in ('Good', 'Bad'):
        class_dir = os.path.join(image_dir, class_name)
        if not os.path.exists(class_dir):
            continue
        target = int(class_name == 'Good')
        jpg_names = [entry for entry in os.listdir(class_dir) if entry.lower().endswith('.jpg')]
        for entry in jpg_names:
            picture = load_img(os.path.join(class_dir, entry), target_size=(224, 224))
            loaded.append(img_to_array(picture) / 255.0)
            targets.append(target)
    return np.array(loaded), np.array(targets)

# === Evaluate each fold ===
# Each fold's model is scored on that fold's own validation folder using a
# fixed 0.5 decision threshold.
metrics = []
for fold, paths in folds.items():
    print(f"\n📂 Evaluating Fold {fold}")

    model_path = os.path.join(model_dir, paths['model'])
    val_dir = os.path.join(base_fold_dir, paths['val'])

    if os.path.exists(model_path):
        model = load_model(model_path)
        X_val, y_val = load_images_and_labels(val_dir)

        # Predict
        y_probs = model.predict(X_val, verbose=0).flatten()
        y_preds = (y_probs > 0.5).astype(int)

        # Metrics
        acc, f1 = accuracy_score(y_val, y_preds), f1_score(y_val, y_preds)
        prec, rec = precision_score(y_val, y_preds), recall_score(y_val, y_preds)
        prec_vals, rec_vals, _ = precision_recall_curve(y_val, y_probs)
        auc_pr = auc(rec_vals, prec_vals)  # recall on x, precision on y

        metrics.append({
            'Fold': fold,
            'Accuracy': acc,
            'F1 Score': f1,
            'Precision': prec,
            'Recall': rec,
            'AUC-PR': auc_pr
        })

        print(
            f"  Accuracy : {acc:.4f}",
            f"  F1 Score : {f1:.4f}",
            f"  Precision: {prec:.4f}",
            f"  Recall   : {rec:.4f}",
            f"  AUC-PR   : {auc_pr:.4f}",
            "-" * 40,
            sep="\n",
        )
    else:
        print(f"⚠️ Model not found for Fold {fold}, skipping.")

# === Compute Mean and Std Dev ===
print("\n📊 Average and Standard Deviation of Validation Metrics Across Folds:")
keys = ['Accuracy', 'F1 Score', 'Precision', 'Recall', 'AUC-PR']
for key in keys:
    values = [m[key] for m in metrics]
    mean, std = np.mean(values), np.std(values)
    print(f"{key}: {mean:.4f} ± {std:.4f}")

### Final results

Threshold tweaking

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, precision_recall_curve, auc

# === Paths ===
# Folder the per-fold model files listed in `folds` are loaded from.
model_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
# Root folder holding one sub-folder per fold (joined with each 'val' entry below).
base_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'

# === Fold mapping
# Key: fold number.
#   'model' - file name of the saved model inside model_dir
#   'val'   - sub-folder of base_dir whose images that model predicts on
folds = {
    1: {'model': 'model_fold2345_val1_frozen.keras', 'val': 'Fold1'},
    2: {'model': 'model_fold1345_val2_frozen.keras', 'val': 'Fold2'},
    3: {'model': 'model_fold1245_val3_frozen.keras', 'val': 'Fold3'},
    4: {'model': 'model_fold1235_val4_frozen.keras', 'val': 'Fold4'},
    5: {'model': 'model_fold1234_val5_frozen.keras', 'val': 'Fold5'}
}

# === Load images and labels
def load_images_and_labels(image_dir):
    """Load the ``.jpg`` files of the ``Good`` and ``Bad`` folders in ``image_dir``.

    ``Good`` images are labelled 1 and ``Bad`` images 0. A class folder that
    is absent is silently skipped. Images are loaded at 224x224 and divided
    by 255.0.

    Returns:
        ``(images, labels)`` as NumPy arrays (``Good`` entries first).
    """
    label_of = {'Good': 1, 'Bad': 0}
    collected_images, collected_labels = [], []
    for class_name, class_label in label_of.items():
        class_dir = os.path.join(image_dir, class_name)
        if not os.path.exists(class_dir):
            continue
        for file_name in os.listdir(class_dir):
            if file_name.lower().endswith('.jpg'):
                pil_image = load_img(os.path.join(class_dir, file_name), target_size=(224, 224))
                collected_images.append(img_to_array(pil_image) / 255.0)
                collected_labels.append(class_label)
    return np.array(collected_images), np.array(collected_labels)

# === Store predictions and labels
# Predictions of every fold's model on its own validation folder are pooled
# into two flat sequences (probabilities and true labels).
all_probs = []
all_labels = []

for fold, info in folds.items():
    print(f"📂 Evaluating Fold {fold}...")

    model_path = os.path.join(model_dir, info['model'])
    val_dir = os.path.join(base_dir, info['val'])

    if os.path.exists(model_path):
        model = load_model(model_path)
        X_val, y_val = load_images_and_labels(val_dir)
        y_probs = model.predict(X_val, verbose=0).flatten()

        all_probs += list(y_probs)
        all_labels += list(y_val)
    else:
        print(f"⚠️ Model not found for Fold {fold}, skipping.")

# === Convert to arrays
all_probs, all_labels = np.array(all_probs), np.array(all_labels)

# === Plot scatter and score the pooled predictions
# One decision threshold drives the reference line, the predictions and the
# printed message, so changing it here keeps all three in agreement.
threshold = 0.5

if all_probs.size == 0:
    # Happens when every fold was skipped; the metric calls below cannot
    # work on empty input.
    print("🚫 No predictions collected; nothing to plot or score.")
else:
    plt.figure(figsize=(12, 6))
    # A single vectorised scatter call instead of one call per sample:
    # green = true label 1 (Good), red = true label 0 (Bad).
    point_colors = np.where(all_labels == 1, 'green', 'red').tolist()
    plt.scatter(np.arange(len(all_probs)), all_probs, c=point_colors, alpha=0.6)

    plt.axhline(y=threshold, color='black', linestyle='--', label=f'Threshold = {threshold}')
    plt.title('Scatter Plot of Predicted Probabilities on Validation Sets (All Folds)')
    plt.xlabel('Sample Index')
    plt.ylabel('Predicted Probability')
    plt.ylim(0, 1.05)
    plt.legend()
    plt.grid(alpha=0.3)
    plt.tight_layout()
    plt.show()

    y_pred = (all_probs >= threshold).astype(int)

    acc = accuracy_score(all_labels, y_pred)
    f1 = f1_score(all_labels, y_pred)
    prec = precision_score(all_labels, y_pred)
    rec = recall_score(all_labels, y_pred)
    prec_curve, rec_curve, _ = precision_recall_curve(all_labels, all_probs)
    auc_pr = auc(rec_curve, prec_curve)

    # These are computed once on the pooled predictions, not averaged per
    # fold, so the heading says "Pooled".
    print(f"\n📊 Pooled Metrics on All Validation Sets using Threshold = {threshold}")
    print(f"Accuracy : {acc:.4f}")
    print(f"F1 Score : {f1:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall   : {rec:.4f}")
    print(f"AUC-PR   : {auc_pr:.4f}")

In [None]:
import os
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, precision_recall_curve, auc

# === Directories ===
# Folder the per-fold model files listed in `folds` are loaded from.
model_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
# Test-set folder; its images are loaded once and reused for every model.
test_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test'

# === Map folds to models (all evaluate on same test set)
# Key: fold number, value: model file name inside model_dir.
folds = {
    1: 'model_fold2345_val1_frozen.keras',
    2: 'model_fold1345_val2_frozen.keras',
    3: 'model_fold1245_val3_frozen.keras',
    4: 'model_fold1235_val4_frozen.keras',
    5: 'model_fold1234_val5_frozen.keras'
}

# === Load test images and labels ===
def load_images_and_labels(image_dir):
    """Return the ``.jpg`` images under ``image_dir`` together with their labels.

    Looks in ``image_dir/Good`` (label 1) and then ``image_dir/Bad``
    (label 0), skipping either folder if it does not exist. Images are
    loaded at 224x224 and divided by 255.0.

    Returns:
        ``(images, labels)`` - two NumPy arrays in listing order.
    """
    samples = []  # (pixel array, label) pairs in the order they were read
    for class_name, class_label in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if os.path.exists(class_dir):
            for file_name in os.listdir(class_dir):
                if not file_name.lower().endswith('.jpg'):
                    continue
                picture = load_img(os.path.join(class_dir, file_name), target_size=(224, 224))
                samples.append((img_to_array(picture) / 255.0, class_label))
    images = [pixels for pixels, _ in samples]
    labels = [class_label for _, class_label in samples]
    return np.array(images), np.array(labels)

# === Load test set once ===
X_test, y_test = load_images_and_labels(test_dir)

# === Evaluate each fold's model on the test set ===
# All models see the same X_test / y_test; predictions use a 0.5 threshold.
metrics = []
for fold, model_name in folds.items():
    print(f"\n📂 Evaluating Fold {fold} Model on Test Set")

    model_path = os.path.join(model_dir, model_name)
    if os.path.exists(model_path):
        model = load_model(model_path)

        # Predict
        y_probs = model.predict(X_test, verbose=0).flatten()
        y_preds = (y_probs > 0.5).astype(int)

        # Metrics
        acc, f1 = accuracy_score(y_test, y_preds), f1_score(y_test, y_preds)
        prec, rec = precision_score(y_test, y_preds), recall_score(y_test, y_preds)
        prec_vals, rec_vals, _ = precision_recall_curve(y_test, y_probs)
        auc_pr = auc(rec_vals, prec_vals)  # recall on x, precision on y

        metrics.append({
            'Fold': fold,
            'Accuracy': acc,
            'F1 Score': f1,
            'Precision': prec,
            'Recall': rec,
            'AUC-PR': auc_pr
        })

        print(
            f"  Accuracy : {acc:.4f}",
            f"  F1 Score : {f1:.4f}",
            f"  Precision: {prec:.4f}",
            f"  Recall   : {rec:.4f}",
            f"  AUC-PR   : {auc_pr:.4f}",
            "-" * 40,
            sep="\n",
        )
    else:
        print(f"⚠️ Model not found for Fold {fold}, skipping.")

# === Print average and std metrics ===
print("\n📊 Average and Std Dev of Test Metrics Across Folds:")
keys = ['Accuracy', 'F1 Score', 'Precision', 'Recall', 'AUC-PR']
for key in keys:
    values = [m[key] for m in metrics]
    mean, std = np.mean(values), np.std(values)
    print(f"{key}: {mean:.4f} ± {std:.4f}")

Pair the Excel records with the individual image ratings

In [None]:
import os
import pandas as pd

# === Paths ===
excel_path = '/Users/suzetteschulenburg/Desktop/Masters/Data/Publsih excel/CattleRecords.xlsx'
good_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test/Good'
bad_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test/Bad'

# === Load Excel with proper header ===
df = pd.read_excel(excel_path, header=1)  # column names come from the second sheet row
df.columns = df.columns.str.strip()  # Clean up any whitespace

# Keep only necessary columns. .copy() makes this an independent frame, so
# adding ID_Prefix below does not write into a slice of the loaded sheet
# (which triggers pandas' SettingWithCopyWarning).
df = df[['ID', 'Rating']].copy()
# Leading "letters + digits" part of the ID (drops a trailing _1 etc.).
# astype(str) keeps .str usable for non-string IDs; expand=False returns a
# Series rather than a one-column DataFrame.
df['ID_Prefix'] = df['ID'].astype(str).str.extract(r'(^[A-Z]+\d+)', expand=False)

# === Extract image info ===
# One record per image file: file name, class folder and the ID prefix taken
# from the part of the file name before the first underscore.
image_data = []
for label, folder in [('Good', good_dir), ('Bad', bad_dir)]:
    for fname in os.listdir(folder):
        if fname.lower().endswith(('.png', '.jpg', '.jpeg')):
            match_id = fname.split('_')[0]  # extract prefix from image filename
            image_data.append({
                'Filename': fname,
                'Folder': label,
                'ID_Prefix': match_id
            })

image_df = pd.DataFrame(image_data)

# === Merge Excel and Image Data ===
# Left join: every image is kept; images without a matching Excel row get
# NaN for ID and Rating.
# NOTE(review): if several Excel rows share one ID_Prefix, each matching
# image appears once per such row - confirm that is intended.
merged_df = pd.merge(image_df, df, on='ID_Prefix', how='left')
merged_df = merged_df[['Filename', 'Folder', 'ID', 'Rating']]

# === Show result ===
print(merged_df.head())

# === Optionally save
# merged_df.to_csv("paired_image_rating_table.csv", index=False)

In [None]:
# === First 10 from each class ===
# Rows are selected by the 'Folder' column of merged_df, then truncated.
is_good = merged_df['Folder'] == 'Good'
is_bad = merged_df['Folder'] == 'Bad'
first_10_good = merged_df.loc[is_good].head(10)
first_10_bad = merged_df.loc[is_bad].head(10)

# === Display (tables printed without the index column)
print("🟢 First 10 GOOD Images:", first_10_good.to_string(index=False), sep="\n")

print("\n🔴 First 10 BAD Images:", first_10_bad.to_string(index=False), sep="\n")

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, precision_recall_curve, auc
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# === Directories ===
# Folder the per-fold model files listed in `folds` are loaded from.
model_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
# Test-set folder; its 'Good' and 'Bad' sub-folders are read below.
test_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test'
# Workbook the ID / Rating columns are read from.
excel_path = '/Users/suzetteschulenburg/Desktop/Masters/Data/Publsih excel/CattleRecords.xlsx'

# === Map folds to models
# Key: fold number, value: model file name inside model_dir.
folds = {
    1: 'model_fold2345_val1_frozen.keras',
    2: 'model_fold1345_val2_frozen.keras',
    3: 'model_fold1245_val3_frozen.keras',
    4: 'model_fold1235_val4_frozen.keras',
    5: 'model_fold1234_val5_frozen.keras'
}

# === Load ratings
df = pd.read_excel(excel_path, header=1)  # Skip header row
# The sheet is expected to have exactly three columns; this assignment
# raises if it has a different number.
df.columns = ['ID', 'Photo Number', 'Rating']
df['ID'] = df['ID'].astype(str)

# === Load images and labels with filenames
# filenames / images / labels stay index-aligned: entry i of each list
# describes the same file.
filenames, images, labels = [], [], []
for label_folder in ['Good', 'Bad']:
    label_path = os.path.join(test_dir, label_folder)
    label = 1 if label_folder == 'Good' else 0
    for fname in os.listdir(label_path):
        # Case-insensitive extension check, matching the other loaders in
        # this notebook (a file named X.JPG is no longer skipped).
        if fname.lower().endswith(('.png', '.jpg')):
            path = os.path.join(label_path, fname)
            img = load_img(path, target_size=(224, 224))
            arr = img_to_array(img) / 255.0
            images.append(arr)
            labels.append(label)
            filenames.append(fname)

X_test = np.array(images)
y_test = np.array(labels)

# === Evaluate all folds and record predictions
import re


def _lookup_rating(base_id):
    """Return the Rating of the first row of `df` whose ID refers to base_id.

    base_id is escaped and must not be preceded by a letter/digit nor
    followed by a digit, so 'E212' no longer matches the row for 'E2120'
    (a plain substring search did). Returns None when no row matches.
    """
    id_pattern = rf'(?<![A-Za-z0-9]){re.escape(base_id)}(?!\d)'
    rating_row = df[df['ID'].str.contains(id_pattern, regex=True)]
    return rating_row['Rating'].values[0] if not rating_row.empty else None


# The ID and rating of a file do not depend on the model, so resolve them
# once here instead of once per image per fold.
base_ids = [
    fname.split('_IMG_')[0] if '_IMG_' in fname else fname.split('_')[0]
    for fname in filenames
]
rating_by_id = {base_id: _lookup_rating(base_id) for base_id in set(base_ids)}

results = []

for fold, model_name in folds.items():
    model_path = os.path.join(model_dir, model_name)
    if not os.path.exists(model_path):
        continue

    model = load_model(model_path)
    y_probs = model.predict(X_test).flatten()
    y_preds = (y_probs > 0.5).astype(int)

    # One output row per (fold, image).
    for fname, base_id, prob, true_label, pred_label in zip(filenames, base_ids, y_probs, y_test, y_preds):
        results.append({
            'Fold': fold,
            'Filename': fname,
            'ID': base_id,
            'Rating': rating_by_id[base_id],
            'TrueLabel': true_label,
            'PredProb': round(prob, 4),
            'PredLabel': int(pred_label),
            'Agreement': '✅' if true_label == pred_label else '❌'
        })

results_df = pd.DataFrame(results)
results_df.to_csv("fold_predictions_with_ratings.csv", index=False)
print("✅ Table saved to 'fold_predictions_with_ratings.csv'")

Take the highest predicted probability per ID

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix

# === Paths ===
# Folder the per-fold model files listed in `folds` are loaded from.
model_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
# Test-set folder; its 'Good' and 'Bad' sub-folders are read below.
test_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test'
# Workbook the ID / Rating columns are read from.
excel_path = '/Users/suzetteschulenburg/Desktop/Masters/Data/Publsih excel/CattleRecords.xlsx'

# === Model folds ===
# Key: fold number, value: model file name inside model_dir.
folds = {
    1: 'model_fold2345_val1_frozen.keras',
    2: 'model_fold1345_val2_frozen.keras',
    3: 'model_fold1245_val3_frozen.keras',
    4: 'model_fold1235_val4_frozen.keras',
    5: 'model_fold1234_val5_frozen.keras'
}

# === Load ratings from Excel (headers in second row) ===
df_ratings = pd.read_excel(excel_path, header=1)
# Columns are picked by position: the first becomes "ID", the third "Rating".
id_column, rating_column = df_ratings.columns[0], df_ratings.columns[2]
df_ratings = df_ratings.rename(columns={id_column: "ID", rating_column: "Rating"})

# === Load test set ===
# One record per .jpg/.png file: name, path, scaled 224x224 pixels, true label.
image_data = []
for label_str, label_val in (('Good', 1), ('Bad', 0)):
    subdir = os.path.join(test_dir, label_str)
    for fname in os.listdir(subdir):
        if not fname.lower().endswith(('.jpg', '.png')):
            continue
        img_path = os.path.join(subdir, fname)
        img = load_img(img_path, target_size=(224, 224))
        img_arr = img_to_array(img) / 255.0
        image_data.append(dict(
            filename=fname,
            filepath=img_path,
            image=img_arr,
            true_label=label_val,
        ))

df_images = pd.DataFrame(image_data)
# ID = everything in the file name before the last "_IMG" (NaN if absent).
df_images['ID'] = df_images['filename'].str.extract(r'(.*)_IMG')[0]
# Left join keeps every image; images without a rating row get NaN.
df_images = df_images.merge(df_ratings[['ID', 'Rating']], on='ID', how='left')
# Stack after the merge so that row i of X belongs to row i of df_images.
X = np.stack(df_images['image'].values)

# === Evaluate models ===
# For each fold model: predict every test image, keep per ID only the image
# with the highest predicted probability, and score those rows.
results = []
# Running 2x2 confusion matrix; rows = true class, columns = predicted
# class, both in the order [0 (Bad), 1 (Good)].
all_cm = np.zeros((2, 2), dtype=int)

for fold, model_name in folds.items():
    model_path = os.path.join(model_dir, model_name)
    if not os.path.exists(model_path):
        continue
    print(f"Evaluating Fold {fold}...")
    model = load_model(model_path)

    y_probs = model.predict(X, verbose=0).flatten()
    df_images['pred_prob'] = y_probs
    df_images['pred_label'] = (y_probs > 0.5).astype(int)

    # idxmax per ID selects the single most confident image of each ID;
    # groupby drops rows whose ID is NaN.
    df_top = df_images.loc[df_images.groupby('ID')['pred_prob'].idxmax()]
    y_true = df_top['true_label']
    y_pred = df_top['pred_label']

    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred)
    rec = recall_score(y_true, y_pred)
    # labels=[0, 1] forces a 2x2 result. Without it, a fold in which only
    # one class occurs yields a 1x1 matrix that `+=` would broadcast into
    # all four cells of all_cm.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    all_cm += cm

    results.append({
        'Fold': fold,
        'Accuracy': acc,
        'F1 Score': f1,
        'Precision': prec,
        'Recall': rec
    })

# === Show results ===
df_results = pd.DataFrame(results)
print("\nFold-wise Metrics:")
print(df_results)

print("\nAverage ± Std:")
if df_results.empty:
    # No model file was found: the metric columns do not exist.
    print("No fold was evaluated; nothing to summarize.")
else:
    # NOTE: Series.std() is the sample standard deviation (ddof=1), while
    # the np.std calls used elsewhere in this notebook default to ddof=0.
    for col in ['Accuracy', 'F1 Score', 'Precision', 'Recall']:
        avg = df_results[col].mean()
        std = df_results[col].std()
        print(f"{col}: {avg:.4f} ± {std:.4f}")

# === Confusion matrix ===
plt.figure(figsize=(5, 4))
sns.heatmap(all_cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Bad', 'Good'], yticklabels=['Bad', 'Good'])
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Cumulative Confusion Matrix Across All Folds')
plt.tight_layout()
plt.show()

### Analyze test

In [None]:
import os
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# === Paths ===
model_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
fold1_model = 'model_fold2345_val1_frozen.keras'  # Fold 1 model
image_path = '/Users/suzetteschulenburg/Desktop/18006865-b37c-40e9-abde-c11e2a496465.jpg'        # <-- put your image path here

# === Settings ===
target_size, threshold = (224, 224), 0.5  # input size and decision threshold

# === Load model ===
model_path = os.path.join(model_dir, fold1_model)
if os.path.exists(model_path):
    model = load_model(model_path)
else:
    raise FileNotFoundError(f"Model not found: {model_path}")

# === Preprocess single image ===
if not os.path.exists(image_path):
    raise FileNotFoundError(f"Image not found: {image_path}")

img = load_img(image_path, target_size=target_size)
# Scale by 1/255 and add a leading batch axis: (1, 224, 224, 3)
arr = (img_to_array(img) / 255.0)[np.newaxis, ...]

# === Predict ===
# The model output is squeezed to one number and read as P(Good).
prob_good = float(np.squeeze(model.predict(arr, verbose=0)))
pred_label = int(prob_good > threshold)
if pred_label == 1:
    label_name = 'Good'
else:
    label_name = 'Bad'

print(
    f"Image: {image_path}",
    f"P(Good) = {prob_good:.4f}",
    f"Predicted class: {label_name} (threshold={threshold})",
    sep="\n",
)

In [None]:
import os
import numpy as np
from PIL import Image
import tensorflow as tf

# === Paths ===
# TFLite model file; the script below raises FileNotFoundError if it is missing.
tflite_path = "/Users/suzetteschulenburg/Desktop/Wag/cow_model.tflite"
# Image that is classified by both runtimes.
image_path  =  '/Users/suzetteschulenburg/Desktop/18006865-b37c-40e9-abde-c11e2a496465.jpg'
# Optional: set your Keras model to compare (or leave None)
keras_model_path = "/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4/model_fold2345_val1_frozen.keras"

# === Settings ===
# Size the image is resized to before inference.
target_size = (224, 224)
threshold = 0.5  # decision threshold

def load_image_array(path, size=(224, 224)):
    """Read an image file into a float32 batch of shape (1, H, W, 3).

    Args:
        path: image file to open.
        size: size passed to ``Image.resize`` (bilinear resampling).

    Returns:
        Array with RGB values divided by 255.0 and a leading batch axis.
    """
    # The context manager closes the underlying file deterministically
    # instead of leaving it to garbage collection.
    with Image.open(path) as source:
        img = source.convert("RGB").resize(size, Image.BILINEAR)
    arr = np.asarray(img, dtype=np.float32) / 255.0  # [0,1] normalize
    return np.expand_dims(arr, axis=0)  # (1, H, W, 3)

def run_tflite(model_path, input_batch):
    """Returns a float probability in [0,1] for class 'Good'.

    Runs one inference with the TFLite model at ``model_path``.

    Args:
        model_path: path of the ``.tflite`` file.
        input_batch: preprocessed float input batch; it is quantized here
            when the model's input tensor is uint8 or int8.

    Returns:
        The model output squeezed to a single Python float.
    """
    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()

    input_details  = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Some TFLite models differ in size/dtype; adapt if needed
    idx_in  = input_details[0]["index"]
    idx_out = output_details[0]["index"]

    # If the model expects an integer tensor (uint8 or int8 quantization),
    # quantize the input: q = real / scale + zero_point.
    in_dtype = input_details[0]["dtype"]
    x = input_batch
    if in_dtype in (np.uint8, np.int8):
        scale, zero_point = input_details[0]["quantization"]
        if scale == 0:  # Safety for non-quantized metadata
            scale, zero_point = 1.0, 0
        # Clip before casting so out-of-range values saturate instead of
        # wrapping around in the integer type.
        limits = np.iinfo(in_dtype)
        x = np.clip(np.round(x / scale + zero_point), limits.min, limits.max).astype(in_dtype)
    else:
        x = x.astype(np.float32)

    # Resize tensor if the model has a different input shape
    expected_shape = input_details[0]["shape"]
    if list(expected_shape) != list(x.shape):
        interpreter.resize_tensor_input(idx_in, x.shape, strict=False)
        interpreter.allocate_tensors()

    interpreter.set_tensor(idx_in, x)
    interpreter.invoke()
    y = interpreter.get_tensor(idx_out)

    # Dequantize output if needed: real = scale * (q - zero_point).
    out_dtype = output_details[0]["dtype"]
    if out_dtype in (np.uint8, np.int8):
        scale, zero_point = output_details[0]["quantization"]
        if scale == 0:
            scale, zero_point = 1.0, 0
        y = scale * (y.astype(np.float32) - zero_point)
    else:
        y = y.astype(np.float32)

    # Flatten and cast to float
    prob_good = float(np.squeeze(y))
    # If your TFLite outputs logits, pass through sigmoid:
    # prob_good = float(1 / (1 + np.exp(-prob_good)))
    return prob_good

def run_keras(model_path, input_batch):
    """Load the Keras model at ``model_path`` and return its prediction for
    ``input_batch``, squeezed to a single Python float."""
    keras_model = tf.keras.models.load_model(model_path)
    raw_output = keras_model.predict(input_batch, verbose=0)
    return float(np.squeeze(raw_output))

# === Load image ===
if not os.path.exists(image_path):
    raise FileNotFoundError(f"Image not found: {image_path}")
x = load_image_array(image_path, target_size)

# === TFLite ===
if not os.path.exists(tflite_path):
    raise FileNotFoundError(f"TFLite model not found: {tflite_path}")
tflite_prob = run_tflite(tflite_path, x)
if tflite_prob > threshold:
    tflite_pred = "Good"
else:
    tflite_pred = "Bad"

print(
    "—— TFLite ——————————————————————————",
    f"Image:      {image_path}",
    f"P(Good):    {tflite_prob:.4f}",
    f"Pred class: {tflite_pred} (thr={threshold})",
    sep="\n",
)

# === Optional: Keras comparison ===
# Runs only when a Keras path is configured and the file exists.
if not (keras_model_path and os.path.exists(keras_model_path)):
    print("\n(Keras model not provided or not found; skipped comparison.)")
else:
    keras_prob = run_keras(keras_model_path, x)
    if keras_prob > threshold:
        keras_pred = "Good"
    else:
        keras_pred = "Bad"
    print(
        "\n—— Keras ———————————————————————————",
        f"P(Good):    {keras_prob:.4f}",
        f"Pred class: {keras_pred} (thr={threshold})",
        sep="\n",
    )

    # Quick diff
    delta = tflite_prob - keras_prob
    agree = (tflite_pred == keras_pred)
    print(
        "\n—— Compare —————————————————————————",
        f"Δ Prob (TFLite−Keras): {delta:+.4f}",
        f"Agreement:             {agree}",
        sep="\n",
    )

In [None]:
# Image to classify. The comparison cell that follows reads this global
# `image_path` and does not define it itself, so run this cell first.
image_path  = '/Users/suzetteschulenburg/Desktop/Cow1.png'

In [None]:
import os
import numpy as np
from PIL import Image
import tensorflow as tf
from tensorflow.keras.models import load_model

# === Paths ===
# Both model files are required by this cell; a missing one raises
# FileNotFoundError in the run section below.

keras_model_path = "/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4/model_fold2345_val1_frozen.keras"
tflite_path = "/Users/suzetteschulenburg/Desktop/Wag/cow_model.tflite"

# === Settings (shared) ===
# Size the image is resized to before it is fed to either model.
target_size = (224, 224)
threshold = 0.5  # decision threshold, same for both

def preprocess(path, size=(224, 224)):
    """Shared preprocessing: RGB -> resize -> float32 in [0,1] -> (1,H,W,3).

    Args:
        path: image file to open.
        size: size passed to ``Image.resize`` (bilinear resampling).

    Returns:
        float32 array with values divided by 255.0 and a leading batch axis.
    """
    # The context manager closes the underlying file deterministically
    # instead of leaving it to garbage collection.
    with Image.open(path) as source:
        img = source.convert("RGB").resize(size, Image.BILINEAR)
    x = np.asarray(img, dtype=np.float32) / 255.0
    return np.expand_dims(x, axis=0)

def predict_keras(model_path, x_batch):
    """Load the Keras model stored at ``model_path`` and return its output
    for ``x_batch`` squeezed to one Python float."""
    keras_model = load_model(model_path)
    raw_output = keras_model.predict(x_batch, verbose=0)
    return float(np.squeeze(raw_output))

def predict_tflite(model_path, x_batch):
    """Feeds the SAME x_batch; adapts only to TFLite tensor dtype if needed.

    Args:
        model_path: path of the ``.tflite`` file.
        x_batch: preprocessed float input batch; quantized here when the
            model's input tensor is uint8 or int8.

    Returns:
        The model output squeezed to a single Python float.
    """
    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()
    in_det  = interpreter.get_input_details()[0]
    out_det = interpreter.get_output_details()[0]

    # Adapt dtype if the model is integer-quantized (uint8 or int8); this is
    # not preprocessing, just the tensor encoding: q = real / scale + zp.
    x = x_batch
    in_dtype = in_det["dtype"]
    if in_dtype in (np.uint8, np.int8):
        scale, zp = in_det["quantization"]
        if scale == 0:
            scale, zp = 1.0, 0
        # Clip before casting so out-of-range values saturate instead of
        # wrapping around in the integer type.
        limits = np.iinfo(in_dtype)
        x = np.clip(np.round(x / scale + zp), limits.min, limits.max).astype(in_dtype)
    else:
        x = x.astype(np.float32)

    # Resize input tensor if needed (keeps SAME preprocessed data)
    if list(in_det["shape"]) != list(x.shape):
        interpreter.resize_tensor_input(in_det["index"], x.shape, strict=False)
        interpreter.allocate_tensors()

    interpreter.set_tensor(in_det["index"], x)
    interpreter.invoke()
    y = interpreter.get_tensor(out_det["index"]).astype(np.float32)

    # Dequantize output if needed: real = scale * (q - zp)
    if out_det["dtype"] in (np.uint8, np.int8):
        scale, zp = out_det["quantization"]
        if scale == 0:
            scale, zp = 1.0, 0
        y = scale * (y - zp)

    prob = float(np.squeeze(y))
    # If your TFLite head outputs logits (rare in your setup), uncomment:
    # prob = 1.0 / (1.0 + np.exp(-prob))
    return prob

# === Run ===
# `image_path` is not set in this cell; it must already exist as a global.
if not os.path.exists(image_path):
    raise FileNotFoundError(image_path)
x = preprocess(image_path, target_size)

# TFLite
if not os.path.exists(tflite_path):
    raise FileNotFoundError(tflite_path)
tfl_prob = predict_tflite(tflite_path, x)
if tfl_prob > threshold:
    tfl_pred = "Good"
else:
    tfl_pred = "Bad"

# Keras
if not os.path.exists(keras_model_path):
    raise FileNotFoundError(keras_model_path)
kr_prob = predict_keras(keras_model_path, x)
if kr_prob > threshold:
    kr_pred = "Good"
else:
    kr_pred = "Bad"

# === Print side-by-side ===
print(
    f"Image: {image_path}",
    "—— TFLite ———————————————",
    f"P(Good): {tfl_prob:.4f} | Pred: {tfl_pred} (thr={threshold})",
    "—— Keras ————————————————",
    f"P(Good): {kr_prob:.4f} | Pred: {kr_pred} (thr={threshold})",
    "—— Compare ——————————————",
    f"Δ Prob (TFLite−Keras): {tfl_prob - kr_prob:+.4f}",
    f"Agreement: {tfl_pred == kr_pred}",
    sep="\n",
)

Get YOLO too

In [None]:
# Test image for the YOLO cell below.
# NOTE(review): presumably read as a global by that cell - confirm.
image_path  = "/Users/suzetteschulenburg/Desktop/MainUse/Test/Bad/E2120_IMG_8289.jpg"

In [None]:
import os
import numpy as np
import cv2
from PIL import Image
import tensorflow as tf
from tensorflow.keras.models import load_model
from ultralytics import YOLO

# === Paths ===
# Keras and TFLite classifier files, plus the YOLOv8 segmentation weights.
keras_model_path = "/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4/model_fold2345_val1_frozen.keras"
tflite_path = "/Users/suzetteschulenburg/Desktop/Wag/cow_model.tflite"
yolo_weights = "/Users/suzetteschulenburg/Desktop/Wag/yolov8s-seg.pt"  # or wherever your weights are


# === Settings ===
# These values mirror the default arguments of yolo_crop_and_chop below.
target_size = (224, 224)
threshold = 0.5
feet_ratio = 0.30      # remove bottom 30%
margin_ratio = 0.05    # expand crop bbox by 5%
pad_color = (0, 0, 0)  # black padding

# ─────────────────────────────────────────────────────────────────────────────
# 1) YOLOv8 segmentation-based crop: largest cow, remove bottom 30%, pad+resize
# ─────────────────────────────────────────────────────────────────────────────
def yolo_crop_and_chop(image_path, yolo_weights, feet_ratio=0.30, margin_ratio=0.05,
                       target_size=(224, 224), pad_color=(0,0,0)):
    """Crop the largest detected cow, drop its feet, and return a model-ready batch.

    Pipeline: read the image with OpenCV -> run YOLOv8 segmentation -> pick the
    instance labelled "cow" with the largest mask (or, when nothing is labelled
    "cow", the detection with the largest box) -> tighten the box to the mask
    -> grow it by ``margin_ratio`` -> remove the bottom ``feet_ratio`` of the
    crop -> pad to a square with ``pad_color`` -> resize to ``target_size`` ->
    BGR->RGB -> scale to [0, 1].

    Args:
        image_path: Path of the image to read with ``cv2.imread``.
        yolo_weights: Weights file handed to ``YOLO(...)``.
        feet_ratio: Fraction of the crop height removed from the bottom.
        margin_ratio: Fraction of box width/height added on every side.
        target_size: Size passed to ``cv2.resize`` (OpenCV order: width, height).
        pad_color: Border colour for the square padding (applied while the
            image is still in BGR order).

    Returns:
        float32 array with a leading batch axis, i.e. (1, H, W, 3), values in [0, 1].

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    # Load original (cv2.imread returns None instead of raising on failure)
    orig = cv2.imread(image_path)
    if orig is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    h, w = orig.shape[:2]

    # Run YOLOv8 segmentation; a single image yields a single result object
    model = YOLO(yolo_weights)
    res = model(orig, verbose=False)[0]

    def _mask_at_image_size(idx):
        """Return instance mask ``idx`` as uint8, resized to the original image size."""
        # NOTE(review): masks.data may be at the network input resolution; confirm
        # that a plain resize lines it up with the original pixels.
        mask = res.masks.data[idx].cpu().numpy().astype(np.uint8)
        if mask.shape[0] != h or mask.shape[1] != w:
            mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
        return mask

    def _box_area(box):
        """Return the non-negative pixel area of a detection box."""
        bx1, by1, bx2, by2 = map(int, box.xyxy[0].cpu().numpy())
        return max(0, bx2 - bx1) * max(0, by2 - by1)

    if (res.masks is None) or (len(res.boxes) == 0):
        # Fallback: nothing was segmented -> keep the whole frame
        # (the feet chop and padding below still apply).
        crop = orig
    else:
        # Masks are guaranteed to exist in this branch, so "cow" candidates are
        # always ranked by mask area.
        largest_idx = None
        largest_area = -1
        for i, box in enumerate(res.boxes):
            cls_id = int(box.cls[0].item()) if hasattr(box.cls[0], "item") else int(box.cls[0])
            name = res.names.get(cls_id, str(cls_id)) if hasattr(res, "names") else str(cls_id)
            if name != "cow":
                continue
            area = int(_mask_at_image_size(i).sum())
            if area > largest_area:
                largest_area = area
                largest_idx = i

        # If no 'cow' label was found, fall back to the largest detection by box area
        if largest_idx is None:
            for i, box in enumerate(res.boxes):
                area = _box_area(box)
                if area > largest_area:
                    largest_area = area
                    largest_idx = i

        # Start from the detector's box ...
        x1, y1, x2, y2 = map(int, res.boxes[largest_idx].xyxy[0].cpu().numpy())

        # ... and tighten it to the mask extent when the mask is non-empty.
        ys, xs = np.where(_mask_at_image_size(largest_idx) > 0)
        if len(xs) > 0 and len(ys) > 0:
            # max() is the last occupied pixel (inclusive); slices are exclusive
            # at the end, hence the +1 so that row/column is not cut off.
            x1, x2 = int(xs.min()), int(xs.max()) + 1
            y1, y2 = int(ys.min()), int(ys.max()) + 1

        # Expand bbox by margin, clamped to the image
        bw, bh = x2 - x1, y2 - y1
        mx, my = int(bw * margin_ratio), int(bh * margin_ratio)
        x1 = max(0, x1 - mx)
        y1 = max(0, y1 - my)
        x2 = min(w, x2 + mx)
        y2 = min(h, y2 + my)

        crop = orig[y1:y2, x1:x2]
        if crop.size == 0:
            # Degenerate box (zero width/height): OpenCV cannot pad or resize an
            # empty array, so use the full frame like the no-detection fallback.
            crop = orig

    # Remove bottom feet_ratio of the crop height (always keep at least one row)
    ch, cw = crop.shape[:2]
    cut = int(ch * (1.0 - feet_ratio))
    cut = max(1, min(cut, ch))
    crop_no_feet = crop[:cut, :]

    # Pad to square, splitting the padding as evenly as possible
    ch2, cw2 = crop_no_feet.shape[:2]
    side = max(ch2, cw2)
    pad_top = (side - ch2) // 2
    pad_bottom = side - ch2 - pad_top
    pad_left = (side - cw2) // 2
    pad_right = side - cw2 - pad_left
    padded = cv2.copyMakeBorder(
        crop_no_feet, pad_top, pad_bottom, pad_left, pad_right,
        borderType=cv2.BORDER_CONSTANT, value=pad_color
    )

    # Resize to target
    final = cv2.resize(padded, target_size, interpolation=cv2.INTER_AREA)
    # BGR->RGB for consistency with PIL/TensorFlow pipelines
    final_rgb = cv2.cvtColor(final, cv2.COLOR_BGR2RGB)
    # Normalize to [0, 1] and add batch dim
    x = (final_rgb.astype(np.float32) / 255.0)[None, ...]  # (1, H, W, 3)
    return x

# ─────────────────────────────────────────────────────────────────────────────
# 2) Prediction helpers (SAME preprocessed batch goes to both models)
# ─────────────────────────────────────────────────────────────────────────────
def predict_keras(model_path, x_batch):
    """Load the Keras model at `model_path` and return its output for `x_batch` as a float.

    The model output is squeezed to a scalar, so this expects a single-image
    batch feeding a single-output head.
    """
    raw_output = load_model(model_path).predict(x_batch, verbose=0)
    return float(np.squeeze(raw_output))

def predict_tflite(model_path, x_batch):
    """Run one batch through a TFLite model and return its scalar output.

    Handles float models as well as integer-quantized ones (uint8 or int8):
    quantized inputs are mapped with ``q = round(x / scale + zero_point)`` and
    clipped to the integer range; quantized outputs are mapped back with
    ``scale * (q - zero_point)``.

    Args:
        model_path: Path to the ``.tflite`` file.
        x_batch: Float input batch; its shape is imposed on the interpreter
            when it differs from the model's declared input shape.

    Returns:
        The model output squeezed to a Python float (no sigmoid is applied).
    """
    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()
    in_det  = interpreter.get_input_details()[0]
    out_det = interpreter.get_output_details()[0]
    quantized_dtypes = (np.uint8, np.int8)

    x = np.asarray(x_batch)
    if in_det["dtype"] in quantized_dtypes:
        scale, zp = in_det["quantization"]
        if scale == 0:
            # A zero scale means "no quantization parameters"; use identity.
            scale, zp = 1.0, 0
        limits = np.iinfo(in_det["dtype"])
        # Clip before casting so out-of-range values saturate instead of wrapping.
        x = np.clip(np.round(x / scale + zp), limits.min, limits.max).astype(in_det["dtype"])
    else:
        x = x.astype(np.float32)

    # Resize the input tensor when the batch shape differs from the model's
    if list(in_det["shape"]) != list(x.shape):
        interpreter.resize_tensor_input(in_det["index"], x.shape, strict=False)
        interpreter.allocate_tensors()

    interpreter.set_tensor(in_det["index"], x)
    interpreter.invoke()
    y = interpreter.get_tensor(out_det["index"]).astype(np.float32)

    # Dequantize integer outputs back to real values
    if out_det["dtype"] in quantized_dtypes:
        scale, zp = out_det["quantization"]
        if scale == 0:
            scale, zp = 1.0, 0
        y = scale * (y - zp)

    prob = float(np.squeeze(y))
    # If your TFLite head outputs logits, apply sigmoid:
    # prob = 1.0 / (1.0 + np.exp(-prob))
    return prob

# ─────────────────────────────────────────────────────────────────────────────
# 3) Run end-to-end: YOLO crop -> chop feet -> pad+resize -> predict both
# ─────────────────────────────────────────────────────────────────────────────
# Every input file must exist before any work starts; the first missing one
# (checked in this order) is reported.
for required_path in (image_path, keras_model_path, tflite_path, yolo_weights):
    if not os.path.exists(required_path):
        raise FileNotFoundError(required_path)

# One YOLO-cropped, feet-chopped, padded batch shared by both classifiers.
x = yolo_crop_and_chop(
    image_path=image_path,
    yolo_weights=yolo_weights,
    feet_ratio=feet_ratio,
    margin_ratio=margin_ratio,
    target_size=target_size,
    pad_color=pad_color
)

# TFLite
tfl_prob = predict_tflite(tflite_path, x)
if tfl_prob > threshold:
    tfl_pred = "Good"
else:
    tfl_pred = "Bad"

# Keras
kr_prob = predict_keras(keras_model_path, x)
if kr_prob > threshold:
    kr_pred = "Good"
else:
    kr_pred = "Bad"

# Print side-by-side (report assembled first, emitted in one call)
report_lines = [
    f"Image: {image_path}",
    "—— TFLite ———————————————",
    f"P(Good): {tfl_prob:.4f} | Pred: {tfl_pred} (thr={threshold})",
    "—— Keras ————————————————",
    f"P(Good): {kr_prob:.4f} | Pred: {kr_pred} (thr={threshold})",
    "—— Compare ——————————————",
    f"Δ Prob (TFLite−Keras): {tfl_prob - kr_prob:+.4f}",
    f"Agreement: {tfl_pred == kr_pred}",
]
print("\n".join(report_lines))


Entire test

In [None]:
import os
import csv
import glob
import numpy as np
from PIL import Image
import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# ====== Paths (edit these) ======
test_dir = "/Users/suzetteschulenburg/Desktop/MainUse/Test"  # must contain Good/ and Bad/
keras_model_path = "/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4/model_fold2345_val1_frozen.keras"
tflite_path = "/Users/suzetteschulenburg/Desktop/Wag/cow_model.tflite"
out_csv = "/Users/suzetteschulenburg/Desktop/test_predictions_no_yolo.csv"

# ====== Settings ======
target_size = (224, 224)
threshold = 0.5  # P(Good) strictly above this counts as a "Good" prediction
classes = {"Good": 1, "Bad": 0}  # ground-truth mapping (sub-folder name -> label)
valid_exts = (".jpg", ".jpeg", ".png", ".bmp", ".webp")

# ====== Load models once ======
# Explicit raises instead of `assert`: assertions are stripped under `python -O`,
# and the other cells already report missing files with FileNotFoundError.
if not os.path.exists(keras_model_path):
    raise FileNotFoundError(f"Missing Keras model: {keras_model_path}")
keras_model = load_model(keras_model_path)

if not os.path.exists(tflite_path):
    raise FileNotFoundError(f"Missing TFLite model: {tflite_path}")
tflite_interpreter = tf.lite.Interpreter(model_path=tflite_path)
tflite_interpreter.allocate_tensors()
# Only the first input/output tensor of the TFLite model is used below.
tfl_in = tflite_interpreter.get_input_details()[0]
tfl_out = tflite_interpreter.get_output_details()[0]

# ====== Helpers ======
def load_image_resize_only(path, size=(224, 224)):
    """
    Read `path` with PIL and turn it into a single-image float batch.

    The image is forced to RGB, resized straight to `size` with bilinear
    interpolation (no cropping or padding), cast to float32, divided by 255,
    and given a leading batch axis, i.e. (1, H, W, 3).
    """
    rgb_image = Image.open(path).convert("RGB")
    resized = rgb_image.resize(size, Image.BILINEAR)
    pixels = np.asarray(resized, dtype=np.float32)
    return np.expand_dims(pixels / 255.0, axis=0)

def predict_keras_batch(x_batch):
    """Score `x_batch` with the module-level `keras_model` and return the output as a float.

    The output is squeezed to a scalar, so a single-image batch is expected.
    """
    raw_output = keras_model.predict(x_batch, verbose=0)
    return float(np.squeeze(raw_output))

def predict_tflite_batch(x_batch):
    """Score `x_batch` with the module-level TFLite interpreter and return a float.

    Uses `tflite_interpreter`, `tfl_in` and `tfl_out` created when the models
    were loaded. Float models receive float32 input. Integer-quantized models
    (uint8 or int8) receive ``round(x / scale + zero_point)`` clipped to the
    integer range, and their output is mapped back with
    ``scale * (q - zero_point)``. No sigmoid is applied to the result.
    """
    quantized_dtypes = (np.uint8, np.int8)

    # Handle quantized or float TFLite inputs
    x = np.asarray(x_batch)
    if tfl_in["dtype"] in quantized_dtypes:
        scale, zp = tfl_in["quantization"]
        if scale == 0:
            # A zero scale means "no quantization parameters"; use identity.
            scale, zp = 1.0, 0
        limits = np.iinfo(tfl_in["dtype"])
        # Clip before casting so out-of-range values saturate instead of wrapping.
        x = np.clip(np.round(x / scale + zp), limits.min, limits.max).astype(tfl_in["dtype"])
    else:
        x = x.astype(np.float32)

    # Resize TFLite input if the batch shape differs from the declared one
    if list(tfl_in["shape"]) != list(x.shape):
        tflite_interpreter.resize_tensor_input(tfl_in["index"], x.shape, strict=False)
        tflite_interpreter.allocate_tensors()

    tflite_interpreter.set_tensor(tfl_in["index"], x)
    tflite_interpreter.invoke()
    y = tflite_interpreter.get_tensor(tfl_out["index"]).astype(np.float32)

    # Dequantize output if needed
    if tfl_out["dtype"] in quantized_dtypes:
        scale, zp = tfl_out["quantization"]
        if scale == 0:
            scale, zp = 1.0, 0
        y = scale * (y - zp)

    prob = float(np.squeeze(y))
    # If your TFLite outputs logits, uncomment:
    # prob = 1.0 / (1.0 + np.exp(-prob))
    return prob

def iter_images(root):
    """Yield ``(path, label_name, label)`` for every image under ``root/<label_name>/``.

    Class folders come from the module-level `classes` mapping; missing folders
    are skipped. A file qualifies when its name, compared case-insensitively,
    ends with one of `valid_exts` (so ``IMG.JPG`` is found as well as
    ``img.jpg``). Names starting with a dot are ignored, and files are yielded
    in sorted name order so runs are reproducible.
    """
    extensions = tuple(valid_exts)
    for label_name, label in classes.items():
        folder = os.path.join(root, label_name)
        if not os.path.isdir(folder):
            continue
        for fname in sorted(os.listdir(folder)):
            # Hidden entries (e.g. macOS "._*" sidecar files) are not images.
            if fname.startswith("."):
                continue
            if fname.lower().endswith(extensions):
                yield os.path.join(folder, fname), label_name, label

# ====== Run over test set (no YOLO; images are already cropped) ======
# One CSV row per readable image, plus parallel label/prediction lists for the metrics.
rows, y_true = [], []
y_pred_keras, y_pred_tfl = [], []

for img_path, label_name, label in iter_images(test_dir):
    try:
        x = load_image_resize_only(img_path, target_size)
    except Exception as e:
        print(f"⚠️ Skipping unreadable image: {img_path} ({e})")
    else:
        # Score the same batch with both back-ends and threshold each probability.
        p_keras = predict_keras_batch(x)
        p_tfl = predict_tflite_batch(x)
        pred_keras = int(p_keras > threshold)
        pred_tfl = int(p_tfl > threshold)
        agree = (pred_keras == pred_tfl)

        # Collect
        y_true.append(label)
        y_pred_keras.append(pred_keras)
        y_pred_tfl.append(pred_tfl)
        rows.append([img_path, label_name, label, p_keras, pred_keras, p_tfl, pred_tfl, agree])

# ====== Metrics ======
def metrics(y_true, y_pred, tag):
    """Print accuracy, F1, precision and recall under the heading `tag`.

    Returns the four scores as a tuple in that same order.
    """
    scores = (
        accuracy_score(y_true, y_pred),
        f1_score(y_true, y_pred),
        precision_score(y_true, y_pred),
        recall_score(y_true, y_pred),
    )
    print(f"\n📊 {tag} — Test Metrics")
    # Captions are pre-padded so the colons line up in the printed table.
    captions = ("Accuracy ", "F1 Score ", "Precision", "Recall   ")
    for caption, value in zip(captions, scores):
        print(f"{caption}: {value:.4f}")
    return scores

if not y_true:
    print("No images found under Good/ or Bad/ in the test_dir.")
else:
    acc_k, f1_k, pre_k, rec_k = metrics(y_true, y_pred_keras, "Keras")
    acc_t, f1_t, pre_t, rec_t = metrics(y_true, y_pred_tfl,  "TFLite")

    # Row layout: [path, label_name, label, keras_prob, keras_pred, tflite_prob, tflite_pred, agree]
    agreement_rate = float(np.mean([row[7] for row in rows]))
    prob_delta = [row[5] - row[3] for row in rows]  # TFLite - Keras
    mean_abs_delta = float(np.abs(prob_delta).mean())

    print("\n🤝 Model Agreement")
    print(f"Agreement rate: {agreement_rate:.4f}")
    print(f"Mean |Δ prob| (TFLite−Keras): {mean_abs_delta:.4f}")

# ====== Save CSV ======
# The file is written even when no images were found (header row only).
csv_header = [
    "image_path", "label_name", "label_int",
    "keras_prob", "keras_pred",
    "tflite_prob", "tflite_pred",
    "agree",
]
os.makedirs(os.path.dirname(out_csv), exist_ok=True)
with open(out_csv, "w", newline="") as f:
    w = csv.writer(f)
    w.writerow(csv_header)
    w.writerows(rows)

print(f"\n✅ Done. Wrote per-image results to:\n{out_csv}")

Test

In [None]:
import os
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, precision_recall_curve, auc

# === Directories ===
# model_dir holds the per-fold .keras files; test_dir is read through its
# Good/ and Bad/ sub-folders by load_images_and_labels below.
model_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
test_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test'

# === Map folds to models (all evaluate on same test set)
# Key = fold number used in the printed report, value = model file name inside model_dir.
folds = {
    1: 'model_fold2345_val1_frozen.keras',
    2: 'model_fold1345_val2_frozen.keras',
    3: 'model_fold1245_val3_frozen.keras',
    4: 'model_fold1235_val4_frozen.keras',
    5: 'model_fold1234_val5_frozen.keras'
}

# === Load test images and labels ===
def load_images_and_labels(image_dir):
    """Load every .jpg under `image_dir`/Good and `image_dir`/Bad.

    Images are loaded at 224x224 and scaled to [0, 1]; labels are 1 for Good
    and 0 for Bad. Missing sub-folders are skipped. Returns the pair
    (images array, labels array).
    """
    images, labels = [], []
    for label, subdir in ((1, 'Good'), (0, 'Bad')):
        full_path = os.path.join(image_dir, subdir)
        if not os.path.exists(full_path):
            continue
        # Extension check is case-insensitive, so .JPG files are included.
        jpg_names = [name for name in os.listdir(full_path) if name.lower().endswith('.jpg')]
        for fname in jpg_names:
            img = load_img(os.path.join(full_path, fname), target_size=(224, 224))
            images.append(img_to_array(img) / 255.0)
            labels.append(label)
    return np.array(images), np.array(labels)

# === Load test set once ===
X_test, y_test = load_images_and_labels(test_dir)

# === Evaluate each fold's model on the test set ===
keys = ['Accuracy', 'F1 Score', 'Precision', 'Recall', 'AUC-PR']
metrics = []
for fold, model_name in folds.items():
    print(f"\n📂 Evaluating Fold {fold} Model on Test Set")

    model_path = os.path.join(model_dir, model_name)
    if not os.path.exists(model_path):
        print(f"⚠️ Model not found for Fold {fold}, skipping.")
        continue

    model = load_model(model_path)

    # Predict: probabilities first, then hard labels at the 0.5 cut-off
    y_probs = model.predict(X_test, verbose=0).flatten()
    y_preds = (y_probs > 0.5).astype(int)

    # Metrics: thresholded scores use y_preds, the PR curve uses raw probabilities
    fold_scores = {
        'Fold': fold,
        'Accuracy': accuracy_score(y_test, y_preds),
        'F1 Score': f1_score(y_test, y_preds),
        'Precision': precision_score(y_test, y_preds),
        'Recall': recall_score(y_test, y_preds),
    }
    prec_vals, rec_vals, _ = precision_recall_curve(y_test, y_probs)
    fold_scores['AUC-PR'] = auc(rec_vals, prec_vals)
    metrics.append(fold_scores)

    # Metric names are left-padded to 9 characters so the colons line up
    for key in keys:
        print(f"  {key:<9}: {fold_scores[key]:.4f}")
    print("-" * 40)

# === Print average and std metrics ===
print("\n📊 Average and Std Dev of Test Metrics Across Folds:")
for key in keys:
    per_fold = [entry[key] for entry in metrics]
    print(f"{key}: {np.mean(per_fold):.4f} ± {np.std(per_fold):.4f}")

Show predictions per cattle ID

In [None]:
import os
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# === Paths ===
# model_dir + fold1_model_name locate the single model scored in this cell;
# test_dir is read through its Good/ and Bad/ sub-folders.
model_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
test_dir  = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test'
fold1_model_name = 'model_fold2345_val1_frozen.keras'

# === Load test images ===
def load_test_images(image_dir):
    """Load .jpg/.jpeg/.png images from `image_dir`/Good and `image_dir`/Bad.

    Images are loaded at 224x224 and scaled to [0, 1]; labels are 1 for Good
    and 0 for Bad. Missing sub-folders are skipped. Returns
    (images array, labels array, list of file names) in matching order.
    """
    accepted = ('.jpg', '.jpeg', '.png')
    images, labels, fnames = [], [], []
    for label, subdir in ((1, 'Good'), (0, 'Bad')):
        full_path = os.path.join(image_dir, subdir)
        if not os.path.exists(full_path):
            continue
        for fname in os.listdir(full_path):
            if not fname.lower().endswith(accepted):
                continue
            img = load_img(os.path.join(full_path, fname), target_size=(224, 224))
            images.append(img_to_array(img) / 255.0)
            labels.append(label)
            fnames.append(fname)
    return np.array(images), np.array(labels), fnames

# === Load test set ===
X_test, y_test, file_names = load_test_images(test_dir)

# === Load Fold 1 model ===
model_path = os.path.join(model_dir, fold1_model_name)
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model not found: {model_path}")

model = load_model(model_path)

# === Predict ===
y_probs = model.predict(X_test, verbose=0).flatten()

# === Show predictions for each image ===
# One table row per image: file name, true label, model output as a percentage.
print("\n📋 Predictions for Fold 1 model on Test Set:")
print("{:<50} {:>5} {:>15}".format('Image', 'True', 'Confidence (%)'))
print("-" * 80)
for fname, true_label, prob in zip(file_names, y_test, y_probs):
    print(f"{fname:<50} {true_label:>5} {prob * 100:>14.2f}")

In [None]:
import os
import io
import cv2
import numpy as np
from PIL import Image
from ultralytics import YOLO
from tensorflow.keras.models import load_model

# ==== Paths ====
# input_dir is the folder the target images are read from; model_dir and
# fold1_model_name locate the Keras classifier; yolo_weights is the segmentation model.
input_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test/Bad' 
model_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
fold1_model_name = 'model_fold2345_val1_frozen.keras'
yolo_weights = '/Users/suzetteschulenburg/Desktop/Wag/Code/app2/app/cow_api/yolov8s-seg.pt'  # same as server.py location

# Images to process (just these four); file names are joined onto input_dir
target_images = [
    'CSS19912_IMG_9251.jpg',
    'CSS19912_IMG_9250.jpg',
    'CSS19912_IMG_9249.jpg',
    'CSS19912_IMG_9248.jpg',
]

# Side length (pixels) of the square produced by resize_with_padding
IMG_SIZE = 224

# === Resize with padding (white background), EXACTLY like server.py ===
def resize_with_padding(image, desired_size=224):
    """Fit `image` inside a `desired_size` square, padding with white.

    The longer side is scaled to `desired_size` (aspect ratio kept, sizes
    truncated to int) and the remaining space is split between both sides,
    with any odd pixel going to the bottom/right.
    """
    src_h, src_w = image.shape[:2]
    factor = float(desired_size) / max(src_h, src_w)
    dst_h = int(src_h * factor)
    dst_w = int(src_w * factor)
    scaled = cv2.resize(image, (dst_w, dst_h))

    gap_w = desired_size - dst_w
    gap_h = desired_size - dst_h
    top = gap_h // 2
    bottom = gap_h - top
    left = gap_w // 2
    right = gap_w - left
    white = [255, 255, 255]
    return cv2.copyMakeBorder(scaled, top, bottom, left, right, cv2.BORDER_CONSTANT, value=white)

# === YOLO preprocessing: segment, mask background white, crop with margin, remove bottom 30%, pad/resize ===
def preprocess_image_with_yolo_from_path(image_path, yolo_model):
    """Turn one image file into a classifier-ready batch using YOLO segmentation.

    Steps: read the file as RGB, run `yolo_model`, pick the 'cow' detection with
    the largest box area, paint everything outside its mask white, crop to the
    box (widened horizontally, bottom 30% of the height removed), then
    pad/resize with `resize_with_padding` and scale to [0, 1].

    Args:
        image_path: Path of the image file to read.
        yolo_model: Callable segmentation model whose first result exposes
            `.masks`, `.boxes` and `.names`.

    Returns:
        float32 array of shape (1, IMG_SIZE, IMG_SIZE, 3).

    Raises:
        ValueError: If the model returns no masks, or no detection is named "cow".
    """
    # Read with PIL (server.py uses PIL → np.array)
    with open(image_path, 'rb') as f:
        image_bytes = f.read()
    pil_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    image_rgb = np.array(pil_image)
    H, W = image_rgb.shape[:2]

    results = yolo_model(image_rgb)
    if (not results) or (results[0].masks is None) or (len(results[0].masks.data) == 0):
        raise ValueError("No cow detected in the image.")

    masks = results[0].masks
    boxes = results[0].boxes
    names = results[0].names

    # Choose largest detected 'cow' by bounding-box area
    best_idx, best_area = None, 0
    for i, cls_id in enumerate(boxes.cls.cpu().numpy()):
        cls_name = names[int(cls_id)]
        if cls_name != "cow":
            continue
        x1, y1, x2, y2 = map(int, boxes.xyxy[i].cpu().numpy())
        area = (x2 - x1) * (y2 - y1)
        if area > best_area:
            best_idx, best_area = i, area

    if best_idx is None:
        raise ValueError("No cow detected in the image.")

    # Build masked image (white background): pixels where the mask is <= 0.5 become 255
    mask = masks.data[best_idx].cpu().numpy()
    mask_resized = cv2.resize(mask, (W, H), interpolation=cv2.INTER_NEAREST)
    mask_3 = np.stack([mask_resized] * 3, axis=-1)
    masked = np.where(mask_3 > 0.5, image_rgb, 255)

    # Crop to bbox with +10% horizontal margin, then remove bottom 30%
    # (no vertical margin is added; y1 is only clamped to the image below)
    x1, y1, x2, y2 = map(int, boxes.xyxy[best_idx].cpu().numpy())
    margin = 0.1
    x1 = max(0, x1 - int((x2 - x1) * margin))
    # NOTE(review): x1 has already been moved left here, so the right margin is
    # 10% of the widened width and slightly larger than the left one. Left
    # unchanged because this cell is meant to reproduce server.py; confirm
    # against that file before changing it.
    x2 = min(W, x2 + int((x2 - x1) * margin))
    # remove bottom 30%
    y2 = y1 + int((y2 - y1) * 0.7)
    y1 = max(0, y1); y2 = min(H, y2)

    cropped = masked[y1:y2, x1:x2]
    resized = resize_with_padding(cropped, desired_size=IMG_SIZE).astype(np.float32) / 255.0
    return np.expand_dims(resized, axis=0)  # (1,224,224,3)

# ==== Load models ====
yolo = YOLO(yolo_weights)
keras_model = load_model(os.path.join(model_dir, fold1_model_name))

# ==== Run ====
# One table row per target image; preprocessing failures become an "Error" row.
print("\n📋 Predictions (YOLO seg → server.py preprocessing → Fold 1 Keras):")
print("{:<35} {:>10} {:>7} {:>10}".format('Image', 'P(Good)%', 'Pred', 'MaxConf%'))
print("-" * 70)

for name in target_images:
    img_path = os.path.join(input_dir, name)
    try:
        x = preprocess_image_with_yolo_from_path(img_path, yolo)  # (1,224,224,3)
    except Exception as e:
        print(f"{name:<35} {'-':>10} {'Error':>7} {str(e):>10}")
    else:
        p_good = float(keras_model.predict(x, verbose=0).flatten()[0]) * 100.0
        # MaxConf is the confidence of whichever class was predicted.
        if p_good >= 50.0:
            pred, max_conf = 'Good', p_good
        else:
            pred, max_conf = 'Bad', 100.0 - p_good

        print(f"{name:<35} {p_good:>10.2f} {pred:>7} {max_conf:>10.2f}")

Majority Cows

In [None]:
import os
import numpy as np
from collections import defaultdict, Counter
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, precision_recall_curve, auc

# === Directories ===
# model_dir holds the per-fold .keras files; test_dir is read through its
# Good/ and Bad/ sub-folders by load_test_images below.
model_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
test_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test'

# === Map folds to models
# Key = fold number used in the printed report, value = model file name inside model_dir.
folds = {
    1: 'model_fold2345_val1_frozen.keras',
    2: 'model_fold1345_val2_frozen.keras',
    3: 'model_fold1245_val3_frozen.keras',
    4: 'model_fold1235_val4_frozen.keras',
    5: 'model_fold1234_val5_frozen.keras'
}

# === Load test images with filenames and labels
def load_test_images(image_dir):
    """Load every .jpg under `image_dir`/Good and `image_dir`/Bad.

    Images are loaded at 224x224 and scaled to [0, 1]; labels are 1 for Good
    and 0 for Bad. Missing sub-folders are skipped. Returns
    (images array, labels array, list of file names) in matching order.
    """
    images, labels, filenames = [], [], []
    for label, subdir in ((1, 'Good'), (0, 'Bad')):
        path = os.path.join(image_dir, subdir)
        if not os.path.exists(path):
            continue
        for fname in os.listdir(path):
            if not fname.lower().endswith('.jpg'):
                continue
            img = load_img(os.path.join(path, fname), target_size=(224, 224))
            images.append(img_to_array(img) / 255.0)
            labels.append(label)
            filenames.append(fname)
    return np.array(images), np.array(labels), filenames

# === Extract cow ID for voting
def get_individual_id(filename):
    """Return the part of `filename` before its first underscore.

    Example: "ABC123" from "ABC123_1_IMG_4567.jpg". A name without an
    underscore is returned unchanged.
    """
    prefix, _, _ = filename.partition('_')
    return prefix

# === Load test set once
X_test, y_test, test_filenames = load_test_images(test_dir)

# === Evaluate each model
# Image-level predictions are pooled per animal (ID = file-name prefix) and all
# metrics are computed over animals rather than images.
metrics = []
for fold, model_file in folds.items():
    print(f"\n📂 Evaluating Fold {fold} with Majority Voting")

    model_path = os.path.join(model_dir, model_file)
    if not os.path.exists(model_path):
        print(f"⚠️ Model not found: {model_path}")
        continue

    model = load_model(model_path)
    y_probs = model.predict(X_test, verbose=0).flatten()
    y_preds = (y_probs > 0.5).astype(int)

    # === Organize by individual
    indiv_preds = defaultdict(list)
    indiv_probs = defaultdict(list)
    indiv_labels = {}

    for prob, pred, label, fname in zip(y_probs, y_preds, y_test, test_filenames):
        indiv_id = get_individual_id(fname)
        indiv_preds[indiv_id].append(pred)
        indiv_probs[indiv_id].append(prob)
        indiv_labels[indiv_id] = label  # Assumes all images of individual have same label

    final_preds = []
    final_labels = []
    final_probs = []

    for indiv_id, votes in indiv_preds.items():
        # Mean vote rounded to 0/1; np.round rounds halves to even, so an exact
        # tie resolves to 0 (Bad).
        majority_vote = int(np.round(np.mean(votes)))
        final_preds.append(majority_vote)
        final_labels.append(indiv_labels[indiv_id])
        # Continuous per-animal score for the PR curve
        final_probs.append(float(np.mean(indiv_probs[indiv_id])))

    # === Metrics
    acc = accuracy_score(final_labels, final_preds)
    f1 = f1_score(final_labels, final_preds)
    prec = precision_score(final_labels, final_preds)
    rec = recall_score(final_labels, final_preds)
    # The PR curve needs scores, not hard 0/1 votes: with binary input it only
    # has a single operating point. Use the mean probability per individual.
    prec_vals, rec_vals, _ = precision_recall_curve(final_labels, final_probs)
    auc_pr = auc(rec_vals, prec_vals)

    metrics.append({
        'Fold': fold,
        'Accuracy': acc,
        'F1 Score': f1,
        'Precision': prec,
        'Recall': rec,
        'AUC-PR': auc_pr
    })

    print(f"  Accuracy : {acc:.4f}")
    print(f"  F1 Score : {f1:.4f}")
    print(f"  Precision: {prec:.4f}")
    print(f"  Recall   : {rec:.4f}")
    print(f"  AUC-PR   : {auc_pr:.4f}")
    print("-" * 40)

# === Summary
print("\n📊 Average and Std Dev of Metrics Across Folds (Majority Vote):")
for key in ['Accuracy', 'F1 Score', 'Precision', 'Recall', 'AUC-PR']:
    values = [m[key] for m in metrics]
    print(f"{key}: {np.mean(values):.4f} ± {np.std(values):.4f}")

Majority Bulls

In [None]:
import os
import numpy as np
from collections import defaultdict, Counter
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, precision_recall_curve, auc

# === Paths ===
# base_model_dir is searched for fold<N>/bull_transfer_model.keras below;
# test_dir is read through its Good/ and Bad/ sub-folders.
base_model_dir = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments_8e3Final5Folds/Models'
test_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed/Test_Copy1'

# === Load test images with filenames and IDs ===
def load_images_labels_and_ids(image_dir):
    """Load every .jpg under `image_dir`/Good and `image_dir`/Bad with its animal ID.

    Images are loaded at 224x224 and scaled to [0, 1]; labels are 1 for Good
    and 0 for Bad; the ID is the file-name part before the first underscore
    (e.g., abc123_1.jpg → abc123). Missing sub-folders are skipped. Returns
    four arrays in matching order: images, labels, file names, IDs.
    """
    images, labels, filenames, ids = [], [], [], []
    for subdir in ('Good', 'Bad'):
        subdir_path = os.path.join(image_dir, subdir)
        if not os.path.exists(subdir_path):
            continue
        label = 1 if subdir == 'Good' else 0
        for fname in os.listdir(subdir_path):
            if not fname.lower().endswith('.jpg'):
                continue
            img = load_img(os.path.join(subdir_path, fname), target_size=(224, 224))
            images.append(img_to_array(img) / 255.0)
            labels.append(label)
            filenames.append(fname)
            ids.append(fname.split('_')[0])
    return np.array(images), np.array(labels), np.array(filenames), np.array(ids)

X_test, y_test, filenames, ids = load_images_labels_and_ids(test_dir)

# === Evaluate each fold's model with majority voting per individual ===
for fold in range(1, 6):
    print(f"\n🧪 Evaluating Fold {fold} with majority vote per individual")

    model_path = os.path.join(base_model_dir, f'fold{fold}', 'bull_transfer_model.keras')
    if not os.path.exists(model_path):
        print(f"⚠️ Model not found for Fold {fold}: {model_path}")
        continue

    model = load_model(model_path)
    y_pred_probs = model.predict(X_test, verbose=0).flatten()
    y_pred = (y_pred_probs > 0.5).astype(int)

    # === Group predictions and labels by ID ===
    id_to_preds = defaultdict(list)
    id_to_true = {}
    for pred, true, cow_id in zip(y_pred, y_test, ids):
        id_to_true[cow_id] = true  # assumes all images of same cow have same label
        id_to_preds[cow_id].append(pred)

    # === Apply majority vote per individual (IDs visited in sorted order) ===
    y_true_individuals = []
    y_pred_individuals = []
    for cow_id in sorted(id_to_preds):
        # most_common(1) yields the single most frequent image-level prediction
        (vote, _count), = Counter(id_to_preds[cow_id]).most_common(1)
        y_pred_individuals.append(vote)
        y_true_individuals.append(id_to_true[cow_id])

    # === Metrics (computed over individuals, not images) ===
    acc = accuracy_score(y_true_individuals, y_pred_individuals)
    f1 = f1_score(y_true_individuals, y_pred_individuals)
    precision = precision_score(y_true_individuals, y_pred_individuals)
    recall = recall_score(y_true_individuals, y_pred_individuals)
    conf_mat = confusion_matrix(y_true_individuals, y_pred_individuals)

    print(f"📊 Fold {fold} Test Metrics (Majority Vote per Individual):")
    print(f"Individuals Evaluated: {len(y_true_individuals)}")
    for caption, value in (
        ("Accuracy     ", acc),
        ("F1 Score     ", f1),
        ("Precision    ", precision),
        ("Recall       ", recall),
    ):
        print(f"{caption}: {value:.4f}")
    print(f"Confusion Matrix:\n{conf_mat}")

In [None]:
import os
import numpy as np
from collections import defaultdict, Counter
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import (
    accuracy_score, f1_score, precision_score, recall_score, confusion_matrix,
    precision_recall_curve, auc
)

# === Paths ===
# base_model_dir is searched for fold<N>/bull_transfer_model.keras below;
# test_dir is read through its Good/ and Bad/ sub-folders.
base_model_dir = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments_8e3Final5Folds/Models'
test_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed/Test_Copy1'

# === Load test images with filenames and IDs ===
def load_images_labels_and_ids(image_dir):
    """Load every .jpg under `image_dir`/Good and `image_dir`/Bad with its animal ID.

    Images are loaded at 224x224 and scaled to [0, 1]; labels are 1 for Good
    and 0 for Bad; the ID is the file-name part before the first underscore
    (e.g. BTB12345 from BTB12345_1_IMG.jpg). Missing sub-folders are skipped.
    Returns four arrays in matching order: images, labels, file names, IDs.
    """
    images, labels, filenames, ids = [], [], [], []
    for subdir in ('Good', 'Bad'):
        subdir_path = os.path.join(image_dir, subdir)
        if not os.path.exists(subdir_path):
            continue
        label = 1 if subdir == 'Good' else 0
        for fname in os.listdir(subdir_path):
            if not fname.lower().endswith('.jpg'):
                continue
            img = load_img(os.path.join(subdir_path, fname), target_size=(224, 224))
            images.append(img_to_array(img) / 255.0)
            labels.append(label)
            filenames.append(fname)
            ids.append(fname.split('_')[0])
    return np.array(images), np.array(labels), np.array(filenames), np.array(ids)

X_test, y_test, filenames, ids = load_images_labels_and_ids(test_dir)

# === Evaluate each fold's model with majority voting per individual ===
for fold in range(1, 6):
    print(f"\n🧪 Evaluating Fold {fold} with majority vote per individual")

    model_path = os.path.join(base_model_dir, f'fold{fold}', 'bull_transfer_model.keras')
    if not os.path.exists(model_path):
        print(f"⚠️ Model not found for Fold {fold}: {model_path}")
        continue

    model = load_model(model_path)
    y_pred_probs = model.predict(X_test, verbose=0).flatten()
    y_pred = (y_pred_probs > 0.5).astype(int)

    # === Group probabilities, predictions and labels by ID ===
    id_to_probs = defaultdict(list)
    id_to_preds = defaultdict(list)
    id_to_true = {}
    for prob, pred, true, cow_id in zip(y_pred_probs, y_pred, y_test, ids):
        id_to_true[cow_id] = true  # assumes all images of same cow have same label
        id_to_probs[cow_id].append(prob)
        id_to_preds[cow_id].append(pred)

    # === Apply majority vote per individual (IDs visited in sorted order) ===
    y_true_individuals = []
    y_pred_individuals = []
    y_prob_individuals = []
    for cow_id in sorted(id_to_preds):
        # Hard label: most frequent image-level prediction.
        (vote, _count), = Counter(id_to_preds[cow_id]).most_common(1)
        y_pred_individuals.append(vote)
        y_true_individuals.append(id_to_true[cow_id])
        # Soft score for the PR curve: mean image-level probability.
        y_prob_individuals.append(np.mean(id_to_probs[cow_id]))

    # === Metrics (computed over individuals, not images) ===
    acc = accuracy_score(y_true_individuals, y_pred_individuals)
    f1 = f1_score(y_true_individuals, y_pred_individuals)
    precision = precision_score(y_true_individuals, y_pred_individuals)
    recall = recall_score(y_true_individuals, y_pred_individuals)
    prec_vals, rec_vals, _ = precision_recall_curve(y_true_individuals, y_prob_individuals)
    auc_pr = auc(rec_vals, prec_vals)
    conf_mat = confusion_matrix(y_true_individuals, y_pred_individuals)

    print(f"📊 Fold {fold} Test Metrics (Majority Vote per Individual):")
    print(f"Individuals Evaluated: {len(y_true_individuals)}")
    for caption, value in (
        ("Accuracy     ", acc),
        ("F1 Score     ", f1),
        ("Precision    ", precision),
        ("Recall       ", recall),
        ("AUC-PR       ", auc_pr),
    ):
        print(f"{caption}: {value:.4f}")
    print(f"Confusion Matrix:\n{conf_mat}")

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Apply one font size (16) to every text element matplotlib draws.
plt.rcParams.update(dict.fromkeys(
    [
        'font.size',          # Base font size
        'axes.titlesize',     # Title font size
        'axes.labelsize',     # Axis label font size
        'xtick.labelsize',    # X-tick label font size
        'ytick.labelsize',    # Y-tick label font size
        'legend.fontsize',    # Legend font size (if one is added)
    ],
    16,
))

# === Convert metrics to a long-format DataFrame: one row per (fold, metric)
# NOTE(review): `metrics` comes from an earlier cell; presumably one dict per
# fold with a 'Fold' key plus the metric names below — confirm.
metric_names = ['Accuracy', 'F1 Score', 'Precision', 'Recall', 'AUC-PR']
plot_data = [
    {'Fold': f"Fold {m['Fold']}", 'Metric': metric_name, 'Value': m[metric_name]}
    for m in metrics
    for metric_name in metric_names
]
df_plot = pd.DataFrame(plot_data)

# === Lower y-limit: a small buffer below the smallest value, never below 0.
# The upper limit is fixed at 1.0 when the plot is drawn.
buffer = 0.05
vmin = df_plot['Value'].min()
ylim_low = max(0.0, vmin - buffer)

# === Plot violin plot (one violin per metric, individual fold values as points)
fig, ax = plt.subplots(figsize=(10, 5))
sns.violinplot(data=df_plot, x='Metric', y='Value', inner='point', palette='muted', ax=ax)
ax.set_title('Distribution of Evaluation Metrics Across Folds')
ax.set_ylim(ylim_low, 1.0)
ax.grid(alpha=0.3)
fig.tight_layout()
plt.show()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# === Prepare DataFrame
# One row per (fold, metric) pair, in long format for seaborn.
# NOTE(review): `metrics` comes from an earlier cell; presumably one dict per
# fold with a 'Fold' key plus the metric names below — confirm.
metric_names = ['Accuracy', 'F1 Score', 'Precision', 'Recall', 'AUC-PR']
plot_data = [
    {'Fold': f"Fold {m['Fold']}", 'Metric': metric_name, 'Value': m[metric_name]}
    for m in metrics
    for metric_name in metric_names
]

df_plot = pd.DataFrame(plot_data)

# === Better Bubble Plot
# Bubble area encodes the metric value; colour encodes the fold.
fig, ax = plt.subplots(figsize=(10, 6))
bubble_style = dict(size='Value', hue='Fold', sizes=(100, 600), alpha=0.7, palette='deep')
sns.scatterplot(data=df_plot, x='Metric', y='Value', ax=ax, **bubble_style)

ax.set_title('Evaluation Metrics per Fold (Bubble Size = Metric Value)')
ax.set_ylim(0.5, 1.05)
ax.grid(alpha=0.3)
fig.tight_layout()
plt.show()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# === Convert to heatmap-style DataFrame
# Wide table: one row per fold (used as index), one column per metric.
# NOTE(review): `metrics` comes from an earlier cell; presumably one dict per
# fold with a 'Fold' key plus the metric columns below — confirm.
metric_columns = ['Accuracy', 'F1 Score', 'Precision', 'Recall', 'AUC-PR']
heatmap_rows = []
for m in metrics:
    row = {'Fold': f"Fold {m['Fold']}"}
    row.update((column, m[column]) for column in metric_columns)
    heatmap_rows.append(row)
heatmap_data = pd.DataFrame(heatmap_rows).set_index('Fold')

# === Plot heatmap (cells annotated with the value to three decimals)
fig, ax = plt.subplots(figsize=(10, 4))
sns.heatmap(heatmap_data, annot=True, cmap='Blues', fmt='.3f', cbar=True, ax=ax)
ax.set_title('Evaluation Metrics Across Folds')
fig.tight_layout()
plt.show()

Confusion matrices

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import confusion_matrix

# === Setup
# Directory with the saved per-fold models, and the folder of test images.
model_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
test_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test'

# Fold number -> model file name. Each name lists the four fold numbers other
# than the key (presumably the training folds), then the key as the "val" fold.
folds = {
    val_fold: 'model_fold{}_val{}_frozen.keras'.format(
        ''.join(str(k) for k in range(1, 6) if k != val_fold), val_fold
    )
    for val_fold in range(1, 6)
}

# === Load test images
def load_images_and_labels(image_dir):
    """Load the .jpg images under image_dir/Good and image_dir/Bad.

    Each image is loaded at 224x224 and its pixel array is divided by 255.0.
    Images from 'Good' get label 1, images from 'Bad' get label 0. A class
    folder that does not exist is skipped.

    Returns:
        (images, labels) as two NumPy arrays in matching order.
    """
    pixel_arrays = []
    class_ids = []
    for class_name, class_id in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if not os.path.exists(class_dir):
            continue
        # Extension check is case-insensitive; anything that is not .jpg is ignored.
        jpg_names = [name for name in os.listdir(class_dir) if name.lower().endswith('.jpg')]
        for jpg_name in jpg_names:
            picture = load_img(os.path.join(class_dir, jpg_name), target_size=(224, 224))
            pixel_arrays.append(img_to_array(picture) / 255.0)
            class_ids.append(class_id)
    return np.array(pixel_arrays), np.array(class_ids)

X_test, y_test = load_images_and_labels(test_dir)

# === Collect confusion matrices
# One (fold, matrix) pair per model file that exists on disk.
conf_matrices = []

for fold, model_name in folds.items():
    model_path = os.path.join(model_dir, model_name)
    if not os.path.exists(model_path):
        print(f"⚠️ Model not found for Fold {fold}")
        continue

    model = load_model(model_path)
    y_probs = model.predict(X_test, verbose=0).flatten()
    y_pred = (y_probs >= 0.5).astype(int)
    # labels=[0, 1] pins the matrix to 2x2 (row/column 0 = Bad, 1 = Good) even
    # when only one class occurs; the two tick labels set below rely on that.
    cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
    conf_matrices.append((fold, cm))

# === Plot confusion matrices
# One panel per configured fold; squeeze=False keeps `axes` indexable for any
# number of folds.
n_panels = len(folds)
fig, axes = plt.subplots(1, n_panels, figsize=(3 * n_panels, 3), squeeze=False)
axes = axes[0]

for ax, (fold, cm) in zip(axes, conf_matrices):
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, ax=ax, square=True,
                annot_kws={"size": 10})
    ax.set_title(f'Fold {fold}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.tick_params(axis='both', which='both', length=0)
    ax.set_xticklabels(['Bad', 'Good'], fontsize=8)
    ax.set_yticklabels(['Bad', 'Good'], fontsize=8, rotation=0)

# Hide panels left over when some model files were missing.
for ax in axes[len(conf_matrices):]:
    ax.set_visible(False)

plt.tight_layout()
plt.suptitle('Confusion Matrices for Each Fold (Threshold = 0.5)', y=1.05)
plt.show()

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import confusion_matrix

# === Setup
# Directory with the saved per-fold models, and the second test-image folder.
model_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
test_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test2'

# Fold number -> model file name. Each name lists the four fold numbers other
# than the key (presumably the training folds), then the key as the "val" fold.
folds = {
    val_fold: 'model_fold{}_val{}_frozen.keras'.format(
        ''.join(str(k) for k in range(1, 6) if k != val_fold), val_fold
    )
    for val_fold in range(1, 6)
}

# === Load test images
def load_images_and_labels(image_dir):
    """Load the .jpg images under image_dir/Good and image_dir/Bad.

    Each image is loaded at 224x224 and its pixel array is divided by 255.0.
    Images from 'Good' get label 1, images from 'Bad' get label 0. A class
    folder that does not exist is skipped.

    Returns:
        (images, labels) as two NumPy arrays in matching order.
    """
    pixel_arrays = []
    class_ids = []
    for class_name, class_id in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if not os.path.exists(class_dir):
            continue
        # Extension check is case-insensitive; anything that is not .jpg is ignored.
        jpg_names = [name for name in os.listdir(class_dir) if name.lower().endswith('.jpg')]
        for jpg_name in jpg_names:
            picture = load_img(os.path.join(class_dir, jpg_name), target_size=(224, 224))
            pixel_arrays.append(img_to_array(picture) / 255.0)
            class_ids.append(class_id)
    return np.array(pixel_arrays), np.array(class_ids)

X_test, y_test = load_images_and_labels(test_dir)

# === Collect confusion matrices
# One (fold, matrix) pair per model file that exists on disk.
conf_matrices = []

for fold, model_name in folds.items():
    model_path = os.path.join(model_dir, model_name)
    if not os.path.exists(model_path):
        print(f"⚠️ Model not found for Fold {fold}")
        continue

    model = load_model(model_path)
    y_probs = model.predict(X_test, verbose=0).flatten()
    y_pred = (y_probs >= 0.5).astype(int)
    # labels=[0, 1] pins the matrix to 2x2 (row/column 0 = Bad, 1 = Good) even
    # when only one class occurs; the two tick labels set below rely on that.
    cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
    conf_matrices.append((fold, cm))

# === Plot confusion matrices
# One panel per configured fold; squeeze=False keeps `axes` indexable for any
# number of folds.
n_panels = len(folds)
fig, axes = plt.subplots(1, n_panels, figsize=(3 * n_panels, 3), squeeze=False)
axes = axes[0]

for ax, (fold, cm) in zip(axes, conf_matrices):
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, ax=ax, square=True,
                annot_kws={"size": 10})
    ax.set_title(f'Fold {fold}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.tick_params(axis='both', which='both', length=0)
    ax.set_xticklabels(['Bad', 'Good'], fontsize=8)
    ax.set_yticklabels(['Bad', 'Good'], fontsize=8, rotation=0)

# Hide panels left over when some model files were missing.
for ax in axes[len(conf_matrices):]:
    ax.set_visible(False)

plt.tight_layout()
plt.suptitle('Confusion Matrices for Each Fold (Threshold = 0.5)', y=1.05)
plt.show()

In [None]:
import os
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import confusion_matrix

# === Setup
# Directory with the saved per-fold models, and the folder of test images.
model_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
test_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test'

# Fold number -> model file name. Each name lists the four fold numbers other
# than the key (presumably the training folds), then the key as the "val" fold.
folds = {
    val_fold: 'model_fold{}_val{}_frozen.keras'.format(
        ''.join(str(k) for k in range(1, 6) if k != val_fold), val_fold
    )
    for val_fold in range(1, 6)
}

# === Load test images
def load_images_and_labels(image_dir):
    """Load the .jpg images under image_dir/Good and image_dir/Bad.

    Each image is loaded at 224x224 and its pixel array is divided by 255.0.
    Images from 'Good' get label 1, images from 'Bad' get label 0. A class
    folder that does not exist is skipped.

    Returns:
        (images, labels) as two NumPy arrays in matching order.
    """
    pixel_arrays = []
    class_ids = []
    for class_name, class_id in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if not os.path.exists(class_dir):
            continue
        # Extension check is case-insensitive; anything that is not .jpg is ignored.
        jpg_names = [name for name in os.listdir(class_dir) if name.lower().endswith('.jpg')]
        for jpg_name in jpg_names:
            picture = load_img(os.path.join(class_dir, jpg_name), target_size=(224, 224))
            pixel_arrays.append(img_to_array(picture) / 255.0)
            class_ids.append(class_id)
    return np.array(pixel_arrays), np.array(class_ids)

X_test, y_test = load_images_and_labels(test_dir)

# === Collect and print confusion matrices
# One (fold, matrix) pair per model file that exists on disk.
conf_matrices = []

print("\n📊 Confusion Matrices (Threshold = 0.5):")
print("Fold\tConfusion Matrix [TN, FP] [FN, TP]")
print("------------------------------------------------")

for fold, model_name in folds.items():
    model_path = os.path.join(model_dir, model_name)
    if not os.path.exists(model_path):
        print(f"⚠️ Model not found for Fold {fold}")
        continue

    model = load_model(model_path)
    y_probs = model.predict(X_test, verbose=0).flatten()
    y_pred = (y_probs >= 0.5).astype(int)
    # labels=[0, 1] pins the matrix to 2x2 so both rows printed below always
    # exist and follow the [TN, FP] [FN, TP] layout promised by the header.
    cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
    conf_matrices.append((fold, cm))

    # Print confusion matrix in one line. tolist() already renders the
    # brackets, so no extra pair is added (that printed "[[a, b]] [[c, d]]").
    cm_flat = f"{cm[0].tolist()} {cm[1].tolist()}"
    print(f"{fold}\t{cm_flat}")

Scatter plot

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# === Paths
# Directory with the saved per-fold models, and the folder of test images.
model_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
test_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test'

# Fold number -> model file name. Each name lists the four fold numbers other
# than the key (presumably the training folds), then the key as the "val" fold.
folds = {
    val_fold: 'model_fold{}_val{}_frozen.keras'.format(
        ''.join(str(k) for k in range(1, 6) if k != val_fold), val_fold
    )
    for val_fold in range(1, 6)
}

# === Load test set
def load_images_and_labels(image_dir):
    """Load the .jpg images under image_dir/Good and image_dir/Bad.

    Each image is loaded at 224x224 and its pixel array is divided by 255.0.
    Images from 'Good' get label 1, images from 'Bad' get label 0. A class
    folder that does not exist is skipped.

    Returns:
        (images, labels) as two NumPy arrays in matching order.
    """
    pixel_arrays = []
    class_ids = []
    for class_name, class_id in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if not os.path.exists(class_dir):
            continue
        # Extension check is case-insensitive; anything that is not .jpg is ignored.
        jpg_names = [name for name in os.listdir(class_dir) if name.lower().endswith('.jpg')]
        for jpg_name in jpg_names:
            picture = load_img(os.path.join(class_dir, jpg_name), target_size=(224, 224))
            pixel_arrays.append(img_to_array(picture) / 255.0)
            class_ids.append(class_id)
    return np.array(pixel_arrays), np.array(class_ids)

X_test, y_test = load_images_and_labels(test_dir)

# === Collect predictions from all models
# Probabilities and their true labels are appended together, once per model
# that is found, so the two lists always stay the same length.
all_probs = []
all_labels = []

for fold, model_file in folds.items():
    print(f"📂 Evaluating Fold {fold} Model...")
    model_path = os.path.join(model_dir, model_file)
    if not os.path.exists(model_path):
        print(f"⚠️ Model not found: {model_file}")
        continue

    model = load_model(model_path)
    y_probs = model.predict(X_test, verbose=0).flatten()
    all_probs.extend(y_probs)
    all_labels.extend(y_test)

# === Plotting
plt.figure(figsize=(12, 6))
all_probs = np.array(all_probs)
# The labels were already repeated per model in the loop above, so they must
# not be multiplied by len(folds) again.
all_labels = np.array(all_labels)

# Green = true label 1 (Good), red = true label 0 (Bad). All points are drawn
# in a single scatter call instead of one call per point.
point_colors = ['green' if label == 1 else 'red' for label in all_labels]
if all_probs.size:
    plt.scatter(np.arange(len(all_probs)), all_probs, c=point_colors, alpha=0.6)

plt.axhline(y=0.5, linestyle='--', color='black', label='Threshold = 0.5')
plt.title('Scatter Plot of Predicted Probabilities on Test Set (All 5 Models)')
plt.xlabel('Sample Index (across folds)')
plt.ylabel('Predicted Probability')
plt.ylim(0, 1.05)
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()

Correct and incorrect images

In [None]:
import os
import numpy as np
import random
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# === Paths
# Model to inspect (the fold-1 validation model) and the test-image folder.
model_dir = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4'
model_path = f'{model_dir}/model_fold2345_val1_frozen.keras'
test_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test'

# === Load images with labels and paths
def load_images_and_labels_with_paths(image_dir):
    """Load the .jpg images under image_dir/Good and image_dir/Bad, keeping their paths.

    Each image is loaded at 224x224 and its pixel array is divided by 255.0.
    Images from 'Good' get label 1, images from 'Bad' get label 0.

    A class folder that does not exist is skipped, matching the other loaders
    in this notebook, instead of raising FileNotFoundError from os.listdir.

    Returns:
        (images, labels, paths): two NumPy arrays and a list of file paths,
        all in matching order.
    """
    images, labels, paths = [], [], []
    for subdir in ['Good', 'Bad']:
        label = 1 if subdir == 'Good' else 0
        folder = os.path.join(image_dir, subdir)
        if not os.path.exists(folder):
            continue
        for fname in os.listdir(folder):
            # Case-insensitive extension check; non-.jpg files are ignored.
            if fname.lower().endswith('.jpg'):
                fpath = os.path.join(folder, fname)
                img = load_img(fpath, target_size=(224, 224))
                arr = img_to_array(img) / 255.0
                images.append(arr)
                labels.append(label)
                paths.append(fpath)
    return np.array(images), np.array(labels), paths

X_test, y_test, img_paths = load_images_and_labels_with_paths(test_dir)

# === Predict
# A probability strictly above 0.5 counts as class 1 (Good).
model = load_model(model_path)
y_probs = model.predict(X_test, verbose=0).flatten()
y_pred = (y_probs > 0.5).astype(int)

# === Collect image paths by category
# (true label, predicted label) -> name of the confusion-matrix cell.
outcome_names = {
    (1, 1): 'TP (Correct Good)',
    (0, 0): 'TN (Correct Bad)',
    (0, 1): 'FP (Incorrect Good)',
    (1, 0): 'FN (Incorrect Bad)',
}
categories = {name: [] for name in outcome_names.values()}

for true, pred, path in zip(y_test, y_pred, img_paths):
    categories[outcome_names[(int(true), int(pred))]].append(path)

# === Randomly pick one from each (None when a category has no images)
selected_paths = {}
for label, paths in categories.items():
    selected_paths[label] = random.choice(paths) if paths else None

# === Plot
# Each category keeps its own slot in the 1x4 grid; a slot stays empty when
# no example exists for it.
plt.figure(figsize=(12, 3))
for position, (label, path) in enumerate(selected_paths.items(), start=1):
    if path is None:
        print(f"⚠️ No image found for category: {label}")
        continue
    img = load_img(path, target_size=(224, 224))
    plt.subplot(1, 4, position)
    plt.imshow(img)
    plt.title(label, fontsize=10)
    plt.axis('off')

plt.tight_layout()
plt.show()

# VGG16 with all layers frozen

### Learning-rate experiment

1e-6

In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.regularizers import l2
import pickle
import tensorflow as tf

# === Directories ===
# Fold1 is the validation fold; models and training histories go to their own folders.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold_dir = os.path.join(base_fold_dir, 'Fold1')
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Models_VGGFrozen'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'
# Create both output folders up front; an existing folder is not an error.
for output_dir in (model_save_dir, history_save_dir):
    os.makedirs(output_dir, exist_ok=True)

# Learning rates tried in this cell.
learning_rates = [1e-6]

# === Load images and labels ===
def load_images_and_labels(image_dir):
    """Load the .jpg images under image_dir/Good and image_dir/Bad for VGG16.

    Each image is loaded at 224x224 and its pixel array is passed through the
    VGG16 `preprocess_input`. Images from 'Good' get label 1, images from
    'Bad' get label 0. A class folder that does not exist is skipped.

    Returns:
        (images, labels) as two NumPy arrays in matching order.
    """
    label_for_class = {'Good': 1, 'Bad': 0}
    images, labels = [], []
    for subdir, label in label_for_class.items():
        class_dir = os.path.join(image_dir, subdir)
        if os.path.exists(class_dir):
            for fname in os.listdir(class_dir):
                # Case-insensitive extension check; non-.jpg files are ignored.
                if not fname.lower().endswith('.jpg'):
                    continue
                raw = img_to_array(load_img(os.path.join(class_dir, fname), target_size=(224, 224)))
                images.append(preprocess_input(raw))
                labels.append(label)
    return np.array(images), np.array(labels)

# === Model builder ===
def create_vgg16_model(image_shape, learning_rate):
    """Build and compile a binary classifier on a fully frozen VGG16 backbone.

    Args:
        image_shape: input shape passed to VGG16 as `input_shape`.
        learning_rate: learning rate for the Adam optimizer.

    Returns:
        A compiled Keras Model with one sigmoid output, binary cross-entropy
        loss and an accuracy metric.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=image_shape)

    # Freeze all layers: only the head defined below is trainable.
    backbone.trainable = False

    # Head: pooled features -> L2-regularised dense layer -> batch norm ->
    # dropout -> single sigmoid unit.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    model = Model(inputs=backbone.input, outputs=tensor)
    model.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

# === Load datasets ===
# Folds 2-5 are concatenated into the training set; Fold 1 is the validation set.
print("🔄 Loading training data from Folds 2–5...")
train_parts = [
    load_images_and_labels(os.path.join(base_fold_dir, f'Fold{fold_no}'))
    for fold_no in range(2, 6)
]
X_train = np.concatenate([part[0] for part in train_parts])
y_train = np.concatenate([part[1] for part in train_parts])
del train_parts  # the per-fold arrays are no longer needed

print("📥 Loading validation data from Fold 1...")
X_val, y_val = load_images_and_labels(val_fold_dir)

# === Training ===
# One full training run per learning rate; the model file and the pickled
# history are both named after the learning rate.
for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    model = create_vgg16_model(X_train.shape[1:], learning_rate=lr)

    checkpoint_path = os.path.join(model_save_dir, f'model_fold1_lr{lr}.keras')
    # Stop after 20 epochs without val_loss improvement and restore the best weights.
    early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True, verbose=1)
    # Multiply the learning rate by 0.2 after 7 stagnant epochs, never below 1e-7.
    lr_schedule = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7, verbose=1)
    checkpoint = ModelCheckpoint(checkpoint_path, save_best_only=True, verbose=1)
    callbacks = [early_stop, lr_schedule, checkpoint]

    fit_options = dict(
        validation_data=(X_val, y_val),
        epochs=150,
        batch_size=32,
        callbacks=callbacks,
        verbose=2,
    )
    history = model.fit(X_train, y_train, **fit_options)

    # Persist the per-epoch metric history for later plotting.
    history_path = os.path.join(history_save_dir, f'history_fold1_lr{lr}.pkl')
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)

    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)

    print(f"✅ LR {lr} — Val Acc: {val_acc*100:.2f}%, Val Loss: {val_loss:.4f}")

    # Drop the model and reset Keras state before the next run.
    del model
    tf.keras.backend.clear_session()

5e-6

In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.regularizers import l2
import pickle
import tensorflow as tf

# === Directories ===
# Fold1 is the validation fold; models and training histories go to their own folders.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold_dir = os.path.join(base_fold_dir, 'Fold1')
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Models_VGGFrozen'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'
# Create both output folders up front; an existing folder is not an error.
for output_dir in (model_save_dir, history_save_dir):
    os.makedirs(output_dir, exist_ok=True)

# Learning rates tried in this cell.
learning_rates = [5e-6]

# === Load images and labels ===
def load_images_and_labels(image_dir):
    """Load the .jpg images under image_dir/Good and image_dir/Bad for VGG16.

    Each image is loaded at 224x224 and its pixel array is passed through the
    VGG16 `preprocess_input`. Images from 'Good' get label 1, images from
    'Bad' get label 0. A class folder that does not exist is skipped.

    Returns:
        (images, labels) as two NumPy arrays in matching order.
    """
    label_for_class = {'Good': 1, 'Bad': 0}
    images, labels = [], []
    for subdir, label in label_for_class.items():
        class_dir = os.path.join(image_dir, subdir)
        if os.path.exists(class_dir):
            for fname in os.listdir(class_dir):
                # Case-insensitive extension check; non-.jpg files are ignored.
                if not fname.lower().endswith('.jpg'):
                    continue
                raw = img_to_array(load_img(os.path.join(class_dir, fname), target_size=(224, 224)))
                images.append(preprocess_input(raw))
                labels.append(label)
    return np.array(images), np.array(labels)

# === Model builder ===
def create_vgg16_model(image_shape, learning_rate):
    """Build and compile a binary classifier on a fully frozen VGG16 backbone.

    Args:
        image_shape: input shape passed to VGG16 as `input_shape`.
        learning_rate: learning rate for the Adam optimizer.

    Returns:
        A compiled Keras Model with one sigmoid output, binary cross-entropy
        loss and an accuracy metric.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=image_shape)

    # Freeze all layers: only the head defined below is trainable.
    backbone.trainable = False

    # Head: pooled features -> L2-regularised dense layer -> batch norm ->
    # dropout -> single sigmoid unit.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    model = Model(inputs=backbone.input, outputs=tensor)
    model.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

# === Load datasets ===
# Folds 2-5 are concatenated into the training set; Fold 1 is the validation set.
print("🔄 Loading training data from Folds 2–5...")
train_parts = [
    load_images_and_labels(os.path.join(base_fold_dir, f'Fold{fold_no}'))
    for fold_no in range(2, 6)
]
X_train = np.concatenate([part[0] for part in train_parts])
y_train = np.concatenate([part[1] for part in train_parts])
del train_parts  # the per-fold arrays are no longer needed

print("📥 Loading validation data from Fold 1...")
X_val, y_val = load_images_and_labels(val_fold_dir)

# === Training ===
# One full training run per learning rate; the model file and the pickled
# history are both named after the learning rate.
for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    model = create_vgg16_model(X_train.shape[1:], learning_rate=lr)

    checkpoint_path = os.path.join(model_save_dir, f'model_fold1_lr{lr}.keras')
    # Stop after 20 epochs without val_loss improvement and restore the best weights.
    early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True, verbose=1)
    # Multiply the learning rate by 0.2 after 7 stagnant epochs, never below 1e-7.
    lr_schedule = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7, verbose=1)
    checkpoint = ModelCheckpoint(checkpoint_path, save_best_only=True, verbose=1)
    callbacks = [early_stop, lr_schedule, checkpoint]

    fit_options = dict(
        validation_data=(X_val, y_val),
        epochs=150,
        batch_size=32,
        callbacks=callbacks,
        verbose=2,
    )
    history = model.fit(X_train, y_train, **fit_options)

    # Persist the per-epoch metric history for later plotting.
    history_path = os.path.join(history_save_dir, f'history_fold1_lr{lr}.pkl')
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)

    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)

    print(f"✅ LR {lr} — Val Acc: {val_acc*100:.2f}%, Val Loss: {val_loss:.4f}")

    # Drop the model and reset Keras state before the next run.
    del model
    tf.keras.backend.clear_session()

1e-5

In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.regularizers import l2
import pickle
import tensorflow as tf

# === Directories ===
# Fold1 is the validation fold; models and training histories go to their own folders.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold_dir = os.path.join(base_fold_dir, 'Fold1')
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Models_VGGFrozen'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'
# Create both output folders up front; an existing folder is not an error.
for output_dir in (model_save_dir, history_save_dir):
    os.makedirs(output_dir, exist_ok=True)

# Learning rates tried in this cell.
learning_rates = [1e-5]

# === Load images and labels ===
def load_images_and_labels(image_dir):
    """Load the .jpg images under image_dir/Good and image_dir/Bad for VGG16.

    Each image is loaded at 224x224 and its pixel array is passed through the
    VGG16 `preprocess_input`. Images from 'Good' get label 1, images from
    'Bad' get label 0. A class folder that does not exist is skipped.

    Returns:
        (images, labels) as two NumPy arrays in matching order.
    """
    label_for_class = {'Good': 1, 'Bad': 0}
    images, labels = [], []
    for subdir, label in label_for_class.items():
        class_dir = os.path.join(image_dir, subdir)
        if os.path.exists(class_dir):
            for fname in os.listdir(class_dir):
                # Case-insensitive extension check; non-.jpg files are ignored.
                if not fname.lower().endswith('.jpg'):
                    continue
                raw = img_to_array(load_img(os.path.join(class_dir, fname), target_size=(224, 224)))
                images.append(preprocess_input(raw))
                labels.append(label)
    return np.array(images), np.array(labels)

# === Model builder ===
def create_vgg16_model(image_shape, learning_rate):
    """Build and compile a binary classifier on a fully frozen VGG16 backbone.

    Args:
        image_shape: input shape passed to VGG16 as `input_shape`.
        learning_rate: learning rate for the Adam optimizer.

    Returns:
        A compiled Keras Model with one sigmoid output, binary cross-entropy
        loss and an accuracy metric.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=image_shape)

    # Freeze all layers: only the head defined below is trainable.
    backbone.trainable = False

    # Head: pooled features -> L2-regularised dense layer -> batch norm ->
    # dropout -> single sigmoid unit.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    model = Model(inputs=backbone.input, outputs=tensor)
    model.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

# === Load datasets ===
# Folds 2-5 are concatenated into the training set; Fold 1 is the validation set.
print("🔄 Loading training data from Folds 2–5...")
train_parts = [
    load_images_and_labels(os.path.join(base_fold_dir, f'Fold{fold_no}'))
    for fold_no in range(2, 6)
]
X_train = np.concatenate([part[0] for part in train_parts])
y_train = np.concatenate([part[1] for part in train_parts])
del train_parts  # the per-fold arrays are no longer needed

print("📥 Loading validation data from Fold 1...")
X_val, y_val = load_images_and_labels(val_fold_dir)

# === Training ===
# One full training run per learning rate; the model file and the pickled
# history are both named after the learning rate.
for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    model = create_vgg16_model(X_train.shape[1:], learning_rate=lr)

    checkpoint_path = os.path.join(model_save_dir, f'model_fold1_lr{lr}.keras')
    # Stop after 20 epochs without val_loss improvement and restore the best weights.
    early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True, verbose=1)
    # Multiply the learning rate by 0.2 after 7 stagnant epochs, never below 1e-7.
    lr_schedule = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7, verbose=1)
    checkpoint = ModelCheckpoint(checkpoint_path, save_best_only=True, verbose=1)
    callbacks = [early_stop, lr_schedule, checkpoint]

    fit_options = dict(
        validation_data=(X_val, y_val),
        epochs=150,
        batch_size=32,
        callbacks=callbacks,
        verbose=2,
    )
    history = model.fit(X_train, y_train, **fit_options)

    # Persist the per-epoch metric history for later plotting.
    history_path = os.path.join(history_save_dir, f'history_fold1_lr{lr}.pkl')
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)

    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)

    print(f"✅ LR {lr} — Val Acc: {val_acc*100:.2f}%, Val Loss: {val_loss:.4f}")

    # Drop the model and reset Keras state before the next run.
    del model
    tf.keras.backend.clear_session()

5e-5

In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.regularizers import l2
import pickle
import tensorflow as tf

# === Directories ===
# Fold1 is the validation fold; models and training histories go to their own folders.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold_dir = os.path.join(base_fold_dir, 'Fold1')
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Models_VGGFrozen'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'
# Create both output folders up front; an existing folder is not an error.
for output_dir in (model_save_dir, history_save_dir):
    os.makedirs(output_dir, exist_ok=True)

# Learning rates tried in this cell.
learning_rates = [5e-5]

# === Load images and labels ===
def load_images_and_labels(image_dir):
    """Load the .jpg images under image_dir/Good and image_dir/Bad for VGG16.

    Each image is loaded at 224x224 and its pixel array is passed through the
    VGG16 `preprocess_input`. Images from 'Good' get label 1, images from
    'Bad' get label 0. A class folder that does not exist is skipped.

    Returns:
        (images, labels) as two NumPy arrays in matching order.
    """
    label_for_class = {'Good': 1, 'Bad': 0}
    images, labels = [], []
    for subdir, label in label_for_class.items():
        class_dir = os.path.join(image_dir, subdir)
        if os.path.exists(class_dir):
            for fname in os.listdir(class_dir):
                # Case-insensitive extension check; non-.jpg files are ignored.
                if not fname.lower().endswith('.jpg'):
                    continue
                raw = img_to_array(load_img(os.path.join(class_dir, fname), target_size=(224, 224)))
                images.append(preprocess_input(raw))
                labels.append(label)
    return np.array(images), np.array(labels)

# === Model builder ===
def create_vgg16_model(image_shape, learning_rate):
    """Build and compile a binary classifier on a fully frozen VGG16 backbone.

    Args:
        image_shape: input shape passed to VGG16 as `input_shape`.
        learning_rate: learning rate for the Adam optimizer.

    Returns:
        A compiled Keras Model with one sigmoid output, binary cross-entropy
        loss and an accuracy metric.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=image_shape)

    # Freeze all layers: only the head defined below is trainable.
    backbone.trainable = False

    # Head: pooled features -> L2-regularised dense layer -> batch norm ->
    # dropout -> single sigmoid unit.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    model = Model(inputs=backbone.input, outputs=tensor)
    model.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

# === Load datasets ===
# Folds 2-5 are concatenated into the training set; Fold 1 is the validation set.
print("🔄 Loading training data from Folds 2–5...")
train_parts = [
    load_images_and_labels(os.path.join(base_fold_dir, f'Fold{fold_no}'))
    for fold_no in range(2, 6)
]
X_train = np.concatenate([part[0] for part in train_parts])
y_train = np.concatenate([part[1] for part in train_parts])
del train_parts  # the per-fold arrays are no longer needed

print("📥 Loading validation data from Fold 1...")
X_val, y_val = load_images_and_labels(val_fold_dir)

# === Training ===
# One full training run per learning rate; the model file and the pickled
# history are both named after the learning rate.
for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    model = create_vgg16_model(X_train.shape[1:], learning_rate=lr)

    checkpoint_path = os.path.join(model_save_dir, f'model_fold1_lr{lr}.keras')
    # Stop after 20 epochs without val_loss improvement and restore the best weights.
    early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True, verbose=1)
    # Multiply the learning rate by 0.2 after 7 stagnant epochs, never below 1e-7.
    lr_schedule = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7, verbose=1)
    checkpoint = ModelCheckpoint(checkpoint_path, save_best_only=True, verbose=1)
    callbacks = [early_stop, lr_schedule, checkpoint]

    fit_options = dict(
        validation_data=(X_val, y_val),
        epochs=150,
        batch_size=32,
        callbacks=callbacks,
        verbose=2,
    )
    history = model.fit(X_train, y_train, **fit_options)

    # Persist the per-epoch metric history for later plotting.
    history_path = os.path.join(history_save_dir, f'history_fold1_lr{lr}.pkl')
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)

    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)

    print(f"✅ LR {lr} — Val Acc: {val_acc*100:.2f}%, Val Loss: {val_loss:.4f}")

    # Drop the model and reset Keras state before the next run.
    del model
    tf.keras.backend.clear_session()

1e-4

In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.regularizers import l2
import pickle
import tensorflow as tf

# === Directories ===
# Fold1 is the validation fold; models and training histories go to their own folders.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold_dir = os.path.join(base_fold_dir, 'Fold1')
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Models_VGGFrozen'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'
# Create both output folders up front; an existing folder is not an error.
for output_dir in (model_save_dir, history_save_dir):
    os.makedirs(output_dir, exist_ok=True)

# Learning rates tried in this cell.
learning_rates = [1e-4]

# === Load images and labels ===
def load_images_and_labels(image_dir):
    """Load the .jpg images under image_dir/Good and image_dir/Bad for VGG16.

    Each image is loaded at 224x224 and its pixel array is passed through the
    VGG16 `preprocess_input`. Images from 'Good' get label 1, images from
    'Bad' get label 0. A class folder that does not exist is skipped.

    Returns:
        (images, labels) as two NumPy arrays in matching order.
    """
    label_for_class = {'Good': 1, 'Bad': 0}
    images, labels = [], []
    for subdir, label in label_for_class.items():
        class_dir = os.path.join(image_dir, subdir)
        if os.path.exists(class_dir):
            for fname in os.listdir(class_dir):
                # Case-insensitive extension check; non-.jpg files are ignored.
                if not fname.lower().endswith('.jpg'):
                    continue
                raw = img_to_array(load_img(os.path.join(class_dir, fname), target_size=(224, 224)))
                images.append(preprocess_input(raw))
                labels.append(label)
    return np.array(images), np.array(labels)

# === Model builder ===
def create_vgg16_model(image_shape, learning_rate):
    """Build and compile a binary classifier on top of a frozen VGG16 backbone.

    Args:
        image_shape: Input shape handed to ``VGG16(input_shape=...)``.
        learning_rate: Learning rate given to the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` with a single sigmoid output, trained with
        binary cross-entropy and reporting accuracy.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # freeze all convolutional layers

    # Classification head, applied in order to the backbone's output.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# === Load datasets ===
print("🔄 Loading training data from Folds 2–5...")
# Folds 2-5 make up the training set; each call yields an (images, labels) pair.
train_fold_dirs = [os.path.join(base_fold_dir, f'Fold{fold_no}') for fold_no in range(2, 6)]
X_parts, y_parts = zip(*map(load_images_and_labels, train_fold_dirs))
X_train = np.concatenate(X_parts)
y_train = np.concatenate(y_parts)
del X_parts, y_parts  # only the stacked arrays are needed from here on

print("📥 Loading validation data from Fold 1...")
X_val, y_val = load_images_and_labels(val_fold_dir)

# === Training ===
# One fresh model per learning rate; the best checkpoint and the full history are saved.
for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    model = create_vgg16_model(X_train.shape[1:], learning_rate=lr)

    checkpoint_path = os.path.join(model_save_dir, f'model_fold1_lr{lr}.keras')
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=150,
        batch_size=32,
        verbose=2,
        callbacks=[
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True, verbose=1),
            ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7, verbose=1),
            ModelCheckpoint(checkpoint_path, save_best_only=True, verbose=1),
        ],
    )

    # Persist the per-epoch metrics dict so the plotting cells can reload it.
    history_path = os.path.join(history_save_dir, f'history_fold1_lr{lr}.pkl')
    with open(history_path, 'wb') as out_file:
        pickle.dump(history.history, out_file)

    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
    print(f"✅ LR {lr} — Val Acc: {val_acc*100:.2f}%, Val Loss: {val_loss:.4f}")

    # Release the model and the Keras session state before the next run.
    del model
    tf.keras.backend.clear_session()

Draw graphs

In [None]:
import os
import pickle
import matplotlib.pyplot as plt

# === Directory ===
history_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'

# === Learning rates and exact filenames ===
learning_rates = ['1e-06', '5e-06', '1e-05', '5e-05', '0.0001']
colors = ['orange', 'purple', 'blue', 'green', 'red']  # match number of LRs

# === Load every history once (both subplots reuse the same dicts) ===
histories = {}
for lr in learning_rates:
    filename = f'history_fold1_lr{lr}.pkl'
    path = os.path.join(history_dir, filename)
    if os.path.exists(path):
        with open(path, 'rb') as f:
            histories[lr] = pickle.load(f)
    else:
        print(f"⚠️ Missing: {filename}")

if not histories:
    print("⚠️ No history files found — nothing to plot.")
else:
    # === Set up figure with 2 subplots ===
    fig, axes = plt.subplots(1, 2, figsize=(20, 5))

    # === Plot Accuracy ===
    for lr, color in zip(learning_rates, colors):
        hist = histories.get(lr)
        if hist is None:
            continue
        axes[0].plot(hist['accuracy'], linestyle='-', color=color, alpha=0.6, label=f'Train Acc (LR={lr})')
        axes[0].plot(hist['val_accuracy'], linestyle='--', color=color, label=f'Val Acc (LR={lr})')
    axes[0].set_title('Training and Validation Accuracy per Learning Rate')
    axes[0].set_xlabel('Epochs')
    axes[0].set_ylabel('Accuracy')
    axes[0].grid(True)
    axes[0].legend()

    # === Plot Loss ===
    for lr, color in zip(learning_rates, colors):
        hist = histories.get(lr)
        if hist is None:
            continue
        axes[1].plot(hist['loss'], linestyle='-', color=color, alpha=0.6, label=f'Train Loss (LR={lr})')
        axes[1].plot(hist['val_loss'], linestyle='--', color=color, label=f'Val Loss (LR={lr})')
    axes[1].set_title('Training and Validation Loss per Learning Rate')
    axes[1].set_xlabel('Epochs')
    axes[1].set_ylabel('Loss')
    axes[1].grid(True)
    axes[1].legend()

    plt.tight_layout()
    plt.show()

In [None]:
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import make_interp_spline

# === Directory ===
history_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'

# === Learning rates ===
learning_rates_str = ['1e-06', '5e-06', '1e-05', '5e-05', '0.0001']
learning_rates = [float(lr) for lr in learning_rates_str]

# === Extract best val accuracy and min val loss ===
# A missing history file is recorded as None so the lists stay aligned with learning_rates.
best_val_accuracies = []
min_val_losses = []

for lr_str in learning_rates_str:
    filename = f'history_fold1_lr{lr_str}.pkl'
    path = os.path.join(history_dir, filename)

    if os.path.exists(path):
        with open(path, 'rb') as f:
            hist = pickle.load(f)
        best_val_accuracies.append(max(hist['val_accuracy']))
        min_val_losses.append(min(hist['val_loss']))
    else:
        print(f"⚠️ Missing file: {filename}")
        best_val_accuracies.append(None)
        min_val_losses.append(None)

# === Filter out None entries ===
filtered_lrs = []
filtered_accs = []
filtered_losses = []

for lr, acc, loss in zip(learning_rates, best_val_accuracies, min_val_losses):
    if acc is not None and loss is not None:
        filtered_lrs.append(lr)
        filtered_accs.append(acc)
        filtered_losses.append(loss)

if not filtered_lrs:
    # Without this guard x.min() below raises on an empty array.
    print("⚠️ No history files found — nothing to plot.")
else:
    # A quadratic (k=2) interpolating spline needs at least k + 1 = 3 points.
    can_smooth = len(filtered_lrs) >= 3
    if can_smooth:
        # Interpolate in log10(lr) space so the curve is smooth on the log-scaled x-axis.
        x = np.log10(filtered_lrs)
        x_smooth = np.linspace(x.min(), x.max(), 300)
        acc_smooth = make_interp_spline(x, filtered_accs, k=2)(x_smooth)
        loss_smooth = make_interp_spline(x, filtered_losses, k=2)(x_smooth)
    else:
        print("⚠️ Fewer than 3 points — drawing markers without the smoothed curves.")

    # === Plot ===
    fig, ax1 = plt.subplots(figsize=(10, 6))

    # Accuracy (left y-axis)
    ax1.set_xlabel('Learning Rate (log scale)')
    ax1.set_ylabel('Best Validation Accuracy', color='blue')
    if can_smooth:
        ax1.plot(10**x_smooth, acc_smooth, color='blue')  # smoothed curve (no label)
    ax1.scatter(filtered_lrs, filtered_accs, color='blue', label='Val Accuracy')
    ax1.tick_params(axis='y', labelcolor='blue')
    ax1.set_xscale('log')

    # Loss (right y-axis)
    ax2 = ax1.twinx()
    ax2.set_ylabel('Minimum Validation Loss', color='red')
    if can_smooth:
        ax2.plot(10**x_smooth, loss_smooth, color='red')  # smoothed curve (no label)
    ax2.scatter(filtered_lrs, filtered_losses, color='red', label='Val Loss')
    ax2.tick_params(axis='y', labelcolor='red')

    plt.title('Validation Accuracy and Loss vs Learning Rate')
    plt.grid(True)
    fig.tight_layout()
    plt.show()

### Get better lr graphs

Train

In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.regularizers import l2
import pickle
import tensorflow as tf

# === Learning rates to test ===
learning_rates = [1e-4, 2e-4, 3e-4, 5e-4, 8e-4, 1e-3]

# === Directories ===
# Root directory holding the Fold1..Fold5 sub-folders; Fold1 is used for validation.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold_dir = os.path.join(base_fold_dir, 'Fold1')

# Output locations for model checkpoints and pickled training histories.
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Models_VGGFrozen'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'
for out_dir in (model_save_dir, history_save_dir):
    os.makedirs(out_dir, exist_ok=True)

# === Load images and labels ===
def load_images_and_labels(image_dir, target_size=(224, 224)):
    """Load every ``.jpg`` under ``image_dir/Good`` and ``image_dir/Bad``.

    Each image is resized by ``load_img`` to ``target_size`` and run through the
    VGG16 ``preprocess_input``. Files are visited in sorted name order because
    ``os.listdir`` returns entries in arbitrary order; sorting keeps the array
    layout identical from run to run.

    Args:
        image_dir: Fold directory expected to contain ``Good`` and/or ``Bad``
            sub-folders. A missing sub-folder is skipped.
        target_size: ``(height, width)`` forwarded to ``load_img``.

    Returns:
        ``(images, labels)`` as NumPy arrays; a label is 1 for ``Good`` and 0
        for ``Bad``. When no image is found, ``images`` has shape
        ``(0, height, width, 3)`` so it can still be concatenated with the
        arrays of other folds.
    """
    images, labels = [], []
    for label, subdir in ((1, 'Good'), (0, 'Bad')):
        full_path = os.path.join(image_dir, subdir)
        if not os.path.isdir(full_path):
            continue
        for fname in sorted(os.listdir(full_path)):
            if not fname.lower().endswith('.jpg'):
                continue
            img = load_img(os.path.join(full_path, fname), target_size=target_size)
            images.append(preprocess_input(img_to_array(img)))
            labels.append(label)

    if not images:
        # np.array([]) would be 1-D and break np.concatenate with 4-D fold arrays.
        # Assumes load_img's default RGB mode (3 channels) and float32 pixels.
        empty = np.empty((0, *target_size, 3), dtype='float32')
        return empty, np.array(labels, dtype=int)
    return np.array(images), np.array(labels, dtype=int)

# === Build model ===
def create_vgg16_model(image_shape, learning_rate):
    """Build and compile a binary classifier on top of a frozen VGG16 backbone.

    Args:
        image_shape: Input shape handed to ``VGG16(input_shape=...)``.
        learning_rate: Learning rate given to the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` with a single sigmoid output, trained with
        binary cross-entropy and reporting accuracy.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # freeze all convolutional layers

    # Classification head, applied in order to the backbone's output.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# === Load datasets ===
print("🔄 Loading training data from Folds 2–5...")
# Folds 2-5 make up the training set; each call yields an (images, labels) pair.
train_fold_dirs = [os.path.join(base_fold_dir, f'Fold{fold_no}') for fold_no in range(2, 6)]
X_parts, y_parts = zip(*map(load_images_and_labels, train_fold_dirs))
X_train = np.concatenate(X_parts)
y_train = np.concatenate(y_parts)
del X_parts, y_parts  # only the stacked arrays are needed from here on

print("📥 Loading validation data from Fold 1...")
X_val, y_val = load_images_and_labels(val_fold_dir)

# === Train over multiple learning rates ===
# One fresh model per learning rate; the best checkpoint and the full history are saved.
for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    model = create_vgg16_model(X_train.shape[1:], learning_rate=lr)

    checkpoint_path = os.path.join(model_save_dir, f'model_fold1_lr{lr:.0e}.keras')
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=50,
        batch_size=32,
        verbose=2,
        callbacks=[
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True, verbose=1),
            ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7, verbose=1),
            ModelCheckpoint(checkpoint_path, save_best_only=True, verbose=1),
        ],
    )

    # Persist the per-epoch metrics dict so the plotting cells can reload it.
    history_path = os.path.join(history_save_dir, f'history_fold1_lr{lr:.0e}.pkl')
    with open(history_path, 'wb') as out_file:
        pickle.dump(history.history, out_file)

    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
    print(f"✅ LR {lr:.0e} — Val Acc: {val_acc*100:.2f}%, Val Loss: {val_loss:.4f}")

    # Release the model and the Keras session state before the next run.
    del model
    tf.keras.backend.clear_session()

Graph

In [None]:
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import make_interp_spline

# === Directory containing all history files ===
history_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'

# === Learning rates to include (full range) ===
learning_rates_str = ['1e-06', '5e-06', '1e-05', '5e-05', '1e-04', '2e-04', '3e-04', '5e-04', '8e-04', '1e-03', '2e-3', '3e-3', '5e-3', '1e-2']
learning_rates = [float(lr) for lr in learning_rates_str]

# === Collect best val acc and min val loss from each file ===
# A missing history file is recorded as None so the lists stay aligned with learning_rates.
val_accuracies = []
val_losses = []

for lr_str in learning_rates_str:
    # Rates below 1e-4 are looked up by their literal string; the others by their '.0e' form.
    filename = f'history_fold1_lr{lr_str}.pkl' if float(lr_str) < 1e-4 else f'history_fold1_lr{float(lr_str):.0e}.pkl'
    path = os.path.join(history_dir, filename)

    if os.path.exists(path):
        with open(path, 'rb') as f:
            hist = pickle.load(f)
        val_accuracies.append(max(hist['val_accuracy']))
        val_losses.append(min(hist['val_loss']))
        print(f"✅ Loaded: {filename} — Val Acc: {val_accuracies[-1]:.4f}, Val Loss: {val_losses[-1]:.4f}")
    else:
        val_accuracies.append(None)
        val_losses.append(None)
        print(f"⚠️ Missing file: {filename}")

# === Filter out missing entries ===
filtered_lrs, filtered_accs, filtered_losses = [], [], []
for lr, acc, loss in zip(learning_rates, val_accuracies, val_losses):
    if acc is not None and loss is not None:
        filtered_lrs.append(lr)
        filtered_accs.append(acc)
        filtered_losses.append(loss)

# === Sort for log-scale smoothing ===
sorted_idx = np.argsort(filtered_lrs)
filtered_lrs = np.array(filtered_lrs)[sorted_idx]
filtered_accs = np.array(filtered_accs)[sorted_idx]
filtered_losses = np.array(filtered_losses)[sorted_idx]

if len(filtered_lrs) == 0:
    # Without this guard x.min() below raises on an empty array.
    print("⚠️ No history files found — nothing to plot.")
else:
    # === Smooth curves ===
    # A quadratic (k=2) interpolating spline needs at least k + 1 = 3 points.
    can_smooth = len(filtered_lrs) >= 3
    if can_smooth:
        x = np.log10(filtered_lrs)
        x_smooth = np.linspace(x.min(), x.max(), 300)
        acc_smooth = make_interp_spline(x, filtered_accs, k=2)(x_smooth)
        loss_smooth = make_interp_spline(x, filtered_losses, k=2)(x_smooth)
    else:
        print("⚠️ Fewer than 3 points — drawing markers without the smoothed curves.")

    # === Plot ===
    fig, ax1 = plt.subplots(figsize=(10, 6))

    # Accuracy
    ax1.set_xlabel('Learning Rate (log scale)')
    ax1.set_ylabel('Best Validation Accuracy', color='blue')
    if can_smooth:
        ax1.plot(10**x_smooth, acc_smooth, color='blue')
    ax1.scatter(filtered_lrs, filtered_accs, color='blue')
    ax1.set_xscale('log')
    ax1.tick_params(axis='y', labelcolor='blue')
    # Show all tested learning rates as ticks
    ax1.set_xticks(filtered_lrs)
    ax1.set_xticklabels([f"{lr:.0e}" for lr in filtered_lrs], rotation=45)

    # Loss
    ax2 = ax1.twinx()
    ax2.set_ylabel('Minimum Validation Loss', color='red')
    if can_smooth:
        ax2.plot(10**x_smooth, loss_smooth, color='red')
    ax2.scatter(filtered_lrs, filtered_losses, color='red')
    ax2.tick_params(axis='y', labelcolor='red')

    plt.title('Validation Accuracy and Loss vs Learning Rate (VGG16)')
    plt.grid(True)
    fig.tight_layout()
    plt.show()

In [None]:
import os
import pickle
import matplotlib.pyplot as plt

# === Directory containing all history files ===
history_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'

# === Learning rates to include ===
learning_rates_str = ['1e-06', '5e-06', '1e-05', '5e-05', '1e-04', '2e-04', '3e-04', '5e-04', '8e-04', '1e-03']

# === Load the histories, keyed by the learning-rate string shown in the legends ===
all_histories = {}

for lr_str in learning_rates_str:
    # Rates below 1e-4 are looked up by their literal string; the others by their '.0e' form.
    if float(lr_str) < 1e-4:
        filename = f'history_fold1_lr{lr_str}.pkl'
    else:
        filename = f'history_fold1_lr{float(lr_str):.0e}.pkl'
    path = os.path.join(history_dir, filename)

    if not os.path.exists(path):
        print(f"⚠️ Missing: {filename}")
        continue

    with open(path, 'rb') as f:
        all_histories[lr_str] = pickle.load(f)
        print(f"✅ Loaded: {filename}")

# === Plot Accuracy ===
plt.figure(figsize=(12, 6))
for shown_lr, record in all_histories.items():
    plt.plot(record['val_accuracy'], label=f'Val Acc - {shown_lr}')
    plt.plot(record['accuracy'], linestyle='--', label=f'Train Acc - {shown_lr}')
plt.title('Training and Validation Accuracy per Epoch')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# === Plot Loss ===
plt.figure(figsize=(12, 6))
for shown_lr, record in all_histories.items():
    plt.plot(record['val_loss'], label=f'Val Loss - {shown_lr}')
    plt.plot(record['loss'], linestyle='--', label=f'Train Loss - {shown_lr}')
plt.title('Training and Validation Loss per Epoch')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

Add more learning rates in the direction of 1e-3

In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.regularizers import l2
import pickle
import tensorflow as tf

# === Learning rates to test ===
learning_rates = [2e-3, 3e-3, 5e-3, 1e-2]

# === Directories ===
# Root directory holding the Fold1..Fold5 sub-folders; Fold1 is used for validation.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold_dir = os.path.join(base_fold_dir, 'Fold1')

# Output locations for model checkpoints and pickled training histories.
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Models_VGGFrozen'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'
for out_dir in (model_save_dir, history_save_dir):
    os.makedirs(out_dir, exist_ok=True)

# === Load images and labels ===
def load_images_and_labels(image_dir, target_size=(224, 224)):
    """Load every ``.jpg`` under ``image_dir/Good`` and ``image_dir/Bad``.

    Each image is resized by ``load_img`` to ``target_size`` and run through the
    VGG16 ``preprocess_input``. Files are visited in sorted name order because
    ``os.listdir`` returns entries in arbitrary order; sorting keeps the array
    layout identical from run to run.

    Args:
        image_dir: Fold directory expected to contain ``Good`` and/or ``Bad``
            sub-folders. A missing sub-folder is skipped.
        target_size: ``(height, width)`` forwarded to ``load_img``.

    Returns:
        ``(images, labels)`` as NumPy arrays; a label is 1 for ``Good`` and 0
        for ``Bad``. When no image is found, ``images`` has shape
        ``(0, height, width, 3)`` so it can still be concatenated with the
        arrays of other folds.
    """
    images, labels = [], []
    for label, subdir in ((1, 'Good'), (0, 'Bad')):
        full_path = os.path.join(image_dir, subdir)
        if not os.path.isdir(full_path):
            continue
        for fname in sorted(os.listdir(full_path)):
            if not fname.lower().endswith('.jpg'):
                continue
            img = load_img(os.path.join(full_path, fname), target_size=target_size)
            images.append(preprocess_input(img_to_array(img)))
            labels.append(label)

    if not images:
        # np.array([]) would be 1-D and break np.concatenate with 4-D fold arrays.
        # Assumes load_img's default RGB mode (3 channels) and float32 pixels.
        empty = np.empty((0, *target_size, 3), dtype='float32')
        return empty, np.array(labels, dtype=int)
    return np.array(images), np.array(labels, dtype=int)

# === Build model ===
def create_vgg16_model(image_shape, learning_rate):
    """Build and compile a binary classifier on top of a frozen VGG16 backbone.

    Args:
        image_shape: Input shape handed to ``VGG16(input_shape=...)``.
        learning_rate: Learning rate given to the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` with a single sigmoid output, trained with
        binary cross-entropy and reporting accuracy.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # freeze all convolutional layers

    # Classification head, applied in order to the backbone's output.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# === Load datasets ===
print("🔄 Loading training data from Folds 2–5...")
# Folds 2-5 make up the training set; each call yields an (images, labels) pair.
train_fold_dirs = [os.path.join(base_fold_dir, f'Fold{fold_no}') for fold_no in range(2, 6)]
X_parts, y_parts = zip(*map(load_images_and_labels, train_fold_dirs))
X_train = np.concatenate(X_parts)
y_train = np.concatenate(y_parts)
del X_parts, y_parts  # only the stacked arrays are needed from here on

print("📥 Loading validation data from Fold 1...")
X_val, y_val = load_images_and_labels(val_fold_dir)

# === Train over multiple learning rates ===
# One fresh model per learning rate; the best checkpoint and the full history are saved.
for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    model = create_vgg16_model(X_train.shape[1:], learning_rate=lr)

    checkpoint_path = os.path.join(model_save_dir, f'model_fold1_lr{lr:.0e}.keras')
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=50,
        batch_size=32,
        verbose=2,
        callbacks=[
            EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True, verbose=1),
            ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7, verbose=1),
            ModelCheckpoint(checkpoint_path, save_best_only=True, verbose=1),
        ],
    )

    # Persist the per-epoch metrics dict so the plotting cells can reload it.
    history_path = os.path.join(history_save_dir, f'history_fold1_lr{lr:.0e}.pkl')
    with open(history_path, 'wb') as out_file:
        pickle.dump(history.history, out_file)

    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
    print(f"✅ LR {lr:.0e} — Val Acc: {val_acc*100:.2f}%, Val Loss: {val_loss:.4f}")

    # Release the model and the Keras session state before the next run.
    del model
    tf.keras.backend.clear_session()

### Do more LR

In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.regularizers import l2
import pickle
import tensorflow as tf

# === Learning rates to test ===
learning_rates = [
    1.5e-2,  # 0.015
    2e-2,    # 0.020
    3e-2,    # 0.030
    5e-2,    # 0.050
    7e-2,    # 0.070
    1e-1,    # 0.100 – be cautious, may explode
]

# === Directories ===
# Root directory holding the Fold1..Fold5 sub-folders; Fold1 is used for validation.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold_dir = os.path.join(base_fold_dir, 'Fold1')

# Output locations for model checkpoints and pickled training histories.
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Models_VGGFrozen'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'
for out_dir in (model_save_dir, history_save_dir):
    os.makedirs(out_dir, exist_ok=True)


# === Load images and labels ===
def load_images_and_labels(image_dir, target_size=(224, 224)):
    """Load every ``.jpg`` under ``image_dir/Good`` and ``image_dir/Bad``.

    Each image is resized by ``load_img`` to ``target_size`` and run through the
    VGG16 ``preprocess_input``. Files are visited in sorted name order because
    ``os.listdir`` returns entries in arbitrary order; sorting keeps the array
    layout identical from run to run.

    Args:
        image_dir: Fold directory expected to contain ``Good`` and/or ``Bad``
            sub-folders. A missing sub-folder is skipped.
        target_size: ``(height, width)`` forwarded to ``load_img``.

    Returns:
        ``(images, labels)`` as NumPy arrays; a label is 1 for ``Good`` and 0
        for ``Bad``. When no image is found, ``images`` has shape
        ``(0, height, width, 3)`` so it can still be concatenated with the
        arrays of other folds.
    """
    images, labels = [], []
    for label, subdir in ((1, 'Good'), (0, 'Bad')):
        full_path = os.path.join(image_dir, subdir)
        if not os.path.isdir(full_path):
            continue
        for fname in sorted(os.listdir(full_path)):
            if not fname.lower().endswith('.jpg'):
                continue
            img = load_img(os.path.join(full_path, fname), target_size=target_size)
            images.append(preprocess_input(img_to_array(img)))
            labels.append(label)

    if not images:
        # np.array([]) would be 1-D and break np.concatenate with 4-D fold arrays.
        # Assumes load_img's default RGB mode (3 channels) and float32 pixels.
        empty = np.empty((0, *target_size, 3), dtype='float32')
        return empty, np.array(labels, dtype=int)
    return np.array(images), np.array(labels, dtype=int)

# === Build model ===
def create_vgg16_model(image_shape, learning_rate):
    """Build and compile a binary classifier on top of a frozen VGG16 backbone.

    Args:
        image_shape: Input shape handed to ``VGG16(input_shape=...)``.
        learning_rate: Learning rate given to the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` with a single sigmoid output, trained with
        binary cross-entropy and reporting accuracy.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # freeze all convolutional layers

    # Classification head, applied in order to the backbone's output.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# === Load datasets ===
print("🔄 Loading training data from Folds 2–5...")
# Folds 2-5 make up the training set; Fold 1 is kept for validation.
X_train, y_train = [], []
for i in range(2, 6):
    images, labels = load_images_and_labels(os.path.join(base_fold_dir, f'Fold{i}'))
    X_train.append(images)
    y_train.append(labels)
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

print("📥 Loading validation data from Fold 1...")
X_val, y_val = load_images_and_labels(val_fold_dir)


def lr_tag(lr):
    """Return the shortest scientific-notation tag that parses back to ``lr``.

    ``f'{lr:.0e}'`` keeps one significant digit, so 1.5e-2 is rounded onto a
    neighbouring rate's name and the two runs overwrite each other's files.
    Digits are added only when needed, so rates such as 2e-2 keep their
    existing ``'2e-02'`` file names.
    """
    digits = 0
    tag = f'{lr:.0e}'
    while float(tag) != lr and digits < 16:
        digits += 1
        tag = f'{lr:.{digits}e}'
    return tag


# === Train over multiple learning rates ===
for lr in learning_rates:
    tag = lr_tag(lr)  # unique per learning rate; used in every output file name
    print(f"\n🚀 Training with Learning Rate = {lr}")
    model = create_vgg16_model(X_train.shape[1:], learning_rate=lr)

    callbacks = [
        EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True, verbose=1),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7, verbose=1),
        ModelCheckpoint(os.path.join(model_save_dir, f'model_fold1_lr{tag}.keras'), save_best_only=True, verbose=1)
    ]

    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=50,
        batch_size=32,
        callbacks=callbacks,
        verbose=2
    )

    # Persist the per-epoch metrics dict so the plotting cells can reload it.
    history_path = os.path.join(history_save_dir, f'history_fold1_lr{tag}.pkl')
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)

    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
    print(f"✅ LR {tag} — Val Acc: {val_acc*100:.2f}%, Val Loss: {val_loss:.4f}")

    # Release the model and the Keras session state before the next run.
    del model
    tf.keras.backend.clear_session()

In [None]:
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import make_interp_spline

# === Directory containing all history files ===
history_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'

# === Learning rates to include (full range) ===
learning_rates_str = [
    '1e-06', '5e-06', '1e-05', '5e-05', '1e-04',
    '2e-04', '3e-04', '5e-04', '8e-04', '1e-03',
    '2e-03', '3e-03', '5e-03', '1e-02',
    '1.5e-02', '2e-02', '3e-02', '5e-02', '7e-02', '1e-01'
]
learning_rates = [float(lr) for lr in learning_rates_str]


def lr_tag(lr):
    """Return the shortest scientific-notation tag that parses back to ``lr``.

    ``f'{lr:.0e}'`` keeps one significant digit, so 1.5e-2 would be looked up
    under a neighbouring rate's file name and plotted with that rate's data.
    Digits are added only when needed, so single-digit rates keep their
    existing names (e.g. ``'2e-02'``).
    """
    digits = 0
    tag = f'{lr:.0e}'
    while float(tag) != lr and digits < 16:
        digits += 1
        tag = f'{lr:.{digits}e}'
    return tag


# === Collect best val acc and average val loss from each file ===
# A missing history file is recorded as None so the lists stay aligned with learning_rates.
val_accuracies = []
val_losses = []

for lr in learning_rates:
    filename = f'history_fold1_lr{lr_tag(lr)}.pkl'
    path = os.path.join(history_dir, filename)

    if os.path.exists(path):
        with open(path, 'rb') as f:
            hist = pickle.load(f)
        val_accuracies.append(max(hist['val_accuracy']))
        val_losses.append(np.mean(hist['val_loss']))  # ← Average instead of min
        print(f"✅ Loaded: {filename} — Val Acc: {val_accuracies[-1]:.4f}, Avg Val Loss: {val_losses[-1]:.4f}")
    else:
        val_accuracies.append(None)
        val_losses.append(None)
        print(f"⚠️ Missing file: {filename}")

# === Filter out missing entries ===
filtered_lrs, filtered_accs, filtered_losses = [], [], []
for lr, acc, loss in zip(learning_rates, val_accuracies, val_losses):
    if acc is not None and loss is not None:
        filtered_lrs.append(lr)
        filtered_accs.append(acc)
        filtered_losses.append(loss)

# === Sort for log-scale smoothing ===
sorted_idx = np.argsort(filtered_lrs)
filtered_lrs = np.array(filtered_lrs)[sorted_idx]
filtered_accs = np.array(filtered_accs)[sorted_idx]
filtered_losses = np.array(filtered_losses)[sorted_idx]

if len(filtered_lrs) == 0:
    # Without this guard x.min() below raises on an empty array.
    print("⚠️ No history files found — nothing to plot.")
else:
    # === Smooth curves ===
    # A quadratic (k=2) interpolating spline needs at least k + 1 = 3 points.
    can_smooth = len(filtered_lrs) >= 3
    if can_smooth:
        x = np.log10(filtered_lrs)
        x_smooth = np.linspace(x.min(), x.max(), 300)
        acc_smooth = make_interp_spline(x, filtered_accs, k=2)(x_smooth)
        loss_smooth = make_interp_spline(x, filtered_losses, k=2)(x_smooth)
    else:
        print("⚠️ Fewer than 3 points — drawing markers without the smoothed curves.")

    # === Plot ===
    fig, ax1 = plt.subplots(figsize=(12, 6))

    # Accuracy (left axis)
    ax1.set_xlabel('Learning Rate')
    ax1.set_ylabel('Best Validation Accuracy', color='blue')
    if can_smooth:
        ax1.plot(10**x_smooth, acc_smooth, color='blue')
    ax1.scatter(filtered_lrs, filtered_accs, color='blue')
    ax1.set_xscale('log')
    ax1.tick_params(axis='y', labelcolor='blue')

    # ✅ Show only a few learning rate ticks
    num_ticks_to_show = 6
    tick_indices = np.round(np.linspace(0, len(filtered_lrs) - 1, num_ticks_to_show)).astype(int)
    selected_lrs = filtered_lrs[tick_indices]
    ax1.set_xticks(selected_lrs)
    ax1.set_xticklabels([f"{lr:.1e}" for lr in selected_lrs], rotation=60)

    # Loss (right axis)
    ax2 = ax1.twinx()
    ax2.set_ylabel('Average Validation Loss', color='red')
    if can_smooth:
        ax2.plot(10**x_smooth, loss_smooth, color='red')
    ax2.scatter(filtered_lrs, filtered_losses, color='red')
    ax2.tick_params(axis='y', labelcolor='red')

    plt.title('Validation Accuracy and Average Loss vs Learning Rate (VGG16)')
    plt.grid(True, which='both', axis='x')
    fig.tight_layout()
    plt.show()

In [None]:
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import make_interp_spline

# Use a single 16-pt size for every text element of the figures drawn below
# (base font, title, axis labels, tick labels and legend).
plt.rcParams.update(dict.fromkeys(
    (
        'font.size',
        'axes.titlesize',
        'axes.labelsize',
        'xtick.labelsize',
        'ytick.labelsize',
        'legend.fontsize',
    ),
    16,
))


# === Directory containing all history files ===
history_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'

# === Learning rates to include (full range) ===
learning_rates_str = [
    '1e-06', '5e-06', '1e-05', '5e-05', '1e-04',
    '2e-04', '3e-04', '5e-04', '8e-04', '1e-03',
    '2e-03', '3e-03', '5e-03', '1e-02',
    '1.5e-02', '2e-02', '3e-02', '5e-02', '7e-02', '1e-01'
]
learning_rates = [float(lr) for lr in learning_rates_str]


def lr_tag(lr):
    """Return the shortest scientific-notation tag that parses back to ``lr``.

    ``f'{lr:.0e}'`` keeps one significant digit, so 1.5e-2 would be looked up
    under a neighbouring rate's file name and plotted with that rate's data.
    Digits are added only when needed, so single-digit rates keep their
    existing names (e.g. ``'2e-02'``).
    """
    digits = 0
    tag = f'{lr:.0e}'
    while float(tag) != lr and digits < 16:
        digits += 1
        tag = f'{lr:.{digits}e}'
    return tag


# === Collect best val acc and average val loss from each file ===
# A missing history file is recorded as None so the lists stay aligned with learning_rates.
val_accuracies = []
val_losses = []

for lr in learning_rates:
    filename = f'history_fold1_lr{lr_tag(lr)}.pkl'
    path = os.path.join(history_dir, filename)

    if os.path.exists(path):
        with open(path, 'rb') as f:
            hist = pickle.load(f)
        val_accuracies.append(max(hist['val_accuracy']))
        val_losses.append(np.mean(hist['val_loss']))  # ← Average instead of min
        print(f"✅ Loaded: {filename} — Val Acc: {val_accuracies[-1]:.4f}, Avg Val Loss: {val_losses[-1]:.4f}")
    else:
        val_accuracies.append(None)
        val_losses.append(None)
        print(f"⚠️ Missing file: {filename}")

# === Filter out missing entries ===
filtered_lrs, filtered_accs, filtered_losses = [], [], []
for lr, acc, loss in zip(learning_rates, val_accuracies, val_losses):
    if acc is not None and loss is not None:
        filtered_lrs.append(lr)
        filtered_accs.append(acc)
        filtered_losses.append(loss)

# === Sort for log-scale smoothing ===
sorted_idx = np.argsort(filtered_lrs)
filtered_lrs = np.array(filtered_lrs)[sorted_idx]
filtered_accs = np.array(filtered_accs)[sorted_idx]
filtered_losses = np.array(filtered_losses)[sorted_idx]

if len(filtered_lrs) == 0:
    # Without this guard x.min() below raises on an empty array.
    print("⚠️ No history files found — nothing to plot.")
else:
    # === Smooth curves ===
    # A quadratic (k=2) interpolating spline needs at least k + 1 = 3 points.
    can_smooth = len(filtered_lrs) >= 3
    if can_smooth:
        x = np.log10(filtered_lrs)
        x_smooth = np.linspace(x.min(), x.max(), 300)
        acc_smooth = make_interp_spline(x, filtered_accs, k=2)(x_smooth)
        loss_smooth = make_interp_spline(x, filtered_losses, k=2)(x_smooth)
    else:
        print("⚠️ Fewer than 3 points — drawing markers without the smoothed curves.")

    # === Plot ===
    fig, ax1 = plt.subplots(figsize=(12, 6))

    # Accuracy (left axis)
    ax1.set_xlabel('Learning Rate')
    ax1.set_ylabel('Best Validation Accuracy', color='blue')
    if can_smooth:
        ax1.plot(10**x_smooth, acc_smooth, color='blue')
    ax1.scatter(filtered_lrs, filtered_accs, color='blue')
    ax1.set_xscale('log')
    ax1.tick_params(axis='y', labelcolor='blue')

    # ✅ Show only a few learning rate ticks
    num_ticks_to_show = 6
    tick_indices = np.round(np.linspace(0, len(filtered_lrs) - 1, num_ticks_to_show)).astype(int)
    selected_lrs = filtered_lrs[tick_indices]
    ax1.set_xticks(selected_lrs)
    ax1.set_xticklabels([f"{lr:.1e}" for lr in selected_lrs], rotation=60)

    # Loss (right axis)
    ax2 = ax1.twinx()
    ax2.set_ylabel('Average Validation Loss', color='red')
    if can_smooth:
        ax2.plot(10**x_smooth, loss_smooth, color='red')
    ax2.scatter(filtered_lrs, filtered_losses, color='red')
    ax2.tick_params(axis='y', labelcolor='red')

    plt.title('Validation Accuracy and Average Loss vs Learning Rate (VGG16)')
    plt.grid(True, which='both', axis='x')
    fig.tight_layout()
    plt.show()

In [None]:
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import make_interp_spline
from matplotlib.ticker import LogLocator, LogFormatterSciNotation

# Use a single 16-pt size for every text element of the figures drawn below
# (base font, title, axis labels, tick labels and legend).
plt.rcParams.update(dict.fromkeys(
    (
        'font.size',
        'axes.titlesize',
        'axes.labelsize',
        'xtick.labelsize',
        'ytick.labelsize',
        'legend.fontsize',
    ),
    16,
))


# === Directory containing all history files ===
history_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'

# === Learning rates (full range; 2e-3 is included) ===
learning_rates_str = [
    '1e-06', '5e-06', '1e-05', '5e-05', '1e-04',
    '2e-04', '3e-04', '5e-04', '8e-04', '1e-03',
    '2e-03',
    '3e-03', '5e-03', '1e-02',
    '1.5e-02', '2e-02', '3e-02', '5e-02', '7e-02', '1e-01'
]
learning_rates = [float(lr) for lr in learning_rates_str]


def lr_tag(lr):
    """Return the shortest scientific-notation tag that parses back to ``lr``.

    ``f'{lr:.0e}'`` keeps one significant digit, so 1.5e-2 would be looked up
    under a neighbouring rate's file name and plotted with that rate's data.
    Digits are added only when needed, so single-digit rates keep their
    existing names (e.g. ``'2e-02'``).
    """
    digits = 0
    tag = f'{lr:.0e}'
    while float(tag) != lr and digits < 16:
        digits += 1
        tag = f'{lr:.{digits}e}'
    return tag


# === Collect best val acc and average val loss from each file ===
# A missing history file is recorded as None so the lists stay aligned with learning_rates.
val_accuracies = []
val_losses = []

for lr in learning_rates:
    filename = f'history_fold1_lr{lr_tag(lr)}.pkl'
    path = os.path.join(history_dir, filename)

    if os.path.exists(path):
        with open(path, 'rb') as f:
            hist = pickle.load(f)
        val_accuracies.append(max(hist['val_accuracy']))
        val_losses.append(np.mean(hist['val_loss']))
        print(f"✅ Loaded: {filename} — Val Acc: {val_accuracies[-1]:.4f}, Avg Val Loss: {val_losses[-1]:.4f}")
    else:
        val_accuracies.append(None)
        val_losses.append(None)
        print(f"⚠️ Missing file: {filename}")

# === Filter out missing entries ===
filtered_lrs, filtered_accs, filtered_losses = [], [], []
for lr, acc, loss in zip(learning_rates, val_accuracies, val_losses):
    if acc is not None and loss is not None:
        filtered_lrs.append(lr)
        filtered_accs.append(acc)
        filtered_losses.append(loss)

# === Sort for log-scale smoothing ===
sorted_idx = np.argsort(filtered_lrs)
filtered_lrs = np.array(filtered_lrs)[sorted_idx]
filtered_accs = np.array(filtered_accs)[sorted_idx]
filtered_losses = np.array(filtered_losses)[sorted_idx]

if len(filtered_lrs) == 0:
    # Without this guard x.min() below raises on an empty array.
    print("⚠️ No history files found — nothing to plot.")
else:
    # === Smooth curves ===
    # A quadratic (k=2) interpolating spline needs at least k + 1 = 3 points.
    can_smooth = len(filtered_lrs) >= 3
    if can_smooth:
        x = np.log10(filtered_lrs)
        x_smooth = np.linspace(x.min(), x.max(), 300)
        acc_smooth = make_interp_spline(x, filtered_accs, k=2)(x_smooth)
        loss_smooth = make_interp_spline(x, filtered_losses, k=2)(x_smooth)
    else:
        print("⚠️ Fewer than 3 points — drawing markers without the smoothed curves.")

    # === Plot ===
    fig, ax1 = plt.subplots(figsize=(12, 6))

    # Accuracy (left axis)
    ax1.set_xlabel('Learning Rate')
    ax1.set_ylabel('Best Validation Accuracy', color='blue')
    if can_smooth:
        ax1.plot(10**x_smooth, acc_smooth, color='blue', label='Validation Accuracy')
    ax1.scatter(filtered_lrs, filtered_accs, color='blue')
    ax1.set_xscale('log')
    # Decade major ticks with automatic minor ticks, labelled in scientific notation.
    ax1.xaxis.set_major_locator(LogLocator(base=10.0, numticks=10))
    ax1.xaxis.set_minor_locator(LogLocator(base=10.0, subs='auto', numticks=50))
    ax1.xaxis.set_major_formatter(LogFormatterSciNotation())
    ax1.tick_params(axis='x', rotation=45)
    ax1.tick_params(axis='y', labelcolor='blue')

    # Loss (right axis)
    ax2 = ax1.twinx()
    ax2.set_ylabel('Average Validation Loss', color='red')
    if can_smooth:
        ax2.plot(10**x_smooth, loss_smooth, color='red', label='Avg Val Loss')
    ax2.scatter(filtered_lrs, filtered_losses, color='red')
    ax2.tick_params(axis='y', labelcolor='red')

    plt.title('Validation Accuracy and Average Loss vs Learning Rate (VGG16)')
    plt.grid(True, which='both', axis='x')
    fig.tight_layout()
    plt.show()

In [None]:
import os
import pickle
import matplotlib.pyplot as plt

# === Directory containing your saved history files ===
history_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'

# === Learning rates to plot from 2e-3 to 1e-1 ===
selected_lrs = [
    2e-3, 3e-3, 5e-3, 1e-2, 1.5e-2, 2e-2, 3e-2, 5e-2, 7e-2, 1e-1
]


def _lr_tag(lr):
    """Return the shortest scientific-notation string that parses back to ``lr``.

    ``f'{lr:.0e}'`` keeps a single significant digit, so 1.5e-2 would get a tag
    that another entry of ``selected_lrs`` also produces; its history would
    then be read from the wrong file and drawn under a duplicate label.
    Digits are added only until the tag round-trips, so values such as 2e-3
    still yield '2e-03'.
    """
    for digits in range(17):
        tag = f'{lr:.{digits}e}'
        if float(tag) == lr:
            return tag
    return repr(lr)


# === Store all histories
all_histories = {}

for lr in selected_lrs:
    # Try the scientific-notation name first, then the plain ``lr{lr}`` form
    # that other cells of this notebook use when naming history files.
    candidates = [f'history_fold1_lr{_lr_tag(lr)}.pkl', f'history_fold1_lr{lr}.pkl']
    filename = next(
        (name for name in candidates if os.path.exists(os.path.join(history_dir, name))),
        None,
    )

    if filename is None:
        print(f"⚠️ Missing file: {' / '.join(candidates)}")
        continue

    path = os.path.join(history_dir, filename)
    with open(path, 'rb') as f:
        history = pickle.load(f)
    all_histories[lr] = history

# === Plot accuracy (all LRs)
# Dashed lines are training curves, solid lines are validation curves.
plt.figure(figsize=(12, 5))
for lr, hist in all_histories.items():
    plt.plot(hist['accuracy'], linestyle='--', label=f'Train {_lr_tag(lr)}')
    plt.plot(hist['val_accuracy'], label=f'Val {_lr_tag(lr)}')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# === Plot loss (all LRs)
plt.figure(figsize=(12, 5))
for lr, hist in all_histories.items():
    plt.plot(hist['loss'], linestyle='--', label=f'Train {_lr_tag(lr)}')
    plt.plot(hist['val_loss'], label=f'Val {_lr_tag(lr)}')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

### Get metrics per learning rate

In [None]:
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score, precision_recall_curve, auc, accuracy_score
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input  # ✅ Added this

# === Directories ===
# Saved training histories (.pkl) and models (.keras) from the learning-rate
# search, plus the fold whose images serve as the validation set.
history_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'
model_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Models_VGGFrozen'
val_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Fold1'

# === Learning rates and plotting colors ===
# Learning rates are kept as strings because they are interpolated verbatim
# into the history/model file names; colors pair with them by position.
learning_rates = ['1e-06', '5e-06', '1e-05', '5e-05', '0.0001']
colors = ['orange', 'purple', 'blue', 'green', 'red']

# === Load validation data with proper VGG preprocessing ===
def load_validation_data(val_dir):
    """Load the .jpg images under ``val_dir/Good`` and ``val_dir/Bad``.

    Each image is resized to 224x224 and passed through the VGG16
    ``preprocess_input``. A class sub-directory that does not exist is skipped.

    Returns:
        (images, labels) as NumPy arrays; 'Good' is labelled 1 and 'Bad' 0.
    """
    class_ids = {'Good': 1, 'Bad': 0}
    samples, targets = [], []
    for class_name, class_id in class_ids.items():
        class_dir = os.path.join(val_dir, class_name)
        if os.path.exists(class_dir):
            jpg_names = [n for n in os.listdir(class_dir) if n.lower().endswith('.jpg')]
            for name in jpg_names:
                picture = load_img(os.path.join(class_dir, name), target_size=(224, 224))
                samples.append(preprocess_input(img_to_array(picture)))
                targets.append(class_id)
    return np.array(samples), np.array(targets)

# Load the validation images once; every model below is scored on this same set.
X_val, y_val = load_validation_data(val_dir)

# === Set up figure ===
# axes[0] receives the accuracy curves, axes[1] the loss curves.
fig, axes = plt.subplots(1, 2, figsize=(20, 5))

print("📊 Validation Metrics Summary (Fold 1):\n")

# === Plotting and metrics evaluation ===
for lr, color in zip(learning_rates, colors):
    history_path = os.path.join(history_dir, f'history_fold1_lr{lr}.pkl')
    model_path = os.path.join(model_dir, f'model_fold1_lr{lr}.keras')

    # A learning rate is reported only when both its history and its model exist.
    if os.path.exists(history_path) and os.path.exists(model_path):
        # Load history
        with open(history_path, 'rb') as f:
            hist = pickle.load(f)

        # Plot accuracy (solid = training, dashed = validation)
        axes[0].plot(hist['accuracy'], linestyle='-', color=color, alpha=0.6, label=f'Train Acc (LR={lr})')
        axes[0].plot(hist['val_accuracy'], linestyle='--', color=color, label=f'Val Acc (LR={lr})')

        # Plot loss (solid = training, dashed = validation)
        axes[1].plot(hist['loss'], linestyle='-', color=color, alpha=0.6, label=f'Train Loss (LR={lr})')
        axes[1].plot(hist['val_loss'], linestyle='--', color=color, label=f'Val Loss (LR={lr})')

        # Load model and predict
        model = load_model(model_path)
        y_pred_probs = model.predict(X_val).flatten()
        # Probabilities above 0.5 are classified as 'Good' (label 1).
        y_pred_labels = (y_pred_probs > 0.5).astype(int)

        # Compute metrics
        # zero_division=1 makes scikit-learn report 1 for a metric whose
        # denominator is zero (e.g. no positive predictions).
        acc = accuracy_score(y_val, y_pred_labels)
        f1 = f1_score(y_val, y_pred_labels, zero_division=1)
        precision = precision_score(y_val, y_pred_labels, zero_division=1)
        recall = recall_score(y_val, y_pred_labels, zero_division=1)
        # Area under the precision-recall curve, computed from the raw probabilities.
        prec_curve, rec_curve, _ = precision_recall_curve(y_val, y_pred_probs)
        auc_pr = auc(rec_curve, prec_curve)

        # Display
        print(f"🟢 LR {lr}")
        print(f"   Accuracy  : {acc:.4f}")
        print(f"   F1 Score  : {f1:.4f}")
        print(f"   Precision : {precision:.4f}")
        print(f"   Recall    : {recall:.4f}")
        print(f"   AUC-PR    : {auc_pr:.4f}\n")
    else:
        print(f"⚠️ Missing files for LR {lr}")

# === Final plot settings ===
axes[0].set_title('Training and Validation Accuracy per Learning Rate')
axes[0].set_xlabel('Epochs')
axes[0].set_ylabel('Accuracy')
axes[0].grid(True)
axes[0].legend()

axes[1].set_title('Training and Validation Loss per Learning Rate')
axes[1].set_xlabel('Epochs')
axes[1].set_ylabel('Loss')
axes[1].grid(True)
axes[1].legend()

plt.tight_layout()
plt.show()

Get val losses

In [None]:
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input

# === Directories ===
# Saved training histories (.pkl) and models (.keras) from the learning-rate
# search, plus the fold whose images serve as the validation set.
history_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'
model_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Models_VGGFrozen'
val_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Fold1'

# === Learning rates and plotting colors ===
# Learning rates are kept as strings because they are interpolated verbatim
# into the history/model file names; colors pair with them by position.
learning_rates = ['1e-06', '5e-06', '1e-05', '5e-05', '0.0001']
colors = ['orange', 'purple', 'blue', 'green', 'red']

# === Load validation data (optional for full consistency, even if unused) ===
def load_validation_data(val_dir):
    """Read every .jpg below ``val_dir/Good`` and ``val_dir/Bad`` into arrays.

    Images are resized to 224x224 and run through the VGG16
    ``preprocess_input``; a missing class folder is silently skipped.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays, with 1 for 'Good' and 0
        for 'Bad'.
    """
    pictures, targets = [], []
    for class_name, class_id in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(val_dir, class_name)
        if os.path.exists(class_dir):
            for entry in os.listdir(class_dir):
                if not entry.lower().endswith('.jpg'):
                    continue
                raw = img_to_array(load_img(os.path.join(class_dir, entry), target_size=(224, 224)))
                pictures.append(preprocess_input(raw))
                targets.append(class_id)
    return np.array(pictures), np.array(targets)

# NOTE(review): X_val / y_val are loaded here but not used anywhere else in this cell.
X_val, y_val = load_validation_data(val_dir)

# === Set up figure ===
# axes[0] receives the accuracy curves, axes[1] the loss curves.
fig, axes = plt.subplots(1, 2, figsize=(20, 5))

# One mean validation loss per learning rate whose files were found.
avg_val_losses = []

print("📊 Average Validation Loss per Learning Rate (Fold 1):\n")

# === Plotting and average val loss summary ===
for lr, color in zip(learning_rates, colors):
    history_path = os.path.join(history_dir, f'history_fold1_lr{lr}.pkl')
    model_path = os.path.join(model_dir, f'model_fold1_lr{lr}.keras')

    # NOTE(review): the model file must exist for the LR to be reported,
    # although only the history is read below.
    if os.path.exists(history_path) and os.path.exists(model_path):
        with open(history_path, 'rb') as f:
            hist = pickle.load(f)

        # Plot accuracy (solid = training, dashed = validation)
        axes[0].plot(hist['accuracy'], linestyle='-', color=color, alpha=0.6, label=f'Train Acc (LR={lr})')
        axes[0].plot(hist['val_accuracy'], linestyle='--', color=color, label=f'Val Acc (LR={lr})')

        # Plot loss (solid = training, dashed = validation)
        axes[1].plot(hist['loss'], linestyle='-', color=color, alpha=0.6, label=f'Train Loss (LR={lr})')
        axes[1].plot(hist['val_loss'], linestyle='--', color=color, label=f'Val Loss (LR={lr})')

        # Compute average val loss (mean over all recorded epochs)
        avg_val = np.mean(hist['val_loss'])
        avg_val_losses.append(avg_val)
        print(f"🟢 LR {lr} → Avg Val Loss: {avg_val:.4f}")
    else:
        print(f"⚠️ Missing files for LR {lr}")

# === Final plot settings ===
axes[0].set_title('Training and Validation Accuracy per Learning Rate')
axes[0].set_xlabel('Epochs')
axes[0].set_ylabel('Accuracy')
axes[0].grid(True)
axes[0].legend()

axes[1].set_title('Training and Validation Loss per Learning Rate')
axes[1].set_xlabel('Epochs')
axes[1].set_ylabel('Loss')
axes[1].grid(True)
axes[1].legend()

plt.tight_layout()
plt.show()

# === Print overall average of all avg val losses
# Skipped when no learning rate had its files available.
if avg_val_losses:
    overall_avg = np.mean(avg_val_losses)
    print(f"\n📉 Overall Average of Avg Val Losses: {overall_avg:.4f}")

## Take the best learning rate and train all 5 folds

Fold 1

In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.regularizers import l2
import pickle
import tensorflow as tf

# === Directories ===
# base_fold_dir holds the Fold1..Fold5 image folders; models and histories
# are written to the two Saved_* directories, created here if missing.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold_dir = os.path.join(base_fold_dir, 'Fold1')  # ✅ Use Fold1 as validation set
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Models_VGGFrozen'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# Single learning rate used for this fold's training run.
learning_rates = [2e-3]

# === Load images and labels ===
def load_images_and_labels(image_dir):
    """Load the .jpg images of one fold directory together with their labels.

    Looks in ``image_dir/Good`` and ``image_dir/Bad`` (a missing sub-directory
    is skipped), resizes each image to 224x224 and applies the VGG16
    ``preprocess_input``.

    Returns:
        (images, labels) as NumPy arrays; 'Good' maps to 1 and 'Bad' to 0.
    """
    class_ids = {'Good': 1, 'Bad': 0}
    images, labels = [], []
    for class_name, class_id in class_ids.items():
        class_dir = os.path.join(image_dir, class_name)
        if os.path.exists(class_dir):
            jpg_names = [n for n in os.listdir(class_dir) if n.lower().endswith('.jpg')]
            for name in jpg_names:
                pixels = img_to_array(load_img(os.path.join(class_dir, name), target_size=(224, 224)))
                images.append(preprocess_input(pixels))
                labels.append(class_id)
    return np.array(images), np.array(labels)

# === Model builder ===
def create_vgg16_model(image_shape, learning_rate):
    """Build and compile a binary classifier on a frozen ImageNet VGG16 base.

    Args:
        image_shape: Input shape passed to VGG16 as ``input_shape``.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` with a single sigmoid output, trained with
        binary cross-entropy and tracking accuracy.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # only the classification head is trained

    # Classification head, applied in order on top of the backbone output.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# === Load datasets ===
# Training set = the four folds other than Fold 1, concatenated in fold order.
print("🔄 Loading training data from Folds 2, 3, 4, and 5...")
X_train, y_train = [], []
for i in [2, 3, 4, 5]:  # ✅ Exclude Fold1 from training
    images, labels = load_images_and_labels(os.path.join(base_fold_dir, f'Fold{i}'))
    X_train.append(images)
    y_train.append(labels)
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

print("📥 Loading validation data from Fold 1...")
X_val, y_val = load_images_and_labels(val_fold_dir)

# === Training ===
for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    # Input shape is taken from the loaded images (everything after the sample axis).
    model = create_vgg16_model(X_train.shape[1:], learning_rate=lr)

    callbacks = [
        # Stop after 20 epochs without val_loss improvement, restoring the best weights.
        EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True, verbose=1),
        # Multiply the learning rate by 0.2 after 7 stagnant epochs, never below 1e-7.
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7, verbose=1),
        # Keep only the best model on disk; note the extra "_val" in this fold's file name.
        ModelCheckpoint(os.path.join(model_save_dir, f'model_fold1_val_lr{lr}.keras'), save_best_only=True, verbose=1)
    ]

    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=150,
        batch_size=32,
        callbacks=callbacks,
        verbose=2
    )

    # Persist the per-epoch metrics dict so the curves can be plotted later.
    history_path = os.path.join(history_save_dir, f'history_fold1_val_lr{lr}.pkl')  # ✅ Save history with Fold1 label
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)

    # Score the in-memory model on the held-out fold.
    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
    print(f"✅ LR {lr} — Val Acc: {val_acc*100:.2f}%, Val Loss: {val_loss:.4f}")

    # Drop the model and reset Keras' global state before the next run.
    del model
    tf.keras.backend.clear_session()

Fold 2

In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.regularizers import l2
import pickle
import tensorflow as tf

# === Directories ===
# base_fold_dir holds the Fold1..Fold5 image folders; models and histories
# are written to the two Saved_* directories, created here if missing.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold_dir = os.path.join(base_fold_dir, 'Fold2')  # Fold 2 is the validation set
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Models_VGGFrozen'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# Single learning rate used for this fold's training run.
learning_rates = [2e-3]

# === Load images and labels ===
def load_images_and_labels(image_dir):
    """Collect the preprocessed .jpg images and labels found under ``image_dir``.

    The 'Good' and 'Bad' sub-directories are scanned in that order; one that
    does not exist is skipped. Every image is resized to 224x224 and passed
    through the VGG16 ``preprocess_input``.

    Returns:
        Tuple of NumPy arrays ``(images, labels)``; label 1 means 'Good',
        0 means 'Bad'.
    """
    images, labels = [], []
    for class_name, class_id in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if os.path.exists(class_dir):
            for entry in os.listdir(class_dir):
                if not entry.lower().endswith('.jpg'):
                    continue
                raw = img_to_array(load_img(os.path.join(class_dir, entry), target_size=(224, 224)))
                images.append(preprocess_input(raw))
                labels.append(class_id)
    return np.array(images), np.array(labels)

# === Model builder ===
def create_vgg16_model(image_shape, learning_rate):
    """Build and compile a binary classifier on a frozen ImageNet VGG16 base.

    Args:
        image_shape: Input shape passed to VGG16 as ``input_shape``.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` with a single sigmoid output, trained with
        binary cross-entropy and tracking accuracy.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # only the classification head is trained

    # Classification head, applied in order on top of the backbone output.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# === Load datasets ===
# Training set = the four folds other than Fold 2, concatenated in fold order.
print("🔄 Loading training data from Folds 1, 3, 4, and 5...")
X_train, y_train = [], []
for i in [1, 3, 4, 5]:  # Fold 2 is held out for validation
    images, labels = load_images_and_labels(os.path.join(base_fold_dir, f'Fold{i}'))
    X_train.append(images)
    y_train.append(labels)
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

print("📥 Loading validation data from Fold 2...")
X_val, y_val = load_images_and_labels(val_fold_dir)

# === Training ===
for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    # Input shape is taken from the loaded images (everything after the sample axis).
    model = create_vgg16_model(X_train.shape[1:], learning_rate=lr)

    callbacks = [
        # Stop after 20 epochs without val_loss improvement, restoring the best weights.
        EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True, verbose=1),
        # Multiply the learning rate by 0.2 after 7 stagnant epochs, never below 1e-7.
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7, verbose=1),
        # Keep only the best model on disk.
        ModelCheckpoint(os.path.join(model_save_dir, f'model_fold2_lr{lr}.keras'), save_best_only=True, verbose=1)
    ]

    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=150,
        batch_size=32,
        callbacks=callbacks,
        verbose=2
    )

    # Persist the per-epoch metrics dict so the curves can be plotted later.
    history_path = os.path.join(history_save_dir, f'history_fold2_lr{lr}.pkl')  # Fold-specific file name
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)

    # Score the in-memory model on the held-out fold.
    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
    print(f"✅ LR {lr} — Val Acc: {val_acc*100:.2f}%, Val Loss: {val_loss:.4f}")

    # Drop the model and reset Keras' global state before the next run.
    del model
    tf.keras.backend.clear_session()

Fold 3

In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.regularizers import l2
import pickle
import tensorflow as tf

# === Directories ===
# base_fold_dir holds the Fold1..Fold5 image folders; models and histories
# are written to the two Saved_* directories, created here if missing.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold_dir = os.path.join(base_fold_dir, 'Fold3')  # Validation set is Fold 3
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Models_VGGFrozen'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# Single learning rate used for this fold's training run.
learning_rates = [2e-3]

# === Load images and labels ===
def load_images_and_labels(image_dir):
    """Load the .jpg images of one fold directory together with their labels.

    Looks in ``image_dir/Good`` and ``image_dir/Bad`` (a missing sub-directory
    is skipped), resizes each image to 224x224 and applies the VGG16
    ``preprocess_input``.

    Returns:
        (images, labels) as NumPy arrays; 'Good' maps to 1 and 'Bad' to 0.
    """
    class_ids = {'Good': 1, 'Bad': 0}
    images, labels = [], []
    for class_name, class_id in class_ids.items():
        class_dir = os.path.join(image_dir, class_name)
        if os.path.exists(class_dir):
            jpg_names = [n for n in os.listdir(class_dir) if n.lower().endswith('.jpg')]
            for name in jpg_names:
                pixels = img_to_array(load_img(os.path.join(class_dir, name), target_size=(224, 224)))
                images.append(preprocess_input(pixels))
                labels.append(class_id)
    return np.array(images), np.array(labels)

# === Model builder ===
def create_vgg16_model(image_shape, learning_rate):
    """Build and compile a binary classifier on a frozen ImageNet VGG16 base.

    Args:
        image_shape: Input shape passed to VGG16 as ``input_shape``.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` with a single sigmoid output, trained with
        binary cross-entropy and tracking accuracy.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # only the classification head is trained

    # Classification head, applied in order on top of the backbone output.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# === Load datasets ===
# Training set = the four folds other than Fold 3, concatenated in fold order.
print("🔄 Loading training data from Folds 1, 2, 4, and 5...")
X_train, y_train = [], []
for i in [1, 2, 4, 5]:  # Exclude Fold 3 from training
    images, labels = load_images_and_labels(os.path.join(base_fold_dir, f'Fold{i}'))
    X_train.append(images)
    y_train.append(labels)
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

print("📥 Loading validation data from Fold 3...")
X_val, y_val = load_images_and_labels(val_fold_dir)

# === Training ===
for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    # Input shape is taken from the loaded images (everything after the sample axis).
    model = create_vgg16_model(X_train.shape[1:], learning_rate=lr)

    callbacks = [
        # Stop after 20 epochs without val_loss improvement, restoring the best weights.
        EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True, verbose=1),
        # Multiply the learning rate by 0.2 after 7 stagnant epochs, never below 1e-7.
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7, verbose=1),
        # Keep only the best model on disk.
        ModelCheckpoint(os.path.join(model_save_dir, f'model_fold3_lr{lr}.keras'), save_best_only=True, verbose=1)
    ]

    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=150,
        batch_size=32,
        callbacks=callbacks,
        verbose=2
    )

    # Persist the per-epoch metrics dict so the curves can be plotted later.
    history_path = os.path.join(history_save_dir, f'history_fold3_lr{lr}.pkl')  # Fold-specific file name
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)

    # Score the in-memory model on the held-out fold.
    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
    print(f"✅ LR {lr} — Val Acc: {val_acc*100:.2f}%, Val Loss: {val_loss:.4f}")

    # Drop the model and reset Keras' global state before the next run.
    del model
    tf.keras.backend.clear_session()

Fold 4

In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.regularizers import l2
import pickle
import tensorflow as tf

# === Directories ===
# base_fold_dir holds the Fold1..Fold5 image folders; models and histories
# are written to the two Saved_* directories, created here if missing.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold_dir = os.path.join(base_fold_dir, 'Fold4')  # Validation set is Fold 4
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Models_VGGFrozen'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# Single learning rate used for this fold's training run.
learning_rates = [2e-3]

# === Load images and labels ===
def load_images_and_labels(image_dir):
    """Collect the preprocessed .jpg images and labels found under ``image_dir``.

    The 'Good' and 'Bad' sub-directories are scanned in that order; one that
    does not exist is skipped. Every image is resized to 224x224 and passed
    through the VGG16 ``preprocess_input``.

    Returns:
        Tuple of NumPy arrays ``(images, labels)``; label 1 means 'Good',
        0 means 'Bad'.
    """
    images, labels = [], []
    for class_name, class_id in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if os.path.exists(class_dir):
            for entry in os.listdir(class_dir):
                if not entry.lower().endswith('.jpg'):
                    continue
                raw = img_to_array(load_img(os.path.join(class_dir, entry), target_size=(224, 224)))
                images.append(preprocess_input(raw))
                labels.append(class_id)
    return np.array(images), np.array(labels)

# === Model builder ===
def create_vgg16_model(image_shape, learning_rate):
    """Build and compile a binary classifier on a frozen ImageNet VGG16 base.

    Args:
        image_shape: Input shape passed to VGG16 as ``input_shape``.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` with a single sigmoid output, trained with
        binary cross-entropy and tracking accuracy.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # only the classification head is trained

    # Classification head, applied in order on top of the backbone output.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# === Load datasets ===
# Training set = the four folds other than Fold 4, concatenated in fold order.
print("🔄 Loading training data from Folds 1, 2, 3, and 5...")
X_train, y_train = [], []
for i in [1, 2, 3, 5]:  # Exclude Fold 4 from training
    images, labels = load_images_and_labels(os.path.join(base_fold_dir, f'Fold{i}'))
    X_train.append(images)
    y_train.append(labels)
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

print("📥 Loading validation data from Fold 4...")
X_val, y_val = load_images_and_labels(val_fold_dir)

# === Training ===
for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    # Input shape is taken from the loaded images (everything after the sample axis).
    model = create_vgg16_model(X_train.shape[1:], learning_rate=lr)

    callbacks = [
        # Stop after 20 epochs without val_loss improvement, restoring the best weights.
        EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True, verbose=1),
        # Multiply the learning rate by 0.2 after 7 stagnant epochs, never below 1e-7.
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7, verbose=1),
        # Keep only the best model on disk.
        ModelCheckpoint(os.path.join(model_save_dir, f'model_fold4_lr{lr}.keras'), save_best_only=True, verbose=1)
    ]

    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=150,
        batch_size=32,
        callbacks=callbacks,
        verbose=2
    )

    # Persist the per-epoch metrics dict so the curves can be plotted later.
    history_path = os.path.join(history_save_dir, f'history_fold4_lr{lr}.pkl')  # Fold-specific file name
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)

    # Score the in-memory model on the held-out fold.
    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
    print(f"✅ LR {lr} — Val Acc: {val_acc*100:.2f}%, Val Loss: {val_loss:.4f}")

    # Drop the model and reset Keras' global state before the next run.
    del model
    tf.keras.backend.clear_session()

Fold 5

In [None]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.regularizers import l2
import pickle
import tensorflow as tf

# === Directories ===
# base_fold_dir holds the Fold1..Fold5 image folders; models and histories
# are written to the two Saved_* directories, created here if missing.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold_dir = os.path.join(base_fold_dir, 'Fold5')  # Validation set is Fold 5
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Models_VGGFrozen'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# Single learning rate used for this fold's training run.
learning_rates = [2e-3]

# === Load images and labels ===
def load_images_and_labels(image_dir):
    """Load the .jpg images of one fold directory together with their labels.

    Looks in ``image_dir/Good`` and ``image_dir/Bad`` (a missing sub-directory
    is skipped), resizes each image to 224x224 and applies the VGG16
    ``preprocess_input``.

    Returns:
        (images, labels) as NumPy arrays; 'Good' maps to 1 and 'Bad' to 0.
    """
    class_ids = {'Good': 1, 'Bad': 0}
    images, labels = [], []
    for class_name, class_id in class_ids.items():
        class_dir = os.path.join(image_dir, class_name)
        if os.path.exists(class_dir):
            jpg_names = [n for n in os.listdir(class_dir) if n.lower().endswith('.jpg')]
            for name in jpg_names:
                pixels = img_to_array(load_img(os.path.join(class_dir, name), target_size=(224, 224)))
                images.append(preprocess_input(pixels))
                labels.append(class_id)
    return np.array(images), np.array(labels)

# === Model builder ===
def create_vgg16_model(image_shape, learning_rate):
    """Build and compile a binary classifier on a frozen ImageNet VGG16 base.

    Args:
        image_shape: Input shape passed to VGG16 as ``input_shape``.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` with a single sigmoid output, trained with
        binary cross-entropy and tracking accuracy.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # only the classification head is trained

    # Classification head, applied in order on top of the backbone output.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# === Load datasets ===
# Training set = the four folds other than Fold 5, concatenated in fold order.
print("🔄 Loading training data from Folds 1, 2, 3, and 4...")
X_train, y_train = [], []
for i in [1, 2, 3, 4]:  # Exclude Fold 5 from training
    images, labels = load_images_and_labels(os.path.join(base_fold_dir, f'Fold{i}'))
    X_train.append(images)
    y_train.append(labels)
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

print("📥 Loading validation data from Fold 5...")
X_val, y_val = load_images_and_labels(val_fold_dir)

# === Training ===
for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    # Input shape is taken from the loaded images (everything after the sample axis).
    model = create_vgg16_model(X_train.shape[1:], learning_rate=lr)

    callbacks = [
        # Stop after 20 epochs without val_loss improvement, restoring the best weights.
        EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True, verbose=1),
        # Multiply the learning rate by 0.2 after 7 stagnant epochs, never below 1e-7.
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7, verbose=1),
        # Keep only the best model on disk.
        ModelCheckpoint(os.path.join(model_save_dir, f'model_fold5_lr{lr}.keras'), save_best_only=True, verbose=1)
    ]

    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=150,
        batch_size=32,
        callbacks=callbacks,
        verbose=2
    )

    # Persist the per-epoch metrics dict so the curves can be plotted later.
    history_path = os.path.join(history_save_dir, f'history_fold5_lr{lr}.pkl')  # Fold-specific file name
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)

    # Score the in-memory model on the held-out fold.
    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
    print(f"✅ LR {lr} — Val Acc: {val_acc*100:.2f}%, Val Loss: {val_loss:.4f}")

    # Drop the model and reset Keras' global state before the next run.
    del model
    tf.keras.backend.clear_session()

### Analyze

Analyze graph

In [None]:
import os
import pickle
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, precision_recall_curve, auc
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input

# === Directory settings ===
# model_dir holds the per-fold .keras checkpoints; val_base_dir contains the
# Fold1..Fold5 image folders used as validation sets.
# NOTE(review): history_dir is defined here but not read in this cell.
history_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'
model_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Models_VGGFrozen'
val_base_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'

folds = [1, 2, 3, 4, 5]
# One list per metric; each evaluated fold appends one value.
metrics_dict = {
    'Accuracy': [],
    'F1 Score': [],
    'Precision': [],
    'Recall': [],
    'AUC-PR': []
}

# === Function to load validation images ===
def load_val_data(fold_num):
    """Load the validation images of fold ``fold_num`` from ``val_base_dir``.

    Reads every .jpg in ``Fold<fold_num>/Good`` and ``Fold<fold_num>/Bad``,
    resizes it to 224x224 and applies the VGG16 ``preprocess_input``. Unlike
    the loaders in the training cells, a missing class folder is not skipped:
    ``os.listdir`` raises.

    Returns:
        (images, labels) as NumPy arrays; 'Good' is 1 and 'Bad' is 0.
    """
    fold_dir = os.path.join(val_base_dir, f'Fold{fold_num}')
    samples, targets = [], []
    for label_name, label in (('Good', 1), ('Bad', 0)):
        label_dir = os.path.join(fold_dir, label_name)
        jpg_names = [n for n in os.listdir(label_dir) if n.lower().endswith('.jpg')]
        for name in jpg_names:
            pixels = img_to_array(load_img(os.path.join(label_dir, name), target_size=(224, 224)))
            samples.append(preprocess_input(pixels))
            targets.append(label)
    return np.array(samples), np.array(targets)

# === Evaluate each fold ===
# Folds whose model file is missing are skipped, so remember which folds
# actually contributed to the averages printed at the end.
evaluated_folds = []

for fold in folds:
    print(f"📁 Evaluating Fold {fold}")

    # The Fold 1 checkpoint carries an extra "_val" in its file name.
    if fold == 1:
        model_filename = 'model_fold1_val_lr0.002.keras'
    else:
        model_filename = f'model_fold{fold}_lr0.002.keras'

    model_path = os.path.join(model_dir, model_filename)
    if not os.path.exists(model_path):
        print(f"❌ Model not found: {model_path}")
        continue
    model = load_model(model_path)

    # Load validation data
    X_val, y_val = load_val_data(fold)

    # Predict: probabilities above 0.5 are classified as 'Good' (label 1).
    probs = model.predict(X_val).flatten()
    preds = (probs > 0.5).astype(int)

    # Metrics
    acc = accuracy_score(y_val, preds)
    f1 = f1_score(y_val, preds)
    prec = precision_score(y_val, preds)
    rec = recall_score(y_val, preds)
    # Area under the precision-recall curve, computed from the raw probabilities.
    precision_vals, recall_vals, _ = precision_recall_curve(y_val, probs)
    auprc = auc(recall_vals, precision_vals)

    # Store
    metrics_dict['Accuracy'].append(acc)
    metrics_dict['F1 Score'].append(f1)
    metrics_dict['Precision'].append(prec)
    metrics_dict['Recall'].append(rec)
    metrics_dict['AUC-PR'].append(auprc)
    evaluated_folds.append(fold)

    # Print
    print(f"Accuracy: {acc:.4f}, F1: {f1:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, AUC-PR: {auprc:.4f}")
    print("-" * 60)

# === Print Average and Std ===
print("\n📊 Average Metrics Across Folds:")
if not evaluated_folds:
    # np.mean of an empty list would only yield nan plus a RuntimeWarning.
    print("🚫 No fold models were found; no metrics to average.")
else:
    if len(evaluated_folds) < len(folds):
        # Make it explicit that the averages do not cover every fold.
        print(f"⚠️ Averages cover only folds {evaluated_folds} ({len(evaluated_folds)} of {len(folds)}).")
    for metric, values in metrics_dict.items():
        mean_val = np.mean(values)
        std_val = np.std(values)  # population standard deviation (NumPy default ddof=0)
        print(f"{metric}: {mean_val:.4f} ± {std_val:.4f}")

Graph

In [None]:
import os
import pickle
import matplotlib.pyplot as plt

# Use one uniform font size for every text element of the figures.
plt.rcParams.update({
    'font.size': 16,          # Base font size
    'axes.titlesize': 16,     # Title font size
    'axes.labelsize': 16,     # Axis label font size
    'xtick.labelsize': 16,    # X-tick label font size
    'ytick.labelsize': 16,    # Y-tick label font size
    'legend.fontsize': 16     # Legend font size (if you add one)
})


# === Directory where history files are saved ===
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'

# === Fold numbers and learning rate used ===
folds = [1, 2, 3, 4, 5]
# Kept as a string because it is inserted verbatim into the history file names.
learning_rate = '0.002'  # format string, no scientific notation

def load_history(fold):
    """Load the pickled training history of one fold.

    The file is looked up in the module-level ``history_save_dir`` and its
    name is built from ``fold`` and the module-level ``learning_rate``.
    Fold 1 uses a file name with an extra ``_val`` tag.

    Returns the unpickled object, or ``None`` (after printing a message)
    when the file does not exist.
    """
    filename = (
        f'history_fold1_val_lr{learning_rate}.pkl'
        if fold == 1
        else f'history_fold{fold}_lr{learning_rate}.pkl'
    )
    file_path = os.path.join(history_save_dir, filename)

    # Guard clause: report the missing file and bail out early.
    if not os.path.exists(file_path):
        print(f"❌ History file not found for Fold {fold}: {file_path}")
        return None

    with open(file_path, 'rb') as f:
        return pickle.load(f)

def plot_train_val_graphs(fold_histories):
    """Plot training/validation loss and accuracy of all folds side by side.

    ``fold_histories`` maps a fold number to a history mapping that provides
    the keys ``'loss'``, ``'val_loss'``, ``'accuracy'`` and ``'val_accuracy'``.
    Training curves are drawn solid blue, validation curves dashed red.
    """
    plt.figure(figsize=(14, 6))

    # --- Left panel: loss ---
    plt.subplot(1, 2, 1)
    for fold in fold_histories:
        curves = fold_histories[fold]
        is_first = fold == 1
        train_label = f'Fold {fold} Train Loss' if is_first else ""
        val_label = f'Fold {fold} Val Loss' if is_first else ""
        plt.plot(curves['loss'], color='blue', alpha=0.6, label=train_label)
        plt.plot(curves['val_loss'], linestyle='--', color='red', alpha=0.6, label=val_label)
    plt.title('Training and Validation Loss Across Folds')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    # The explicit label list is what the legend actually shows.
    plt.legend(['Train Loss', 'Val Loss'])
    plt.grid(alpha=0.3)

    # --- Right panel: accuracy ---
    plt.subplot(1, 2, 2)
    for fold in fold_histories:
        curves = fold_histories[fold]
        is_first = fold == 1
        train_label = f'Fold {fold} Train Acc' if is_first else ""
        val_label = f'Fold {fold} Val Acc' if is_first else ""
        plt.plot(curves['accuracy'], color='blue', alpha=0.6, label=train_label)
        plt.plot(curves['val_accuracy'], linestyle='--', color='red', alpha=0.6, label=val_label)
    plt.title('Training and Validation Accuracy Across Folds')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend(['Train Accuracy', 'Val Accuracy'])
    plt.grid(alpha=0.3)

    plt.tight_layout()
    plt.show()

# === Collect the histories that could be loaded, keyed by fold number ===
fold_histories = {}
for fold in folds:
    history = load_history(fold)
    if not history:
        # Missing (or empty) history: load_history already reported it.
        continue
    fold_histories[fold] = history

# Only plot when at least one fold produced a history.
if not fold_histories:
    print("🚫 No valid history files found.")
else:
    plot_train_val_graphs(fold_histories)

Get metrics

In [None]:
import os
import pickle
import numpy as np

# === Learning rate (string form, as it appears in the history file names) ===
learning_rate = '0.002'

# === Folds to summarise ===
folds = list(range(1, 6))

# === Directory where history files are saved ===
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Saved_Histories_VGGFrozen'

def load_history(fold):
    """Return the pickled history for ``fold``, or ``None`` if it is missing.

    The file name is derived from ``fold`` and the module-level
    ``learning_rate`` and resolved inside the module-level
    ``history_save_dir``. Fold 1 uses a file name carrying an extra
    ``_val`` tag. A message is printed when the file is not found.
    """
    filename = (
        f'history_fold1_val_lr{learning_rate}.pkl'
        if fold == 1
        else f'history_fold{fold}_lr{learning_rate}.pkl'
    )
    path = os.path.join(history_save_dir, filename)

    if not os.path.exists(path):
        print(f"❌ History not found for Fold {fold}")
        return None

    with open(path, 'rb') as f:
        loaded = pickle.load(f)
    return loaded

# === Store metrics (one entry per fold whose history could be loaded) ===
val_accuracies = []
val_losses = []
train_accuracies = []
train_losses = []

print("📊 Metrics per Fold:\n")
for fold in folds:
    history = load_history(fold)
    if history:
        # [-1] takes the value recorded for the last epoch of this fold.
        # NOTE(review): if training restored the best weights via early
        # stopping, the saved model may correspond to an earlier epoch than
        # the one reported here — confirm against the training cell.
        val_acc = history['val_accuracy'][-1]
        val_loss = history['val_loss'][-1]
        train_acc = history['accuracy'][-1]
        train_loss = history['loss'][-1]

        val_accuracies.append(val_acc)
        val_losses.append(val_loss)
        train_accuracies.append(train_acc)
        train_losses.append(train_loss)

        print(f"📁 Fold {fold}:")
        print(f"   ✅ Val Accuracy:  {val_acc * 100:.2f}%")
        print(f"   ✅ Val Loss:      {val_loss:.4f}")
        print(f"   🏋️ Train Accuracy:{train_acc * 100:.2f}%")
        print(f"   🏋️ Train Loss:    {train_loss:.4f}")
        print()

# === Mean and Std Dev ===
if val_accuracies:
    print("📈 Average Metrics Across Folds:")
    print(f"🔹 Mean Val Accuracy : {np.mean(val_accuracies) * 100:.2f}% ± {np.std(val_accuracies) * 100:.2f}%")
    print(f"🔹 Mean Val Loss     : {np.mean(val_losses):.4f} ± {np.std(val_losses):.4f}")
    print(f"🔹 Mean Train Accuracy : {np.mean(train_accuracies) * 100:.2f}% ± {np.std(train_accuracies) * 100:.2f}%")
    print(f"🔹 Mean Train Loss     : {np.mean(train_losses):.4f} ± {np.std(train_losses):.4f}")
else:
    # Without this guard np.mean([]) emits a RuntimeWarning and the summary
    # prints "nan" for every metric.
    print("🚫 No valid history files found.")

In [None]:
import os
import numpy as np
import pickle
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, precision_recall_curve, auc
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input

# === Evaluation settings ===
lr = '0.002'               # string form used inside the model file names
folds = list(range(1, 6))  # folds 1..5

# === Paths ===
base_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
model_dir = os.path.join(base_dir, 'Saved_Models_VGGFrozen')

# === Load images and labels ===
def load_images_and_labels(image_dir):
    """Load the ``.jpg`` images under ``image_dir/Good`` and ``image_dir/Bad``.

    Each image is loaded at 224x224, converted to an array and passed through
    ``preprocess_input``. Images from ``Good`` get label 1, images from
    ``Bad`` get label 0. A sub-directory that does not exist is skipped.

    Returns a tuple ``(images, labels)`` of NumPy arrays; both are empty when
    no image was found.
    """
    images = []
    labels = []
    for subdir, label in (('Good', 1), ('Bad', 0)):
        full_path = os.path.join(image_dir, subdir)
        if os.path.exists(full_path):
            # Extension check is case-insensitive; other files are ignored.
            jpg_names = [name for name in os.listdir(full_path) if name.lower().endswith('.jpg')]
            for fname in jpg_names:
                img = load_img(os.path.join(full_path, fname), target_size=(224, 224))
                images.append(preprocess_input(img_to_array(img)))
                labels.append(label)
    return np.array(images), np.array(labels)

# === Store metrics (one row [acc, prec, rec, f1, auc_pr] per evaluated fold) ===
all_metrics = []

print("📊 Evaluation Metrics per Fold:\n")

for fold in folds:
    # Resolve the model first so no images are loaded for a fold whose model
    # is missing. Fold 1's model file carries an extra "_val" tag.
    if fold == 1:
        model_path = os.path.join(model_dir, f'model_fold1_val_lr{lr}.keras')
    else:
        model_path = os.path.join(model_dir, f'model_fold{fold}_lr{lr}.keras')

    if not os.path.exists(model_path):
        print(f"❌ Model for Fold {fold} not found.")
        continue

    # Load validation data
    val_dir = os.path.join(base_dir, f'Fold{fold}')
    X_val, y_val = load_images_and_labels(val_dir)
    if len(X_val) == 0:
        # An empty array cannot be fed to model.predict; skip the fold.
        print(f"❌ No validation images found for Fold {fold}: {val_dir}")
        continue

    model = load_model(model_path)
    y_probs = model.predict(X_val, verbose=0).flatten()
    # Predictions at or above 0.5 count as the positive class ('Good' = 1).
    y_pred = (y_probs >= 0.5).astype(int)

    # Metrics
    acc = accuracy_score(y_val, y_pred)
    prec = precision_score(y_val, y_pred)
    rec = recall_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred)
    # Area under the precision-recall curve, computed from the raw scores.
    prc_prec, prc_rec, _ = precision_recall_curve(y_val, y_probs)
    auc_pr = auc(prc_rec, prc_prec)

    all_metrics.append([acc, prec, rec, f1, auc_pr])

    print(f"📁 Fold {fold}")
    print(f"   Accuracy : {acc*100:.2f}%")
    print(f"   Precision: {prec:.4f}")
    print(f"   Recall   : {rec:.4f}")
    print(f"   F1 Score : {f1:.4f}")
    print(f"   AUC-PR   : {auc_pr:.4f}")
    print()

# === Mean ± Std ===
all_metrics = np.array(all_metrics)
if all_metrics.size == 0:
    # With no evaluated fold the mean is a scalar nan and indexing it
    # (mean[0]) would raise an IndexError.
    print("🚫 No folds could be evaluated.")
else:
    mean = np.mean(all_metrics, axis=0)
    std = np.std(all_metrics, axis=0)

    print("📈 Average Metrics Across Folds:")
    print(f"🔹 Accuracy : {mean[0]*100:.2f}% ± {std[0]*100:.2f}%")
    print(f"🔹 Precision: {mean[1]:.4f} ± {std[1]:.4f}")
    print(f"🔹 Recall   : {mean[2]:.4f} ± {std[2]:.4f}")
    print(f"🔹 F1 Score : {mean[3]:.4f} ± {std[3]:.4f}")
    print(f"🔹 AUC-PR   : {mean[4]:.4f} ± {std[4]:.4f}")

# ResNet50

### Different image preprocessing

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score

# === Setup ===
# Reset Keras state before any model is built in this cell.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
    try:
        # Memory growth is enabled on the first GPU only.
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except RuntimeError as err:
        print(f"GPU Memory Growth Setting Error: {err}")

# === Directories ===
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold = os.path.join(base_fold_dir, 'Fold1')
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsResNetCroppedPre'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPre'
for out_dir in (model_save_dir, history_save_dir):
    os.makedirs(out_dir, exist_ok=True)

# === Learning rates to test ===
learning_rates = [1e-3, 5e-4]

# === Load function with preprocess_input ===
def load_images_and_labels(image_dir):
    """Read the ``.jpg`` files of ``image_dir/Good`` and ``image_dir/Bad``.

    Every image is loaded at 224x224, turned into an array and run through
    ``preprocess_input``. ``Good`` images are labelled 1 and ``Bad`` images 0;
    a missing sub-directory is skipped.

    Returns ``(images, labels)`` as NumPy arrays (both empty if nothing was
    found).
    """
    class_ids = {'Good': 1, 'Bad': 0}
    images, labels = [], []
    for subdir, class_id in class_ids.items():
        full_dir = os.path.join(image_dir, subdir)
        if os.path.exists(full_dir):
            for fname in os.listdir(full_dir):
                if not fname.lower().endswith('.jpg'):
                    continue
                pixels = img_to_array(load_img(os.path.join(full_dir, fname), target_size=(224, 224)))
                images.append(preprocess_input(pixels))
                labels.append(class_id)
    return np.array(images), np.array(labels)

# === Model builder ===
def create_resnet50_model(image_shape, learning_rate):
    """Build and compile a binary classifier on a frozen ResNet50 base.

    The ImageNet-initialised ResNet50 (without its top) is set to
    non-trainable. On top of it: global average pooling, a 256-unit ReLU
    dense layer with L2(0.01) kernel regularisation, batch normalisation,
    dropout 0.5 and a single sigmoid output unit. The model is compiled with
    Adam at ``learning_rate``, binary cross-entropy loss and accuracy.
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False

    # Classification head, applied in order to the backbone output.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    model = Model(inputs=backbone.input, outputs=tensor)
    model.compile(optimizer=Adam(learning_rate), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# === Load Training Data (Folds 2–5) ===
train_folds = [
    load_images_and_labels(os.path.join(base_fold_dir, f'Fold{i}'))
    for i in range(2, 6)
]
X_train = np.concatenate([fold_images for fold_images, _ in train_folds])
y_train = np.concatenate([fold_labels for _, fold_labels in train_folds])
del train_folds  # the per-fold arrays are no longer needed once concatenated

# === Load Validation Data (Fold 1) ===
X_val, y_val = load_images_and_labels(val_fold)

# === Run learning rate experiments ===
# results maps each learning rate to its Fold-1 accuracy (in %) and F1 score.
results = {}

for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    model = create_resnet50_model(X_train.shape[1:], lr)

    checkpoint_path = os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold1.keras')
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-7),
        ModelCheckpoint(checkpoint_path, save_best_only=True),
    ]

    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=100,
        batch_size=32,
        callbacks=callbacks,
        verbose=2,
    )

    # Save history
    history_path = os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold1.pkl')
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)

    # Evaluate on the validation fold using a 0.5 decision threshold
    val_preds = model.predict(X_val) > 0.5
    val_f1 = f1_score(y_val, val_preds)
    val_acc = np.mean(val_preds.flatten() == y_val)

    results[lr] = {'accuracy': val_acc * 100, 'f1_score': val_f1}
    print(f"✅ LR {lr} — Accuracy: {val_acc:.2%}, F1: {val_f1:.4f}")

    # Drop the model and reset Keras state before the next learning rate.
    del model
    tf.keras.backend.clear_session()

# === Plot Results ===
tested_lrs = list(results.keys())
accuracy_pct = [results[rate]['accuracy'] for rate in tested_lrs]
f1_values = [results[rate]['f1_score'] for rate in tested_lrs]

plt.figure(figsize=(10, 5))
plt.plot(tested_lrs, accuracy_pct, marker='o', label='Accuracy (%)')
plt.plot(tested_lrs, f1_values, marker='s', label='F1 Score')
plt.xscale('log')
plt.xlabel('Learning Rate')
plt.ylabel('Performance')
plt.title('ResNet50 Learning Rate vs Performance (Fold 1)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

Analyze

In [None]:
import os
import pickle
import matplotlib.pyplot as plt

# === History Directories ===
history_dir_base = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCropped'
history_dir_more_layers = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedMoreLayers'
history_dir_pre = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPre'

# === Configs: learning rate, color, label, linestyle, source dir
configs = [
    # 1e-3
    {'lr': 1e-3, 'color': 'royalblue', 'label': 'Base LR=1e-3', 'style': '-', 'dir': history_dir_base},
    {'lr': 1e-3, 'color': 'navy', 'label': 'More Layers LR=1e-3', 'style': '--', 'dir': history_dir_more_layers},
    {'lr': 1e-3, 'color': 'teal', 'label': 'Preprocessed LR=1e-3', 'style': ':', 'dir': history_dir_pre},

    # 5e-4
    {'lr': 5e-4, 'color': 'darkorange', 'label': 'Base LR=5e-4', 'style': '-', 'dir': history_dir_base},
    {'lr': 5e-4, 'color': 'firebrick', 'label': 'More Layers LR=5e-4', 'style': '--', 'dir': history_dir_more_layers},
    {'lr': 5e-4, 'color': 'seagreen', 'label': 'Preprocessed LR=5e-4', 'style': ':', 'dir': history_dir_pre},
]

# === Load every history once; both panels below reuse the same objects ===
loaded_histories = []
for cfg in configs:
    path = os.path.join(cfg['dir'], f'history_resnet_lr{cfg["lr"]}_fold1.pkl')
    if not os.path.exists(path):
        # Report the gap instead of silently leaving the curve out.
        print(f"❌ History file not found: {path}")
        continue
    with open(path, 'rb') as f:
        hist = pickle.load(f)
    loaded_histories.append((cfg, hist))

plt.figure(figsize=(18, 8))

# (subplot position, history key, title, y-label, legend location)
panels = [
    (1, 'accuracy', 'Accuracy per Epoch', 'Accuracy', 'lower right'),
    (2, 'loss', 'Loss per Epoch', 'Loss', 'upper right'),
]

for position, metric, title, ylabel, legend_loc in panels:
    plt.subplot(1, 2, position)
    for cfg, hist in loaded_histories:
        # Validation curve is drawn bold, training curve faded in the same color.
        plt.plot(hist[f'val_{metric}'], linestyle=cfg['style'], color=cfg['color'], linewidth=2, label=f'{cfg["label"]} (Val)')
        plt.plot(hist[metric], linestyle=cfg['style'], color=cfg['color'], alpha=0.3, label=f'{cfg["label"]} (Train)')
    plt.title(title, fontsize=16)
    plt.xlabel('Epoch', fontsize=14)
    plt.ylabel(ylabel, fontsize=14)
    plt.legend(fontsize=10, loc=legend_loc)
    plt.grid(True)

plt.tight_layout()
plt.show()

### Learning-rate experiment with the correct preprocessing steps

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score

# === Setup ===
# Reset Keras state before any model is built in this cell.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
    try:
        # Memory growth is enabled on the first GPU only.
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except RuntimeError as err:
        print(f"GPU Memory Growth Setting Error: {err}")

# === Directories ===
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold = os.path.join(base_fold_dir, 'Fold1')
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsResNetCroppedPreProcess'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcess'
for out_dir in (model_save_dir, history_save_dir):
    os.makedirs(out_dir, exist_ok=True)

# === Learning rates to test ===
learning_rates = [5e-3, 1e-3, 5e-4, 1e-4, 5e-5, 1e-5, 5e-6]

# === Load function with preprocess_input ===
def load_images_and_labels(image_dir):
    """Collect preprocessed images and binary labels from ``image_dir``.

    Looks into the ``Good`` (label 1) and ``Bad`` (label 0) sub-directories,
    skipping any that does not exist. Each ``.jpg`` file is loaded at
    224x224, converted to an array and passed through ``preprocess_input``.

    Returns ``(images, labels)`` as NumPy arrays.
    """
    images = []
    labels = []
    for subdir in ('Good', 'Bad'):
        full_dir = os.path.join(image_dir, subdir)
        if not os.path.exists(full_dir):
            continue
        label = int(subdir == 'Good')
        jpg_paths = [
            os.path.join(full_dir, fname)
            for fname in os.listdir(full_dir)
            if fname.lower().endswith('.jpg')
        ]
        for path in jpg_paths:
            raw = img_to_array(load_img(path, target_size=(224, 224)))
            images.append(preprocess_input(raw))
            labels.append(label)
    return np.array(images), np.array(labels)

# === Model builder ===
def create_resnet50_model(image_shape, learning_rate):
    """Return a compiled ResNet50-based binary classifier.

    The ImageNet ResNet50 base (no top) is frozen. The head consists of
    global average pooling, Dense(256, ReLU, L2 0.01), batch normalisation,
    Dropout(0.5) and a sigmoid output unit. Compiled with Adam at
    ``learning_rate``, binary cross-entropy loss and the accuracy metric.
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False

    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(pooled)
    hidden = BatchNormalization()(hidden)
    hidden = Dropout(0.5)(hidden)
    probability = Dense(1, activation='sigmoid')(hidden)

    model = Model(inputs=backbone.input, outputs=probability)
    model.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

# === Load Training Data (Folds 2–5) ===
train_folds = [
    load_images_and_labels(os.path.join(base_fold_dir, f'Fold{i}'))
    for i in range(2, 6)
]
X_train = np.concatenate([fold_images for fold_images, _ in train_folds])
y_train = np.concatenate([fold_labels for _, fold_labels in train_folds])
del train_folds  # the per-fold arrays are no longer needed once concatenated

# === Load Validation Data (Fold 1) ===
X_val, y_val = load_images_and_labels(val_fold)

# === Run learning rate experiments ===
# results maps each learning rate to its Fold-1 accuracy (in %) and F1 score.
results = {}

for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    model = create_resnet50_model(X_train.shape[1:], lr)

    checkpoint_path = os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold1.keras')
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7),
        ModelCheckpoint(checkpoint_path, save_best_only=True),
    ]

    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=200,
        batch_size=32,
        callbacks=callbacks,
        verbose=2,
    )

    # Save history
    history_path = os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold1.pkl')
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)

    # Evaluate on the validation fold using a 0.5 decision threshold
    val_preds = model.predict(X_val) > 0.5
    val_f1 = f1_score(y_val, val_preds)
    val_acc = np.mean(val_preds.flatten() == y_val)

    results[lr] = {'accuracy': val_acc * 100, 'f1_score': val_f1}
    print(f"✅ LR {lr} — Accuracy: {val_acc:.2%}, F1: {val_f1:.4f}")

    # Drop the model and reset Keras state before the next learning rate.
    del model
    tf.keras.backend.clear_session()

# === Plot Results ===
tested_lrs = list(results.keys())
accuracy_pct = [results[rate]['accuracy'] for rate in tested_lrs]
f1_values = [results[rate]['f1_score'] for rate in tested_lrs]

plt.figure(figsize=(10, 5))
plt.plot(tested_lrs, accuracy_pct, marker='o', label='Accuracy (%)')
plt.plot(tested_lrs, f1_values, marker='s', label='F1 Score')
plt.xscale('log')
plt.xlabel('Learning Rate')
plt.ylabel('Performance')
plt.title('ResNet50 Learning Rate vs Performance (Fold 1)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

1e-6

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score

# === Setup ===
# Reset Keras state before any model is built in this cell.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
    try:
        # Memory growth is enabled on the first GPU only.
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except RuntimeError as err:
        print(f"GPU Memory Growth Setting Error: {err}")

# === Directories ===
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold = os.path.join(base_fold_dir, 'Fold1')
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsResNetCroppedPreProcess'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcess'
for out_dir in (model_save_dir, history_save_dir):
    os.makedirs(out_dir, exist_ok=True)

# === Learning rates to test ===
learning_rates = [1e-6]

# === Load function with preprocess_input ===
def load_images_and_labels(image_dir):
    """Read the ``.jpg`` files of ``image_dir/Good`` and ``image_dir/Bad``.

    Every image is loaded at 224x224, turned into an array and run through
    ``preprocess_input``. ``Good`` images are labelled 1 and ``Bad`` images 0;
    a missing sub-directory is skipped.

    Returns ``(images, labels)`` as NumPy arrays (both empty if nothing was
    found).
    """
    class_ids = {'Good': 1, 'Bad': 0}
    images, labels = [], []
    for subdir, class_id in class_ids.items():
        full_dir = os.path.join(image_dir, subdir)
        if os.path.exists(full_dir):
            for fname in os.listdir(full_dir):
                if not fname.lower().endswith('.jpg'):
                    continue
                pixels = img_to_array(load_img(os.path.join(full_dir, fname), target_size=(224, 224)))
                images.append(preprocess_input(pixels))
                labels.append(class_id)
    return np.array(images), np.array(labels)

# === Model builder ===
def create_resnet50_model(image_shape, learning_rate):
    """Build and compile a binary classifier on a frozen ResNet50 base.

    The ImageNet-initialised ResNet50 (without its top) is set to
    non-trainable. On top of it: global average pooling, a 256-unit ReLU
    dense layer with L2(0.01) kernel regularisation, batch normalisation,
    dropout 0.5 and a single sigmoid output unit. The model is compiled with
    Adam at ``learning_rate``, binary cross-entropy loss and accuracy.
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False

    # Classification head, applied in order to the backbone output.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    model = Model(inputs=backbone.input, outputs=tensor)
    model.compile(optimizer=Adam(learning_rate), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# === Load Training Data (Folds 2–5) ===
train_folds = [
    load_images_and_labels(os.path.join(base_fold_dir, f'Fold{i}'))
    for i in range(2, 6)
]
X_train = np.concatenate([fold_images for fold_images, _ in train_folds])
y_train = np.concatenate([fold_labels for _, fold_labels in train_folds])
del train_folds  # the per-fold arrays are no longer needed once concatenated

# === Load Validation Data (Fold 1) ===
X_val, y_val = load_images_and_labels(val_fold)

# === Run learning rate experiments ===
# results maps each learning rate to its Fold-1 accuracy (in %) and F1 score.
results = {}

for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    model = create_resnet50_model(X_train.shape[1:], lr)

    checkpoint_path = os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold1.keras')
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7),
        ModelCheckpoint(checkpoint_path, save_best_only=True),
    ]

    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=200,
        batch_size=32,
        callbacks=callbacks,
        verbose=2,
    )

    # Save history
    history_path = os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold1.pkl')
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)

    # Evaluate on the validation fold using a 0.5 decision threshold
    val_preds = model.predict(X_val) > 0.5
    val_f1 = f1_score(y_val, val_preds)
    val_acc = np.mean(val_preds.flatten() == y_val)

    results[lr] = {'accuracy': val_acc * 100, 'f1_score': val_f1}
    print(f"✅ LR {lr} — Accuracy: {val_acc:.2%}, F1: {val_f1:.4f}")

    # Drop the model and reset Keras state before the next learning rate.
    del model
    tf.keras.backend.clear_session()

# === Plot Results ===
tested_lrs = list(results.keys())
accuracy_pct = [results[rate]['accuracy'] for rate in tested_lrs]
f1_values = [results[rate]['f1_score'] for rate in tested_lrs]

plt.figure(figsize=(10, 5))
plt.plot(tested_lrs, accuracy_pct, marker='o', label='Accuracy (%)')
plt.plot(tested_lrs, f1_values, marker='s', label='F1 Score')
plt.xscale('log')
plt.xlabel('Learning Rate')
plt.ylabel('Performance')
plt.title('ResNet50 Learning Rate vs Performance (Fold 1)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

5e-4

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score

# === Setup ===
# Reset Keras state before any model is built in this cell.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
    try:
        # Memory growth is enabled on the first GPU only.
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except RuntimeError as err:
        print(f"GPU Memory Growth Setting Error: {err}")

# === Directories ===
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold = os.path.join(base_fold_dir, 'Fold1')
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsResNetCroppedPreProcess'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcess'
for out_dir in (model_save_dir, history_save_dir):
    os.makedirs(out_dir, exist_ok=True)

# === Learning rates to test ===
learning_rates = [5e-4]

# === Load function with preprocess_input ===
def load_images_and_labels(image_dir):
    """Collect preprocessed images and binary labels from ``image_dir``.

    Looks into the ``Good`` (label 1) and ``Bad`` (label 0) sub-directories,
    skipping any that does not exist. Each ``.jpg`` file is loaded at
    224x224, converted to an array and passed through ``preprocess_input``.

    Returns ``(images, labels)`` as NumPy arrays.
    """
    images = []
    labels = []
    for subdir in ('Good', 'Bad'):
        full_dir = os.path.join(image_dir, subdir)
        if not os.path.exists(full_dir):
            continue
        label = int(subdir == 'Good')
        jpg_paths = [
            os.path.join(full_dir, fname)
            for fname in os.listdir(full_dir)
            if fname.lower().endswith('.jpg')
        ]
        for path in jpg_paths:
            raw = img_to_array(load_img(path, target_size=(224, 224)))
            images.append(preprocess_input(raw))
            labels.append(label)
    return np.array(images), np.array(labels)

# === Model builder ===
def create_resnet50_model(image_shape, learning_rate):
    """Return a compiled ResNet50-based binary classifier.

    The ImageNet ResNet50 base (no top) is frozen. The head consists of
    global average pooling, Dense(256, ReLU, L2 0.01), batch normalisation,
    Dropout(0.5) and a sigmoid output unit. Compiled with Adam at
    ``learning_rate``, binary cross-entropy loss and the accuracy metric.
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False

    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(pooled)
    hidden = BatchNormalization()(hidden)
    hidden = Dropout(0.5)(hidden)
    probability = Dense(1, activation='sigmoid')(hidden)

    model = Model(inputs=backbone.input, outputs=probability)
    model.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

# === Load Training Data (Folds 2–5) ===
train_folds = [
    load_images_and_labels(os.path.join(base_fold_dir, f'Fold{i}'))
    for i in range(2, 6)
]
X_train = np.concatenate([fold_images for fold_images, _ in train_folds])
y_train = np.concatenate([fold_labels for _, fold_labels in train_folds])
del train_folds  # the per-fold arrays are no longer needed once concatenated

# === Load Validation Data (Fold 1) ===
X_val, y_val = load_images_and_labels(val_fold)

# === Run learning rate experiments ===
# results maps each learning rate to its Fold-1 accuracy (in %) and F1 score.
results = {}

for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    model = create_resnet50_model(X_train.shape[1:], lr)

    checkpoint_path = os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold1.keras')
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7),
        ModelCheckpoint(checkpoint_path, save_best_only=True),
    ]

    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=200,
        batch_size=32,
        callbacks=callbacks,
        verbose=2,
    )

    # Save history
    history_path = os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold1.pkl')
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)

    # Evaluate on the validation fold using a 0.5 decision threshold
    val_preds = model.predict(X_val) > 0.5
    val_f1 = f1_score(y_val, val_preds)
    val_acc = np.mean(val_preds.flatten() == y_val)

    results[lr] = {'accuracy': val_acc * 100, 'f1_score': val_f1}
    print(f"✅ LR {lr} — Accuracy: {val_acc:.2%}, F1: {val_f1:.4f}")

    # Drop the model and reset Keras state before the next learning rate.
    del model
    tf.keras.backend.clear_session()

# === Plot Results ===
tested_lrs = list(results.keys())
accuracy_pct = [results[rate]['accuracy'] for rate in tested_lrs]
f1_values = [results[rate]['f1_score'] for rate in tested_lrs]

plt.figure(figsize=(10, 5))
plt.plot(tested_lrs, accuracy_pct, marker='o', label='Accuracy (%)')
plt.plot(tested_lrs, f1_values, marker='s', label='F1 Score')
plt.xscale('log')
plt.xlabel('Learning Rate')
plt.ylabel('Performance')
plt.title('ResNet50 Learning Rate vs Performance (Fold 1)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

1e-3

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score

# === Setup ===
# Reset Keras state before any model is built in this cell.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
    try:
        # Memory growth is enabled on the first GPU only.
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except RuntimeError as err:
        print(f"GPU Memory Growth Setting Error: {err}")

# === Directories ===
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold = os.path.join(base_fold_dir, 'Fold1')
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsResNetCroppedPreProcess'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcess'
for out_dir in (model_save_dir, history_save_dir):
    os.makedirs(out_dir, exist_ok=True)

# === Learning rates to test ===
learning_rates = [1e-3]

# === Load function with preprocess_input ===
def load_images_and_labels(image_dir):
    """Read the ``.jpg`` files of ``image_dir/Good`` and ``image_dir/Bad``.

    Every image is loaded at 224x224, turned into an array and run through
    ``preprocess_input``. ``Good`` images are labelled 1 and ``Bad`` images 0;
    a missing sub-directory is skipped.

    Returns ``(images, labels)`` as NumPy arrays (both empty if nothing was
    found).
    """
    class_ids = {'Good': 1, 'Bad': 0}
    images, labels = [], []
    for subdir, class_id in class_ids.items():
        full_dir = os.path.join(image_dir, subdir)
        if os.path.exists(full_dir):
            for fname in os.listdir(full_dir):
                if not fname.lower().endswith('.jpg'):
                    continue
                pixels = img_to_array(load_img(os.path.join(full_dir, fname), target_size=(224, 224)))
                images.append(preprocess_input(pixels))
                labels.append(class_id)
    return np.array(images), np.array(labels)

# === Model builder ===
def create_resnet50_model(image_shape, learning_rate):
    """Build and compile a binary classifier on top of a frozen ResNet50.

    Args:
        image_shape: Input shape handed to ``ResNet50`` as ``input_shape``.
        learning_rate: Learning rate passed to the ``Adam`` optimizer.

    Returns:
        The compiled Keras ``Model`` ending in a single sigmoid unit.
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # only the newly added head is trained

    # Classification head, applied in order to the backbone's feature map.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# === Load Training Data (Folds 2–5) ===
# Folds that yield no images are skipped: an empty fold comes back as a 1-D
# empty array, which np.concatenate cannot join with the image batches.
X_train, y_train = [], []
for i in range(2, 6):
    fold_path = os.path.join(base_fold_dir, f'Fold{i}')
    images, labels = load_images_and_labels(fold_path)
    if len(images) == 0:
        print(f"⚠️ No images found in {fold_path}, skipping.")
        continue
    X_train.append(images)
    y_train.append(labels)
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

# === Load Validation Data (Fold 1) ===
X_val, y_val = load_images_and_labels(val_fold)

# === Run learning rate experiments ===
# Maps each learning rate to its validation accuracy (percent) and F1 score.
results = {}

for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    model = create_resnet50_model(X_train.shape[1:], lr)

    # The learning rate is embedded in the file names via its default float
    # formatting (e.g. 0.001, 1e-05); the analysis cells rely on that spelling.
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7),
        ModelCheckpoint(os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold1.keras'), save_best_only=True)
    ]

    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=200,
        batch_size=32,
        callbacks=callbacks,
        verbose=2
    )

    # Save history
    with open(os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold1.pkl'), 'wb') as f:
        pickle.dump(history.history, f)

    # Evaluate: threshold the sigmoid outputs at 0.5 to get class predictions.
    val_preds = model.predict(X_val) > 0.5
    val_f1 = f1_score(y_val, val_preds)
    val_acc = np.mean(val_preds.flatten() == y_val)

    results[lr] = {'accuracy': val_acc * 100, 'f1_score': val_f1}
    print(f"✅ LR {lr} — Accuracy: {val_acc:.2%}, F1: {val_f1:.4f}")

    # Drop the model and reset Keras state before building the next one.
    del model
    tf.keras.backend.clear_session()

# === Plot Results ===
# Accuracy is stored in percent but F1 as a 0-1 fraction; drawing both on one
# axis flattened the F1 curve, so F1 is scaled to percent for the plot only.
plot_lrs = sorted(results)
plt.figure(figsize=(10, 5))
plt.plot(plot_lrs, [results[k]['accuracy'] for k in plot_lrs], marker='o', label='Accuracy (%)')
plt.plot(plot_lrs, [results[k]['f1_score'] * 100 for k in plot_lrs], marker='s', label='F1 Score (%)')
plt.xscale('log')
plt.xlabel('Learning Rate')
plt.ylabel('Performance (%)')
plt.title('ResNet50 Learning Rate vs Performance (Fold 1)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

Run learning rates 1e-2 (0.01) and 5e-2 (0.05)

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score

# === Setup ===
# Reset Keras' global state so models built by earlier cells do not linger.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        # TensorFlow expects memory growth to be configured the same way on
        # every visible GPU, so enable it on all of them, not just the first.
        for gpu_device in physical_devices:
            tf.config.experimental.set_memory_growth(gpu_device, True)
    except RuntimeError as e:
        # Presumably raised when the GPUs were already initialised, e.g. on a
        # re-run of this cell; training can still proceed without the setting.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
# Fold1 is held out for validation; models and histories go to separate folders.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold = os.path.join(base_fold_dir, 'Fold1')
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsResNetCroppedPreProcess'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcess'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Learning rates to test ===
learning_rates = [5e-2, 1e-2]

# === Load function with preprocess_input ===
def load_images_and_labels(image_dir):
    """Load every .jpg found under ``image_dir/Good`` and ``image_dir/Bad``.

    Each file is loaded at 224x224, converted to an array and passed through
    ``preprocess_input``. Files in ``Good`` are labelled 1, files in ``Bad``
    are labelled 0. A class sub-directory that does not exist is skipped.

    Returns:
        tuple: ``(images, labels)`` as NumPy arrays built from the collected lists.
    """
    samples, targets = [], []
    for class_name, class_label in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if not os.path.exists(class_dir):
            continue
        # Extension check is case-insensitive, so .JPG files are included too.
        jpg_names = [n for n in os.listdir(class_dir) if n.lower().endswith('.jpg')]
        for jpg_name in jpg_names:
            picture = load_img(os.path.join(class_dir, jpg_name), target_size=(224, 224))
            samples.append(preprocess_input(img_to_array(picture)))
            targets.append(class_label)
    return np.array(samples), np.array(targets)

# === Model builder ===
def create_resnet50_model(image_shape, learning_rate):
    """Build and compile a binary classifier on top of a frozen ResNet50.

    Args:
        image_shape: Input shape handed to ``ResNet50`` as ``input_shape``.
        learning_rate: Learning rate passed to the ``Adam`` optimizer.

    Returns:
        The compiled Keras ``Model`` ending in a single sigmoid unit.
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # only the newly added head is trained

    # Classification head, applied in order to the backbone's feature map.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# === Load Training Data (Folds 2–5) ===
# Folds that yield no images are skipped: an empty fold comes back as a 1-D
# empty array, which np.concatenate cannot join with the image batches.
X_train, y_train = [], []
for i in range(2, 6):
    fold_path = os.path.join(base_fold_dir, f'Fold{i}')
    images, labels = load_images_and_labels(fold_path)
    if len(images) == 0:
        print(f"⚠️ No images found in {fold_path}, skipping.")
        continue
    X_train.append(images)
    y_train.append(labels)
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

# === Load Validation Data (Fold 1) ===
X_val, y_val = load_images_and_labels(val_fold)

# === Run learning rate experiments ===
# Maps each learning rate to its validation accuracy (percent) and F1 score.
results = {}

for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    model = create_resnet50_model(X_train.shape[1:], lr)

    # The learning rate is embedded in the file names via its default float
    # formatting (e.g. 0.05, 0.01); the analysis cells rely on that spelling.
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7),
        ModelCheckpoint(os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold1.keras'), save_best_only=True)
    ]

    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=200,
        batch_size=32,
        callbacks=callbacks,
        verbose=2
    )

    # Save history
    with open(os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold1.pkl'), 'wb') as f:
        pickle.dump(history.history, f)

    # Evaluate: threshold the sigmoid outputs at 0.5 to get class predictions.
    val_preds = model.predict(X_val) > 0.5
    val_f1 = f1_score(y_val, val_preds)
    val_acc = np.mean(val_preds.flatten() == y_val)

    results[lr] = {'accuracy': val_acc * 100, 'f1_score': val_f1}
    print(f"✅ LR {lr} — Accuracy: {val_acc:.2%}, F1: {val_f1:.4f}")

    # Drop the model and reset Keras state before building the next one.
    del model
    tf.keras.backend.clear_session()

# === Plot Results ===
# Accuracy is stored in percent but F1 as a 0-1 fraction; drawing both on one
# axis flattened the F1 curve, so F1 is scaled to percent for the plot only.
plot_lrs = sorted(results)
plt.figure(figsize=(10, 5))
plt.plot(plot_lrs, [results[k]['accuracy'] for k in plot_lrs], marker='o', label='Accuracy (%)')
plt.plot(plot_lrs, [results[k]['f1_score'] * 100 for k in plot_lrs], marker='s', label='F1 Score (%)')
plt.xscale('log')
plt.xlabel('Learning Rate')
plt.ylabel('Performance (%)')
plt.title('ResNet50 Learning Rate vs Performance (Fold 1)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

### Analyze

In [None]:
import os
import pickle
import matplotlib.pyplot as plt

# === Directory ===
history_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcess'

# === Learning rates and their exact filename representations ===
# The strings must match the float formatting used when the histories were saved.
learning_rates = ['0.001', '0.0005', '0.0001', '1e-05', '5e-05', '5e-06', '1e-06']
colors = ['brown', 'black', 'red', 'blue', 'green', 'purple', 'orange']


fig, axes = plt.subplots(1, 2, figsize=(20, 5))

# === Plot Accuracy (left panel) and Loss (right panel) ===
# Each history file is unpickled once and drawn on both panels; a missing file
# is reported instead of being skipped silently.
for lr, color in zip(learning_rates, colors):
    path = os.path.join(history_dir, f'history_resnet_lr{lr}_fold1.pkl')
    if not os.path.exists(path):
        print(f"⚠️ Missing history: {path}")
        continue
    with open(path, 'rb') as f:
        hist = pickle.load(f)
    # Plot only first 100 epochs
    axes[0].plot(hist['accuracy'][:100], linestyle='-', color=color, alpha=0.6, label=f'Train Acc (LR={lr})')
    axes[0].plot(hist['val_accuracy'][:100], linestyle='--', color=color, label=f'Val Acc (LR={lr})')
    axes[1].plot(hist['loss'][:100], linestyle='-', color=color, alpha=0.6, label=f'Train Loss (LR={lr})')
    axes[1].plot(hist['val_loss'][:100], linestyle='--', color=color, label=f'Val Loss (LR={lr})')

axes[0].set_title('Training and Validation Accuracy (First 100 Epochs)')
axes[0].set_xlabel('Epochs')
axes[0].set_ylabel('Accuracy')
axes[0].grid(True)
axes[0].legend()

axes[1].set_title('Training and Validation Loss (First 100 Epochs)')
axes[1].set_xlabel('Epochs')
axes[1].set_ylabel('Loss')
axes[1].grid(True)
axes[1].legend()

plt.tight_layout()
plt.show()

In [None]:
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import make_interp_spline

# === Directory with ResNet histories ===
history_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcess'

# === Learning rates and filenames ===
# The strings are used verbatim in the file names; the floats are for plotting.
learning_rates_str =  ['0.001', '0.0005', '0.0001', '1e-05', '5e-05', '5e-06', '1e-06']
learning_rates = [float(lr) for lr in learning_rates_str]

# === Extract best val acc and min val loss ===
# Missing histories are recorded as None so the lists stay aligned with
# learning_rates and can be filtered afterwards.
val_accuracies = []
val_losses = []

for lr_str in learning_rates_str:
    path = os.path.join(history_dir, f'history_resnet_lr{lr_str}_fold1.pkl')
    if os.path.exists(path):
        with open(path, 'rb') as f:
            hist = pickle.load(f)
        best_acc = max(hist['val_accuracy'])
        best_loss = min(hist['val_loss'])
        val_accuracies.append(best_acc)
        val_losses.append(best_loss)
        print(f"✅ LR={lr_str}: Val Acc={best_acc:.4f}, Val Loss={best_loss:.4f}")
    else:
        print(f"⚠️ Missing file: {lr_str}")
        val_accuracies.append(None)
        val_losses.append(None)

# === Filter out missing values ===
filtered_lrs, filtered_accs, filtered_losses = [], [], []
for lr, acc, loss in zip(learning_rates, val_accuracies, val_losses):
    if acc is not None and loss is not None:
        filtered_lrs.append(lr)
        filtered_accs.append(acc)
        filtered_losses.append(loss)

# Without any history there is nothing to plot; fail with a clear message
# instead of an opaque error from min() on an empty array.
if not filtered_lrs:
    raise FileNotFoundError(f"No history files found in {history_dir}")

# === Sort by learning rate to ensure strictly increasing x ===
sorted_indices = np.argsort(filtered_lrs)
filtered_lrs = np.array(filtered_lrs)[sorted_indices]
filtered_accs = np.array(filtered_accs)[sorted_indices]
filtered_losses = np.array(filtered_losses)[sorted_indices]

# === Smooth curves ===
# Interpolate in log10(lr) space. A degree-2 spline needs at least 3 points,
# so with fewer histories only the raw points are drawn.
x = np.log10(filtered_lrs)
can_smooth = len(x) >= 3
if can_smooth:
    x_smooth = np.linspace(x.min(), x.max(), 300)
    acc_smooth = make_interp_spline(x, filtered_accs, k=2)(x_smooth)
    loss_smooth = make_interp_spline(x, filtered_losses, k=2)(x_smooth)
else:
    print(f"⚠️ Only {len(x)} histories available; skipping spline smoothing.")

# === Plot ===
fig, ax1 = plt.subplots(figsize=(10, 6))

# Accuracy (left y-axis)
ax1.set_xlabel('Learning Rate (log scale)')
ax1.set_ylabel('Validation Accuracy', color='blue')
if can_smooth:
    ax1.plot(10**x_smooth, acc_smooth, color='blue')
ax1.scatter(filtered_lrs, filtered_accs, color='blue')
ax1.set_xscale('log')
ax1.tick_params(axis='y', labelcolor='blue')

# Loss (right y-axis)
ax2 = ax1.twinx()
ax2.set_ylabel('Validation Loss', color='red')
if can_smooth:
    ax2.plot(10**x_smooth, loss_smooth, color='red')
ax2.scatter(filtered_lrs, filtered_losses, color='red')
ax2.tick_params(axis='y', labelcolor='red')

plt.title('Validation Accuracy and Loss vs Learning Rate (ResNet)')
plt.grid(True)
fig.tight_layout()
plt.show()

In [None]:
import os
import pickle
import matplotlib.pyplot as plt

# === Directory ===
history_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcess'

# === Learning rates and their exact filename representations ===
# zip() below stops at the shorter list, so the extra colors are simply unused.
learning_rates = ['0.0001', '0.0005', '0.001', '0.005', '0.01', '0.05']
colors = ['red', 'blue', 'green', 'purple', 'orange', 'brown', 'teal', 'pink', 'cyan']

# === Load each history once ===
# Both figures below read from this dict, so every file is unpickled a single
# time; missing files are reported instead of being skipped silently.
histories = {}
for lr in learning_rates:
    path = os.path.join(history_dir, f'history_resnet_lr{lr}_fold1.pkl')
    if os.path.exists(path):
        with open(path, 'rb') as f:
            histories[lr] = pickle.load(f)
    else:
        print(f"⚠️ Missing history: {path}")

# === Plot Accuracy ===
plt.figure(figsize=(12, 5))
for lr, color in zip(learning_rates, colors):
    hist = histories.get(lr)
    if hist is None:
        continue
    plt.plot(hist['accuracy'], linestyle='-', color=color, alpha=0.6, label=f'Train Acc (LR={lr})')
    plt.plot(hist['val_accuracy'], linestyle='--', color=color, label=f'Val Acc (LR={lr})')
plt.title('Training and Validation Accuracy per Learning Rate')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()

# === Plot Loss ===
plt.figure(figsize=(12, 5))
for lr, color in zip(learning_rates, colors):
    hist = histories.get(lr)
    if hist is None:
        continue
    plt.plot(hist['loss'], linestyle='-', color=color, alpha=0.6, label=f'Train Loss (LR={lr})')
    plt.plot(hist['val_loss'], linestyle='--', color=color, label=f'Val Loss (LR={lr})')
plt.title('Training and Validation Loss per Learning Rate')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()

Metrics

In [None]:
import os
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.resnet50 import preprocess_input
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, precision_recall_curve, auc

# === Settings ===
# Directory holding the saved .keras models (the same path the training cells
# use as model_save_dir).
model_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsResNetCroppedPreProcess'
# Fold whose images are loaded as the validation set below.
val_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Fold1'
# Learning rates as strings, spelled exactly as they appear in the model file names.
learning_rates = ['0.001', '0.0005', '0.0001', '1e-05', '5e-05', '5e-06', '1e-06']

# === Helper function to load images ===
def load_images_and_labels(image_dir):
    """Load every .jpg found under ``image_dir/Good`` and ``image_dir/Bad``.

    Each file is loaded at 224x224, converted to an array and passed through
    ``preprocess_input``. Files in ``Good`` are labelled 1, files in ``Bad``
    are labelled 0. A class sub-directory that does not exist is skipped.

    The extension check is case-insensitive so that this loader selects the
    same files as the loader used by the training cells (which also accepts
    ``.JPG``); otherwise metrics would be computed on a different image set.

    Returns:
        tuple: ``(images, labels)`` as NumPy arrays built from the collected lists.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_path = os.path.join(image_dir, subdir)
        if not os.path.exists(full_path):
            continue
        label = 1 if subdir == 'Good' else 0
        for fname in os.listdir(full_path):
            if not fname.lower().endswith('.jpg'):
                continue
            img_path = os.path.join(full_path, fname)
            img = load_img(img_path, target_size=(224, 224))
            img_array = preprocess_input(img_to_array(img))
            images.append(img_array)
            labels.append(label)
    return np.array(images), np.array(labels)

# === Load validation data ===
X_val, y_val = load_images_and_labels(val_dir)

# === Evaluation loop (validation only) ===
# For every learning rate with a saved model, score the validation fold and
# print a small metrics report.
for lr in learning_rates:
    model_path = os.path.join(model_dir, f'model_resnet_lr{lr}_fold1.keras')
    if os.path.exists(model_path):
        model = load_model(model_path)

        # === Evaluate on validation set ===
        # Sigmoid outputs are thresholded at 0.5 to obtain hard class labels.
        probs = model.predict(X_val, verbose=0).flatten()
        preds = (probs > 0.5).astype(int)

        acc = accuracy_score(y_val, preds)
        f1 = f1_score(y_val, preds)
        prec = precision_score(y_val, preds)
        rec = recall_score(y_val, preds)

        # Area under the precision-recall curve, computed from the raw probabilities.
        prec_vals, rec_vals, _ = precision_recall_curve(y_val, probs)
        auc_pr = auc(rec_vals, prec_vals)

        print(
            f"📊 Validation Metrics for LR={lr}",
            f"  Accuracy : {acc:.4f}",
            f"  F1 Score : {f1:.4f}",
            f"  Precision: {prec:.4f}",
            f"  Recall   : {rec:.4f}",
            f"  AUC-PR   : {auc_pr:.4f}",
            "-" * 40,
            sep="\n",
        )
    else:
        print(f"⚠️ Model for LR={lr} not found, skipping.")

In [None]:
# Summarise the loss curves of every saved history as one table row per rate.
# NOTE(review): relies on history_dir, learning_rates, os and pickle defined
# by earlier cells — confirm they are in scope before running this cell alone.
loss_metrics = []

for lr in learning_rates:
    file_path = os.path.join(history_dir, f'history_resnet_lr{lr}_fold1.pkl')
    if not os.path.exists(file_path):
        # Keep a placeholder row so absent runs stay visible in the table.
        loss_metrics.append((lr, 'missing', 'missing', 'missing'))
        continue

    with open(file_path, 'rb') as f:
        hist = pickle.load(f)

    val_curve = hist['val_loss']
    min_val_loss = min(val_curve)
    avg_val_loss = sum(val_curve) / len(val_curve)
    final_train_loss = hist['loss'][-1]
    row = (
        lr,
        round(min_val_loss, 4),
        round(avg_val_loss, 4),
        round(final_train_loss, 4),
    )
    loss_metrics.append(row)

# === Display Updated Results ===
header = f"{'LR':<10} {'Min Val Loss':<15} {'Avg Val Loss':<15} {'Final Train Loss'}"
print(header)
print("-" * 55)
for lr, min_loss, avg_loss, train_loss in loss_metrics:
    line = f"{lr:<10} {min_loss:<15} {avg_loss:<15} {train_loss}"
    print(line)

### Get a finer-grained graph across learning rates

Train

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score

# === Setup ===
# Reset Keras' global state so models built by earlier cells do not linger.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        # TensorFlow expects memory growth to be configured the same way on
        # every visible GPU, so enable it on all of them, not just the first.
        for gpu_device in physical_devices:
            tf.config.experimental.set_memory_growth(gpu_device, True)
    except RuntimeError as e:
        # Presumably raised when the GPUs were already initialised, e.g. on a
        # re-run of this cell; training can still proceed without the setting.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
# Fold1 is held out for validation; models and histories go to separate folders.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold = os.path.join(base_fold_dir, 'Fold1')
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsResNetCroppedPreProcess'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcess'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Learning rates to test ===
learning_rates = [1e-5, 2e-5, 3e-5, 5e-5, 7e-5, 1e-4, 2e-4, 3e-4, 5e-4, 7e-4, 1e-3]

# === Load function with preprocess_input ===
def load_images_and_labels(image_dir):
    """Load every .jpg found under ``image_dir/Good`` and ``image_dir/Bad``.

    Each file is loaded at 224x224, converted to an array and passed through
    ``preprocess_input``. Files in ``Good`` are labelled 1, files in ``Bad``
    are labelled 0. A class sub-directory that does not exist is skipped.

    Returns:
        tuple: ``(images, labels)`` as NumPy arrays built from the collected lists.
    """
    samples, targets = [], []
    for class_name, class_label in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if not os.path.exists(class_dir):
            continue
        # Extension check is case-insensitive, so .JPG files are included too.
        jpg_names = [n for n in os.listdir(class_dir) if n.lower().endswith('.jpg')]
        for jpg_name in jpg_names:
            picture = load_img(os.path.join(class_dir, jpg_name), target_size=(224, 224))
            samples.append(preprocess_input(img_to_array(picture)))
            targets.append(class_label)
    return np.array(samples), np.array(targets)

# === Model builder ===
def create_resnet50_model(image_shape, learning_rate):
    """Build and compile a binary classifier on top of a frozen ResNet50.

    Args:
        image_shape: Input shape handed to ``ResNet50`` as ``input_shape``.
        learning_rate: Learning rate passed to the ``Adam`` optimizer.

    Returns:
        The compiled Keras ``Model`` ending in a single sigmoid unit.
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # only the newly added head is trained

    # Classification head, applied in order to the backbone's feature map.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# === Load Training Data (Folds 2–5) ===
# Folds that yield no images are skipped: an empty fold comes back as a 1-D
# empty array, which np.concatenate cannot join with the image batches.
X_train, y_train = [], []
for i in range(2, 6):
    fold_path = os.path.join(base_fold_dir, f'Fold{i}')
    images, labels = load_images_and_labels(fold_path)
    if len(images) == 0:
        print(f"⚠️ No images found in {fold_path}, skipping.")
        continue
    X_train.append(images)
    y_train.append(labels)
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

# === Load Validation Data (Fold 1) ===
X_val, y_val = load_images_and_labels(val_fold)

# === Run learning rate experiments ===
# Maps each learning rate to its validation accuracy (percent) and F1 score.
results = {}

for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    model = create_resnet50_model(X_train.shape[1:], lr)

    # The learning rate is embedded in the file names via its default float
    # formatting (e.g. 1e-05, 0.0005); the analysis cells rely on that spelling.
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7),
        ModelCheckpoint(os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold1.keras'), save_best_only=True)
    ]

    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=50,
        batch_size=32,
        callbacks=callbacks,
        verbose=2
    )

    # Save history
    with open(os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold1.pkl'), 'wb') as f:
        pickle.dump(history.history, f)

    # Evaluate: threshold the sigmoid outputs at 0.5 to get class predictions.
    val_preds = model.predict(X_val) > 0.5
    val_f1 = f1_score(y_val, val_preds)
    val_acc = np.mean(val_preds.flatten() == y_val)

    results[lr] = {'accuracy': val_acc * 100, 'f1_score': val_f1}
    print(f"✅ LR {lr} — Accuracy: {val_acc:.2%}, F1: {val_f1:.4f}")

    # Drop the model and reset Keras state before building the next one.
    del model
    tf.keras.backend.clear_session()

# === Plot Results ===
# Accuracy is stored in percent but F1 as a 0-1 fraction; drawing both on one
# axis flattened the F1 curve, so F1 is scaled to percent for the plot only.
plot_lrs = sorted(results)
plt.figure(figsize=(10, 5))
plt.plot(plot_lrs, [results[k]['accuracy'] for k in plot_lrs], marker='o', label='Accuracy (%)')
plt.plot(plot_lrs, [results[k]['f1_score'] * 100 for k in plot_lrs], marker='s', label='F1 Score (%)')
plt.xscale('log')
plt.xlabel('Learning Rate')
plt.ylabel('Performance (%)')
plt.title('ResNet50 Learning Rate vs Performance (Fold 1)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

Draw graph

In [None]:
from scipy.interpolate import make_interp_spline

# === Directory with saved history files ===
history_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcess'

# === Learning rates whose histories should be plotted ===
# These are floats so that f'{lr}' reproduces the spelling used in the saved
# file names. 1.5e-4 is not part of the main sweep and needs its own run.
learning_rates = [1e-5, 2e-5, 3e-5, 5e-5, 7e-5, 1e-4, 1.5e-4, 2e-4, 3e-4, 5e-4, 7e-4, 1e-3]

# === Collect best val accuracy and min val loss per LR ===
# Missing histories are recorded as None so the lists stay aligned with
# learning_rates and can be filtered afterwards.
val_accuracies = []
val_losses = []

for lr in learning_rates:
    file_path = os.path.join(history_dir, f'history_resnet_lr{lr}_fold1.pkl')
    if os.path.exists(file_path):
        with open(file_path, 'rb') as f:
            history = pickle.load(f)
        best_val_acc = max(history['val_accuracy'])
        min_val_loss = min(history['val_loss'])
        val_accuracies.append(best_val_acc)
        val_losses.append(min_val_loss)
        # One mantissa decimal keeps 1.5e-04 distinct from 1e-04 and 2e-04.
        print(f"✅ LR={lr:.1e}: Val Acc={best_val_acc:.4f}, Val Loss={min_val_loss:.4f}")
    else:
        val_accuracies.append(None)
        val_losses.append(None)
        print(f"⚠️ Missing history: {file_path}")

# === Filter out missing values ===
filtered_lrs, filtered_accs, filtered_losses = [], [], []
for lr, acc, loss in zip(learning_rates, val_accuracies, val_losses):
    if acc is not None and loss is not None:
        filtered_lrs.append(lr)
        filtered_accs.append(acc)
        filtered_losses.append(loss)

# Without any history there is nothing to plot; fail with a clear message
# instead of an opaque error from min() on an empty array.
if not filtered_lrs:
    raise FileNotFoundError(f"No history files found in {history_dir}")

# === Sort for safe interpolation ===
sorted_idx = np.argsort(filtered_lrs)
filtered_lrs = np.array(filtered_lrs)[sorted_idx]
filtered_accs = np.array(filtered_accs)[sorted_idx]
filtered_losses = np.array(filtered_losses)[sorted_idx]

# === Smooth curves ===
# Interpolate in log10(lr) space. A degree-2 spline needs at least 3 points,
# so with fewer histories only the raw points are drawn.
x = np.log10(filtered_lrs)
can_smooth = len(x) >= 3
if can_smooth:
    x_smooth = np.linspace(x.min(), x.max(), 300)
    acc_smooth = make_interp_spline(x, filtered_accs, k=2)(x_smooth)
    loss_smooth = make_interp_spline(x, filtered_losses, k=2)(x_smooth)
else:
    print(f"⚠️ Only {len(x)} histories available; skipping spline smoothing.")

# === Plot smoothed curves ===
fig, ax1 = plt.subplots(figsize=(10, 6))

# Accuracy (left y-axis)
ax1.set_xlabel('Learning Rate (log scale)')
ax1.set_ylabel('Best Validation Accuracy', color='blue')
if can_smooth:
    ax1.plot(10**x_smooth, acc_smooth, color='blue')
ax1.scatter(filtered_lrs, filtered_accs, color='blue')
ax1.set_xscale('log')
ax1.tick_params(axis='y', labelcolor='blue')

# Add all learning rate ticks
# '.1e' rather than '.0e': a zero-decimal mantissa would label 1.5e-4 with the
# same text as one of its neighbours.
ax1.set_xticks(filtered_lrs)
ax1.set_xticklabels([f"{lr:.1e}" for lr in filtered_lrs], rotation=45)

# Loss (right y-axis)
ax2 = ax1.twinx()
ax2.set_ylabel('Minimum Validation Loss', color='red')
if can_smooth:
    ax2.plot(10**x_smooth, loss_smooth, color='red')
ax2.scatter(filtered_lrs, filtered_losses, color='red')
ax2.tick_params(axis='y', labelcolor='red')

plt.title('Validation Accuracy and Loss vs Learning Rate (ResNet50)')
plt.grid(True, which='both', axis='x')
fig.tight_layout()
plt.show()

Add learning rate 1.5e-4

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score

# === Setup ===
# Reset Keras' global state so models built by earlier cells do not linger.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        # TensorFlow expects memory growth to be configured the same way on
        # every visible GPU, so enable it on all of them, not just the first.
        for gpu_device in physical_devices:
            tf.config.experimental.set_memory_growth(gpu_device, True)
    except RuntimeError as e:
        # Presumably raised when the GPUs were already initialised, e.g. on a
        # re-run of this cell; training can still proceed without the setting.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
# Fold1 is held out for validation; models and histories go to separate folders.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold = os.path.join(base_fold_dir, 'Fold1')
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsResNetCroppedPreProcess'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcess'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Learning rates to test ===
learning_rates = [1.5e-4]

# === Load function with preprocess_input ===
def load_images_and_labels(image_dir):
    """Load every .jpg found under ``image_dir/Good`` and ``image_dir/Bad``.

    Each file is loaded at 224x224, converted to an array and passed through
    ``preprocess_input``. Files in ``Good`` are labelled 1, files in ``Bad``
    are labelled 0. A class sub-directory that does not exist is skipped.

    Returns:
        tuple: ``(images, labels)`` as NumPy arrays built from the collected lists.
    """
    samples, targets = [], []
    for class_name, class_label in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if not os.path.exists(class_dir):
            continue
        # Extension check is case-insensitive, so .JPG files are included too.
        jpg_names = [n for n in os.listdir(class_dir) if n.lower().endswith('.jpg')]
        for jpg_name in jpg_names:
            picture = load_img(os.path.join(class_dir, jpg_name), target_size=(224, 224))
            samples.append(preprocess_input(img_to_array(picture)))
            targets.append(class_label)
    return np.array(samples), np.array(targets)

# === Model builder ===
def create_resnet50_model(image_shape, learning_rate):
    """Build and compile a binary classifier on top of a frozen ResNet50.

    Args:
        image_shape: Input shape handed to ``ResNet50`` as ``input_shape``.
        learning_rate: Learning rate passed to the ``Adam`` optimizer.

    Returns:
        The compiled Keras ``Model`` ending in a single sigmoid unit.
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=image_shape)
    backbone.trainable = False  # only the newly added head is trained

    # Classification head, applied in order to the backbone's feature map.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# === Load Training Data (Folds 2–5) ===
# Folds that yield no images are skipped: an empty fold comes back as a 1-D
# empty array, which np.concatenate cannot join with the image batches.
X_train, y_train = [], []
for i in range(2, 6):
    fold_path = os.path.join(base_fold_dir, f'Fold{i}')
    images, labels = load_images_and_labels(fold_path)
    if len(images) == 0:
        print(f"⚠️ No images found in {fold_path}, skipping.")
        continue
    X_train.append(images)
    y_train.append(labels)
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

# === Load Validation Data (Fold 1) ===
X_val, y_val = load_images_and_labels(val_fold)

# === Run learning rate experiments ===
# Maps each learning rate to its validation accuracy (percent) and F1 score.
results = {}

for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    model = create_resnet50_model(X_train.shape[1:], lr)

    # The learning rate is embedded in the file names via its default float
    # formatting (here 0.00015); the analysis cells rely on that spelling.
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7),
        ModelCheckpoint(os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold1.keras'), save_best_only=True)
    ]

    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=50,
        batch_size=32,
        callbacks=callbacks,
        verbose=2
    )

    # Save history
    with open(os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold1.pkl'), 'wb') as f:
        pickle.dump(history.history, f)

    # Evaluate: threshold the sigmoid outputs at 0.5 to get class predictions.
    val_preds = model.predict(X_val) > 0.5
    val_f1 = f1_score(y_val, val_preds)
    val_acc = np.mean(val_preds.flatten() == y_val)

    results[lr] = {'accuracy': val_acc * 100, 'f1_score': val_f1}
    print(f"✅ LR {lr} — Accuracy: {val_acc:.2%}, F1: {val_f1:.4f}")

    # Drop the model and reset Keras state before building the next one.
    del model
    tf.keras.backend.clear_session()

# === Plot Results ===
# Accuracy is stored in percent but F1 as a 0-1 fraction; drawing both on one
# axis flattened the F1 curve, so F1 is scaled to percent for the plot only.
plot_lrs = sorted(results)
plt.figure(figsize=(10, 5))
plt.plot(plot_lrs, [results[k]['accuracy'] for k in plot_lrs], marker='o', label='Accuracy (%)')
plt.plot(plot_lrs, [results[k]['f1_score'] * 100 for k in plot_lrs], marker='s', label='F1 Score (%)')
plt.xscale('log')
plt.xlabel('Learning Rate')
plt.ylabel('Performance (%)')
plt.title('ResNet50 Learning Rate vs Performance (Fold 1)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

### Try more (higher) learning rates

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score

# === Setup ===
# Reset Keras' global state so models built by earlier cells do not linger.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        # TensorFlow expects memory growth to be configured the same way on
        # every visible GPU, so enable it on all of them, not just the first.
        for gpu_device in physical_devices:
            tf.config.experimental.set_memory_growth(gpu_device, True)
    except RuntimeError as e:
        # Presumably raised when the GPUs were already initialised, e.g. on a
        # re-run of this cell; training can still proceed without the setting.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
# Fold1 is held out for validation; models and histories go to separate folders.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold = os.path.join(base_fold_dir, 'Fold1')
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsResNetCroppedPreProcess'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcess'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Learning rates to test ===
learning_rates = [
    2e-3,   # 0.002
    3e-3,   # 0.003
    5e-3,   # 0.005
    7e-3,   # 0.007
    1e-2,   # 0.010
    1.5e-2, # 0.015
    2e-2,   # 0.020
    3e-2,   # 0.030
    5e-2,   # 0.050
    1e-1    # 0.100
]


# === Load function with preprocess_input ===
def load_images_and_labels(image_dir):
    """Load every .jpg found under ``image_dir/Good`` and ``image_dir/Bad``.

    Each file is loaded at 224x224, converted to an array and passed through
    ``preprocess_input``. Files in ``Good`` are labelled 1, files in ``Bad``
    are labelled 0. A class sub-directory that does not exist is skipped.

    Returns:
        tuple: ``(images, labels)`` as NumPy arrays built from the collected lists.
    """
    samples, targets = [], []
    for class_name, class_label in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if not os.path.exists(class_dir):
            continue
        # Extension check is case-insensitive, so .JPG files are included too.
        jpg_names = [n for n in os.listdir(class_dir) if n.lower().endswith('.jpg')]
        for jpg_name in jpg_names:
            picture = load_img(os.path.join(class_dir, jpg_name), target_size=(224, 224))
            samples.append(preprocess_input(img_to_array(picture)))
            targets.append(class_label)
    return np.array(samples), np.array(targets)

# === Model builder ===
def create_resnet50_model(image_shape, learning_rate):
    """Build and compile a binary classifier on a frozen ResNet50 backbone.

    Args:
        image_shape: Input shape handed to ``ResNet50`` as ``input_shape``.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` with a single sigmoid output, trained with
        binary cross-entropy and reporting accuracy.
    """
    backbone = ResNet50(include_top=False, weights='imagenet', input_shape=image_shape)
    backbone.trainable = False  # only the new head is trained

    # Classification head, applied in order on top of the backbone output.
    head_layers = (
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    )
    tensor = backbone.output
    for head_layer in head_layers:
        tensor = head_layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# === Load Training Data (Folds 2–5) ===
# Fold1 is held out for validation; the remaining four folds are loaded one
# by one and concatenated into a single training set.
X_train, y_train = [], []
for i in range(2, 6):
    images, labels = load_images_and_labels(os.path.join(base_fold_dir, f'Fold{i}'))
    X_train.append(images)
    y_train.append(labels)
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

# === Load Validation Data (Fold 1) ===
X_val, y_val = load_images_and_labels(val_fold)

# === Run learning rate experiments ===
# Maps each learning rate to {'accuracy': percent, 'f1_score': fraction}.
results = {}

for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    # A fresh model per learning rate; the input shape comes from the data.
    model = create_resnet50_model(X_train.shape[1:], lr)
    
    callbacks = [
        # Stop once val_loss has not improved for 25 epochs.
        # NOTE(review): with epochs=50 early stopping may never trigger;
        # whether the best weights are restored in that case depends on the
        # Keras version — confirm before reading the metrics below as
        # "best epoch" metrics.
        EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
        # Multiply the learning rate by 0.2 after 7 epochs without
        # val_loss improvement, never going below 1e-7.
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7),
        # One checkpoint file per learning rate; only the best epoch is kept.
        ModelCheckpoint(os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold1.keras'), save_best_only=True)
    ]
    
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=50,
        batch_size=32,
        callbacks=callbacks,
        verbose=2
    )
    
    # Save history
    # The file name embeds the learning rate so the analysis cells can
    # locate the history of each run.
    with open(os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold1.pkl'), 'wb') as f:
        pickle.dump(history.history, f)

    # Evaluate
    # Threshold the sigmoid outputs at 0.5 to obtain boolean predictions.
    val_preds = model.predict(X_val) > 0.5
    val_f1 = f1_score(y_val, val_preds)
    # Predictions are flattened so they compare element-wise against y_val.
    val_acc = np.mean(val_preds.flatten() == y_val)

    results[lr] = {'accuracy': val_acc * 100, 'f1_score': val_f1}
    print(f"✅ LR {lr} — Accuracy: {val_acc:.2%}, F1: {val_f1:.4f}")

    # Release the model and Keras state before the next learning rate.
    del model
    tf.keras.backend.clear_session()

# === Plot Results ===
# Accuracy is stored in percent (0-100) and F1 as a fraction (0-1), so each
# metric gets its own y-axis; on a shared axis the F1 curve would be flattened
# against the bottom. Learning rates are sorted so the lines run left to right.
plot_lrs = sorted(results)
plot_accuracies = [results[lr_value]['accuracy'] for lr_value in plot_lrs]
plot_f1_scores = [results[lr_value]['f1_score'] for lr_value in plot_lrs]

fig, ax_acc = plt.subplots(figsize=(10, 5))
acc_line, = ax_acc.plot(plot_lrs, plot_accuracies, marker='o', color='tab:blue', label='Accuracy (%)')
ax_acc.set_xscale('log')
ax_acc.set_xlabel('Learning Rate')
ax_acc.set_ylabel('Accuracy (%)')
ax_acc.grid(True)

# Secondary y-axis for the F1 score.
ax_f1 = ax_acc.twinx()
f1_line, = ax_f1.plot(plot_lrs, plot_f1_scores, marker='s', color='tab:orange', label='F1 Score')
ax_f1.set_ylabel('F1 Score')

ax_acc.set_title('ResNet50 Learning Rate vs Performance (Fold 1)')
# One combined legend for the lines of both axes.
ax_acc.legend(handles=[acc_line, f1_line])
fig.tight_layout()
plt.show()

More learning rates (additional values between 0.06 and 0.1)

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score

# === Setup ===
# Drop Keras state left over from previously executed cells.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        # Memory growth has to be configured the same way on every visible
        # GPU, so enable it on all of them instead of only the first one.
        for gpu_device in physical_devices:
            tf.config.experimental.set_memory_growth(gpu_device, True)
    except RuntimeError as e:
        # Reported instead of raised so the rest of the cell can still run.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
# Same output folders as the previous sweep, so the histories of both sweeps
# end up side by side.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold = os.path.join(base_fold_dir, 'Fold1')
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsResNetCroppedPreProcess'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcess'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Learning rates to test ===
learning_rates = [
    1e-1 ,  
    6e-2, 8e-2, 9e-2
]


# === Load function with preprocess_input ===
def load_images_and_labels(image_dir, target_size=(224, 224)):
    """Load the JPEG images of one fold together with their class labels.

    Looks for ``Good`` and ``Bad`` sub-directories inside ``image_dir``. Every
    ``.jpg`` file is resized to ``target_size``, converted to an array and run
    through ResNet50's ``preprocess_input``.

    Args:
        image_dir: Fold directory holding the ``Good`` / ``Bad`` folders.
            A missing folder is skipped.
        target_size: ``(height, width)`` passed to ``load_img``.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. Labels are 1 for ``Good``
        and 0 for ``Bad``. When no image is found, ``images`` has shape
        ``(0, height, width, 3)`` (assuming 3-channel RGB loading) so it can
        still be concatenated with the arrays of other folds.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_dir = os.path.join(image_dir, subdir)
        if not os.path.isdir(full_dir):
            continue
        # Sorted so the sample order does not depend on the filesystem.
        for fname in sorted(os.listdir(full_dir)):
            if fname.lower().endswith('.jpg'):
                path = os.path.join(full_dir, fname)
                img = load_img(path, target_size=target_size)
                img_arr = preprocess_input(img_to_array(img))
                images.append(img_arr)
                labels.append(1 if subdir == 'Good' else 0)
    if not images:
        # np.array([]) would be 1-D float64 and break np.concatenate later on.
        empty_images = np.empty((0, *tuple(target_size), 3), dtype=np.float32)
        return empty_images, np.empty((0,), dtype=int)
    return np.array(images), np.array(labels, dtype=int)

# === Model builder ===
def create_resnet50_model(image_shape, learning_rate):
    """Build and compile a binary classifier on a frozen ResNet50 backbone.

    Args:
        image_shape: Input shape handed to ``ResNet50`` as ``input_shape``.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` with a single sigmoid output, trained with
        binary cross-entropy and reporting accuracy.
    """
    backbone = ResNet50(include_top=False, weights='imagenet', input_shape=image_shape)
    backbone.trainable = False  # only the new head is trained

    # Classification head, applied in order on top of the backbone output.
    head_layers = (
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    )
    tensor = backbone.output
    for head_layer in head_layers:
        tensor = head_layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# === Load Training Data (Folds 2–5) ===
# Fold1 is held out for validation; the remaining four folds are loaded one
# by one and concatenated into a single training set.
X_train, y_train = [], []
for i in range(2, 6):
    images, labels = load_images_and_labels(os.path.join(base_fold_dir, f'Fold{i}'))
    X_train.append(images)
    y_train.append(labels)
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

# === Load Validation Data (Fold 1) ===
X_val, y_val = load_images_and_labels(val_fold)

# === Run learning rate experiments ===
# Maps each learning rate to {'accuracy': percent, 'f1_score': fraction}.
results = {}

for lr in learning_rates:
    print(f"\n🚀 Training with Learning Rate = {lr}")
    # A fresh model per learning rate; the input shape comes from the data.
    model = create_resnet50_model(X_train.shape[1:], lr)
    
    callbacks = [
        # Stop once val_loss has not improved for 25 epochs.
        # NOTE(review): with epochs=50 early stopping may never trigger;
        # whether the best weights are restored in that case depends on the
        # Keras version — confirm before reading the metrics below as
        # "best epoch" metrics.
        EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
        # Multiply the learning rate by 0.2 after 7 epochs without
        # val_loss improvement, never going below 1e-7.
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7),
        # One checkpoint file per learning rate; only the best epoch is kept.
        ModelCheckpoint(os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold1.keras'), save_best_only=True)
    ]
    
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=50,
        batch_size=32,
        callbacks=callbacks,
        verbose=2
    )
    
    # Save history
    # The file name embeds the learning rate so the analysis cells can
    # locate the history of each run. A run for 1e-1 overwrites the file
    # written by the previous sweep, which used the same name.
    with open(os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold1.pkl'), 'wb') as f:
        pickle.dump(history.history, f)

    # Evaluate
    # Threshold the sigmoid outputs at 0.5 to obtain boolean predictions.
    val_preds = model.predict(X_val) > 0.5
    val_f1 = f1_score(y_val, val_preds)
    # Predictions are flattened so they compare element-wise against y_val.
    val_acc = np.mean(val_preds.flatten() == y_val)

    results[lr] = {'accuracy': val_acc * 100, 'f1_score': val_f1}
    print(f"✅ LR {lr} — Accuracy: {val_acc:.2%}, F1: {val_f1:.4f}")

    # Release the model and Keras state before the next learning rate.
    del model
    tf.keras.backend.clear_session()

# === Plot Results ===
# The learning rates of this sweep are not listed in ascending order, so they
# are sorted here; otherwise the line would double back from 0.1 to 0.06.
# Accuracy is stored in percent (0-100) and F1 as a fraction (0-1), so each
# metric gets its own y-axis instead of sharing one.
plot_lrs = sorted(results)
plot_accuracies = [results[lr_value]['accuracy'] for lr_value in plot_lrs]
plot_f1_scores = [results[lr_value]['f1_score'] for lr_value in plot_lrs]

fig, ax_acc = plt.subplots(figsize=(10, 5))
acc_line, = ax_acc.plot(plot_lrs, plot_accuracies, marker='o', color='tab:blue', label='Accuracy (%)')
ax_acc.set_xscale('log')
ax_acc.set_xlabel('Learning Rate')
ax_acc.set_ylabel('Accuracy (%)')
ax_acc.grid(True)

# Secondary y-axis for the F1 score.
ax_f1 = ax_acc.twinx()
f1_line, = ax_f1.plot(plot_lrs, plot_f1_scores, marker='s', color='tab:orange', label='F1 Score')
ax_f1.set_ylabel('F1 Score')

ax_acc.set_title('ResNet50 Learning Rate vs Performance (Fold 1)')
# One combined legend for the lines of both axes.
ax_acc.legend(handles=[acc_line, f1_line])
fig.tight_layout()
plt.show()

In [None]:
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import make_interp_spline

# === Directory with saved history files ===
history_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcess'

# === Same learning rates used in training ===
learning_rates = [
    1e-5,
    2e-5,
    3e-5,
    5e-5,
    7e-5,
    1e-4,
    1.5e-4,
    2e-4,
    3e-4,
    5e-4,
    7e-4,
    1e-3,
    2e-3,    # 0.002
    3e-3,    # 0.003
    5e-3,    # 0.005
    7e-3,    # 0.007
    1e-2,    # 0.010
    1.5e-2,  # 0.015
    2e-2,    # 0.020
    3e-2,    # 0.030
    5e-2,    # 0.050
    6e-2,    # 0.060
    7e-2,    # 0.070
    8e-2,    # 0.080
    9e-2,    # 0.090
    1e-1     # 0.100
]


# === Collect best val accuracy and min val loss per LR ===
val_accuracies = []
val_losses = []

for lr in learning_rates:
    # The file name uses the float's default string form, matching the name
    # written by the training cells.
    file_path = os.path.join(history_dir, f'history_resnet_lr{lr}_fold1.pkl')
    if os.path.exists(file_path):
        # pickle is only safe here because the files come from our own runs.
        with open(file_path, 'rb') as f:
            history = pickle.load(f)
        best_val_acc = max(history['val_accuracy'])
        min_val_loss = min(history['val_loss'])
        val_accuracies.append(best_val_acc)
        val_losses.append(min_val_loss)
        # ':g' keeps values such as 1.5e-4 exact; ':.0e' rounded them to one
        # significant digit and printed a different learning rate.
        print(f"✅ LR={lr:g}: Val Acc={best_val_acc:.4f}, Val Loss={min_val_loss:.4f}")
    else:
        val_accuracies.append(None)
        val_losses.append(None)
        print(f"⚠️ Missing history: {file_path}")

# === Filter out missing values ===
filtered_lrs, filtered_accs, filtered_losses = [], [], []
for lr, acc, loss in zip(learning_rates, val_accuracies, val_losses):
    if acc is not None and loss is not None:
        filtered_lrs.append(lr)
        filtered_accs.append(acc)
        filtered_losses.append(loss)

# === Sort for safe interpolation ===
sorted_idx = np.argsort(filtered_lrs)
filtered_lrs = np.array(filtered_lrs)[sorted_idx]
filtered_accs = np.array(filtered_accs)[sorted_idx]
filtered_losses = np.array(filtered_losses)[sorted_idx]

if filtered_lrs.size == 0:
    # Fail with a clear message instead of an opaque reduction error below.
    raise FileNotFoundError(f"No history files found in {history_dir}")

# === Smooth curves ===
# Interpolate in log10(lr) space so the curve is evenly sampled on the log axis.
x = np.log10(filtered_lrs)
# A degree-k spline needs at least k + 1 points; lower the degree when only a
# few histories are available and skip smoothing for a single point.
spline_degree = min(2, len(x) - 1)
if spline_degree >= 1:
    x_smooth = np.linspace(x.min(), x.max(), 300)
    acc_smooth = make_interp_spline(x, filtered_accs, k=spline_degree)(x_smooth)
    loss_smooth = make_interp_spline(x, filtered_losses, k=spline_degree)(x_smooth)
else:
    x_smooth, acc_smooth, loss_smooth = x, filtered_accs, filtered_losses

# === Plot smoothed curves ===
fig, ax1 = plt.subplots(figsize=(10, 6))

# Accuracy (left y-axis)
ax1.set_xlabel('Learning Rate (log scale)')
ax1.set_ylabel('Best Validation Accuracy', color='blue')
ax1.plot(10**x_smooth, acc_smooth, color='blue')
ax1.scatter(filtered_lrs, filtered_accs, color='blue')
ax1.set_xscale('log')
ax1.tick_params(axis='y', labelcolor='blue')

# Add all learning rate ticks
ax1.set_xticks(filtered_lrs)
# ':g' avoids mislabelled ticks for values like 1.5e-4.
ax1.set_xticklabels([f"{lr:g}" for lr in filtered_lrs], rotation=45)

# Loss (right y-axis)
ax2 = ax1.twinx()
ax2.set_ylabel('Minimum Validation Loss', color='red')
ax2.plot(10**x_smooth, loss_smooth, color='red')
ax2.scatter(filtered_lrs, filtered_losses, color='red')
ax2.tick_params(axis='y', labelcolor='red')

plt.title('Validation Accuracy and Loss vs Learning Rate (ResNet50)')
plt.grid(True, which='both', axis='x')
fig.tight_layout()
plt.show()

In [None]:
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import make_interp_spline

# === Directory with saved history files ===
history_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcess'

# === Learning rates used in training ===
learning_rates = [
    1e-5, 2e-5, 3e-5, 5e-5, 7e-5, 1e-4, 1.5e-4, 2e-4, 3e-4, 5e-4, 7e-4,
    1e-3, 2e-3, 3e-3, 5e-3, 7e-3, 1e-2, 2e-2, 3e-2, 5e-2, 6e-2, 7e-2, 8e-2, 9e-2
]

# === Collect best val accuracy and average val loss per LR ===
val_accuracies = []
val_losses = []

for lr in learning_rates:
    # The file name uses the float's default string form, matching the name
    # written by the training cells.
    file_path = os.path.join(history_dir, f'history_resnet_lr{lr}_fold1.pkl')
    if os.path.exists(file_path):
        # pickle is only safe here because the files come from our own runs.
        with open(file_path, 'rb') as f:
            history = pickle.load(f)
        best_val_acc = max(history['val_accuracy'])
        avg_val_loss = np.mean(history['val_loss'])  # mean over all recorded epochs
        val_accuracies.append(best_val_acc)
        val_losses.append(avg_val_loss)
        # ':g' keeps values such as 1.5e-4 exact; ':.0e' rounded them to one
        # significant digit and printed a different learning rate.
        print(f"✅ LR={lr:g}: Val Acc={best_val_acc:.4f}, Avg Val Loss={avg_val_loss:.4f}")
    else:
        val_accuracies.append(None)
        val_losses.append(None)
        print(f"⚠️ Missing history: {file_path}")

# === Filter out missing values ===
filtered_lrs, filtered_accs, filtered_losses = [], [], []
for lr, acc, loss in zip(learning_rates, val_accuracies, val_losses):
    if acc is not None and loss is not None:
        filtered_lrs.append(lr)
        filtered_accs.append(acc)
        filtered_losses.append(loss)

# === Sort for interpolation ===
sorted_idx = np.argsort(filtered_lrs)
filtered_lrs = np.array(filtered_lrs)[sorted_idx]
filtered_accs = np.array(filtered_accs)[sorted_idx]
filtered_losses = np.array(filtered_losses)[sorted_idx]

if filtered_lrs.size == 0:
    # Fail with a clear message instead of an opaque reduction error below.
    raise FileNotFoundError(f"No history files found in {history_dir}")

# === Interpolate smooth curves ===
# Interpolate in log10(lr) space so the curve is evenly sampled on the log axis.
x = np.log10(filtered_lrs)
# A degree-k spline needs at least k + 1 points; lower the degree when only a
# few histories are available and skip smoothing for a single point.
spline_degree = min(2, len(x) - 1)
if spline_degree >= 1:
    x_smooth = np.linspace(x.min(), x.max(), 300)
    acc_smooth = make_interp_spline(x, filtered_accs, k=spline_degree)(x_smooth)
    loss_smooth = make_interp_spline(x, filtered_losses, k=spline_degree)(x_smooth)
else:
    x_smooth, acc_smooth, loss_smooth = x, filtered_accs, filtered_losses

# === Plot ===
fig, ax1 = plt.subplots(figsize=(12, 6))

# Left y-axis: Accuracy
ax1.set_xlabel('Learning Rate')
ax1.set_ylabel('Best Validation Accuracy', color='blue')
ax1.plot(10**x_smooth, acc_smooth, color='blue', label='Val Accuracy')
ax1.scatter(filtered_lrs, filtered_accs, color='blue')
ax1.set_xscale('log')
ax1.tick_params(axis='y', labelcolor='blue')

# ✅ Show only a few learning rates on x-axis
num_ticks_to_show = 6
# np.unique drops repeated indices when fewer points than ticks are available.
tick_indices = np.unique(
    np.round(np.linspace(0, len(filtered_lrs) - 1, num_ticks_to_show)).astype(int)
)
selected_lrs = filtered_lrs[tick_indices]
ax1.set_xticks(selected_lrs)
ax1.set_xticklabels([f"{lr:.1e}" for lr in selected_lrs], rotation=60)

# Right y-axis: Loss
ax2 = ax1.twinx()
ax2.set_ylabel('Average Validation Loss', color='red')
ax2.plot(10**x_smooth, loss_smooth, color='red', label='Avg Val Loss')
ax2.scatter(filtered_lrs, filtered_losses, color='red')
ax2.tick_params(axis='y', labelcolor='red')

plt.title('Validation Accuracy and Average Loss vs Learning Rate (ResNet50)')
plt.grid(True, which='both', axis='x')
fig.tight_layout()
plt.show()

In [None]:
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import make_interp_spline

# Larger fonts for this figure. rcParams are process-wide, so every plot
# created after this cell is affected as well.
plt.rcParams.update({
    'font.size': 16,          # Base font size
    'axes.titlesize': 16,     # Title font size
    'axes.labelsize': 16,     # Axis label font size
    'xtick.labelsize': 16,    # X-tick label font size
    'ytick.labelsize': 16,    # Y-tick label font size
    'legend.fontsize': 16     # Legend font size (if you add one)
})


# === Directory with saved history files ===
history_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcess'

# === Learning rates used in training ===
learning_rates = [
    1e-5, 2e-5, 3e-5, 5e-5, 7e-5, 1e-4, 1.5e-4, 2e-4, 3e-4, 5e-4, 7e-4,
    1e-3, 2e-3, 3e-3, 5e-3, 7e-3, 1e-2, 2e-2, 3e-2, 5e-2, 6e-2, 7e-2, 8e-2, 9e-2
]

# === Collect best val accuracy and average val loss per LR ===
val_accuracies = []
val_losses = []

for lr in learning_rates:
    # The file name uses the float's default string form, matching the name
    # written by the training cells.
    file_path = os.path.join(history_dir, f'history_resnet_lr{lr}_fold1.pkl')
    if os.path.exists(file_path):
        # pickle is only safe here because the files come from our own runs.
        with open(file_path, 'rb') as f:
            history = pickle.load(f)
        best_val_acc = max(history['val_accuracy'])
        avg_val_loss = np.mean(history['val_loss'])  # mean over all recorded epochs
        val_accuracies.append(best_val_acc)
        val_losses.append(avg_val_loss)
        # ':g' keeps values such as 1.5e-4 exact; ':.0e' rounded them to one
        # significant digit and printed a different learning rate.
        print(f"✅ LR={lr:g}: Val Acc={best_val_acc:.4f}, Avg Val Loss={avg_val_loss:.4f}")
    else:
        val_accuracies.append(None)
        val_losses.append(None)
        print(f"⚠️ Missing history: {file_path}")

# === Filter out missing values ===
filtered_lrs, filtered_accs, filtered_losses = [], [], []
for lr, acc, loss in zip(learning_rates, val_accuracies, val_losses):
    if acc is not None and loss is not None:
        filtered_lrs.append(lr)
        filtered_accs.append(acc)
        filtered_losses.append(loss)

# === Sort for interpolation ===
sorted_idx = np.argsort(filtered_lrs)
filtered_lrs = np.array(filtered_lrs)[sorted_idx]
filtered_accs = np.array(filtered_accs)[sorted_idx]
filtered_losses = np.array(filtered_losses)[sorted_idx]

if filtered_lrs.size == 0:
    # Fail with a clear message instead of an opaque reduction error below.
    raise FileNotFoundError(f"No history files found in {history_dir}")

# === Interpolate smooth curves ===
# Interpolate in log10(lr) space so the curve is evenly sampled on the log axis.
x = np.log10(filtered_lrs)
# A degree-k spline needs at least k + 1 points; lower the degree when only a
# few histories are available and skip smoothing for a single point.
spline_degree = min(2, len(x) - 1)
if spline_degree >= 1:
    x_smooth = np.linspace(x.min(), x.max(), 300)
    acc_smooth = make_interp_spline(x, filtered_accs, k=spline_degree)(x_smooth)
    loss_smooth = make_interp_spline(x, filtered_losses, k=spline_degree)(x_smooth)
else:
    x_smooth, acc_smooth, loss_smooth = x, filtered_accs, filtered_losses

# === Plot ===
fig, ax1 = plt.subplots(figsize=(12, 6))

# Left y-axis: Accuracy
ax1.set_xlabel('Learning Rate')
ax1.set_ylabel('Best Validation Accuracy', color='blue')
ax1.plot(10**x_smooth, acc_smooth, color='blue', label='Val Accuracy')
ax1.scatter(filtered_lrs, filtered_accs, color='blue')
ax1.set_xscale('log')
ax1.tick_params(axis='y', labelcolor='blue')

# ✅ Show only a few learning rates on x-axis
num_ticks_to_show = 6
# np.unique drops repeated indices when fewer points than ticks are available.
tick_indices = np.unique(
    np.round(np.linspace(0, len(filtered_lrs) - 1, num_ticks_to_show)).astype(int)
)
selected_lrs = filtered_lrs[tick_indices]
ax1.set_xticks(selected_lrs)
ax1.set_xticklabels([f"{lr:.1e}" for lr in selected_lrs], rotation=60)

# Right y-axis: Loss
ax2 = ax1.twinx()
ax2.set_ylabel('Average Validation Loss', color='red')
ax2.plot(10**x_smooth, loss_smooth, color='red', label='Avg Val Loss')
ax2.scatter(filtered_lrs, filtered_losses, color='red')
ax2.tick_params(axis='y', labelcolor='red')

plt.title('Validation Accuracy and Average Loss vs Learning Rate (ResNet50)')
plt.grid(True, which='both', axis='x')
fig.tight_layout()
plt.show()

### Take the best learning rate (5e-4) and train on all 5 folds

Fold 1

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score

# === Setup ===
# Drop Keras state left over from previously executed cells.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        # Memory growth has to be configured the same way on every visible
        # GPU, so enable it on all of them instead of only the first one.
        for gpu_device in physical_devices:
            tf.config.experimental.set_memory_growth(gpu_device, True)
    except RuntimeError as e:
        # Reported instead of raised so the rest of the cell can still run.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
# Fold1 is the validation fold of this run; models and histories go to the
# "...Fina" output folders.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold = os.path.join(base_fold_dir, 'Fold1')
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsResNetCroppedPreProcessFina'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcessFina'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Load function ===
def load_images_and_labels(image_dir, target_size=(224, 224)):
    """Load the JPEG images of one fold together with their class labels.

    Looks for ``Good`` and ``Bad`` sub-directories inside ``image_dir``. Every
    ``.jpg`` file is resized to ``target_size``, converted to an array and run
    through ResNet50's ``preprocess_input``.

    Args:
        image_dir: Fold directory holding the ``Good`` / ``Bad`` folders.
            A missing folder is skipped.
        target_size: ``(height, width)`` passed to ``load_img``.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. Labels are 1 for ``Good``
        and 0 for ``Bad``. When no image is found, ``images`` has shape
        ``(0, height, width, 3)`` (assuming 3-channel RGB loading) so it can
        still be concatenated with the arrays of other folds.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_dir = os.path.join(image_dir, subdir)
        if not os.path.isdir(full_dir):
            continue
        # Sorted so the sample order does not depend on the filesystem.
        for fname in sorted(os.listdir(full_dir)):
            if fname.lower().endswith('.jpg'):
                path = os.path.join(full_dir, fname)
                img = load_img(path, target_size=target_size)
                img_arr = preprocess_input(img_to_array(img))
                images.append(img_arr)
                labels.append(1 if subdir == 'Good' else 0)
    if not images:
        # np.array([]) would be 1-D float64 and break np.concatenate later on.
        empty_images = np.empty((0, *tuple(target_size), 3), dtype=np.float32)
        return empty_images, np.empty((0,), dtype=int)
    return np.array(images), np.array(labels, dtype=int)

# === Model builder ===
def create_resnet50_model(image_shape, learning_rate):
    """Build and compile a binary classifier on a frozen ResNet50 backbone.

    Args:
        image_shape: Input shape handed to ``ResNet50`` as ``input_shape``.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` with a single sigmoid output, trained with
        binary cross-entropy and reporting accuracy.
    """
    backbone = ResNet50(include_top=False, weights='imagenet', input_shape=image_shape)
    backbone.trainable = False  # only the new head is trained

    # Classification head, applied in order on top of the backbone output.
    head_layers = (
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    )
    tensor = backbone.output
    for head_layer in head_layers:
        tensor = head_layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# === Load Training Data (Folds 2–5) ===
# Fold1 is held out for validation; the remaining four folds are loaded one
# by one and concatenated into a single training set.
X_train, y_train = [], []
for i in range(2, 6):
    images, labels = load_images_and_labels(os.path.join(base_fold_dir, f'Fold{i}'))
    X_train.append(images)
    y_train.append(labels)
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

# === Load Validation Data (Fold 1) ===
X_val, y_val = load_images_and_labels(val_fold)

# Learning rate chosen from the sweep above.
lr = 5e-4
print(f"\n🚀 Training with Learning Rate = {lr}")
# The input shape is taken from the loaded training images.
model = create_resnet50_model(X_train.shape[1:], lr)

callbacks = [
    # Stop once val_loss has not improved for 25 epochs.
    EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
    # Multiply the learning rate by 0.2 after 7 epochs without val_loss
    # improvement, never going below 1e-7.
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7),
    # Only the best epoch is kept in the checkpoint file for this fold.
    ModelCheckpoint(os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold1.keras'), save_best_only=True)
]

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=32,
    callbacks=callbacks,
    verbose=2
)

# === Save history ===
with open(os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold1.pkl'), 'wb') as f:
    pickle.dump(history.history, f)

# === Evaluate ===
# Threshold the sigmoid outputs at 0.5 to obtain boolean predictions.
val_preds = model.predict(X_val) > 0.5
val_f1 = f1_score(y_val, val_preds)
# Predictions are flattened so they compare element-wise against y_val.
val_acc = np.mean(val_preds.flatten() == y_val)

print(f"✅ Final Metrics for LR={lr}")
print(f"Validation Accuracy: {val_acc:.2%}")
print(f"Validation F1 Score: {val_f1:.4f}")

Fold 2

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score

# === Setup ===
# Drop Keras state left over from previously executed cells.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        # Memory growth has to be configured the same way on every visible
        # GPU, so enable it on all of them instead of only the first one.
        for gpu_device in physical_devices:
            tf.config.experimental.set_memory_growth(gpu_device, True)
    except RuntimeError as e:
        # Reported instead of raised so the rest of the cell can still run.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
# Fold2 is the validation fold of this run; models and histories go to the
# "...Fina" output folders.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold = os.path.join(base_fold_dir, 'Fold2')
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsResNetCroppedPreProcessFina'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcessFina'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Load function ===
def load_images_and_labels(image_dir, target_size=(224, 224)):
    """Load the JPEG images of one fold together with their class labels.

    Looks for ``Good`` and ``Bad`` sub-directories inside ``image_dir``. Every
    ``.jpg`` file is resized to ``target_size``, converted to an array and run
    through ResNet50's ``preprocess_input``.

    Args:
        image_dir: Fold directory holding the ``Good`` / ``Bad`` folders.
            A missing folder is skipped.
        target_size: ``(height, width)`` passed to ``load_img``.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. Labels are 1 for ``Good``
        and 0 for ``Bad``. When no image is found, ``images`` has shape
        ``(0, height, width, 3)`` (assuming 3-channel RGB loading) so it can
        still be concatenated with the arrays of other folds.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_dir = os.path.join(image_dir, subdir)
        if not os.path.isdir(full_dir):
            continue
        # Sorted so the sample order does not depend on the filesystem.
        for fname in sorted(os.listdir(full_dir)):
            if fname.lower().endswith('.jpg'):
                path = os.path.join(full_dir, fname)
                img = load_img(path, target_size=target_size)
                img_arr = preprocess_input(img_to_array(img))
                images.append(img_arr)
                labels.append(1 if subdir == 'Good' else 0)
    if not images:
        # np.array([]) would be 1-D float64 and break np.concatenate later on.
        empty_images = np.empty((0, *tuple(target_size), 3), dtype=np.float32)
        return empty_images, np.empty((0,), dtype=int)
    return np.array(images), np.array(labels, dtype=int)

# === Model builder ===
def create_resnet50_model(image_shape, learning_rate):
    """Build and compile a binary classifier on a frozen ResNet50 backbone.

    Args:
        image_shape: Input shape handed to ``ResNet50`` as ``input_shape``.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` with a single sigmoid output, trained with
        binary cross-entropy and reporting accuracy.
    """
    backbone = ResNet50(include_top=False, weights='imagenet', input_shape=image_shape)
    backbone.trainable = False  # only the new head is trained

    # Classification head, applied in order on top of the backbone output.
    head_layers = (
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    )
    tensor = backbone.output
    for head_layer in head_layers:
        tensor = head_layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# === Load Training Data (Folds 1, 3, 4, 5) ===
# Fold2 is held out for validation; the remaining four folds are loaded one
# by one and concatenated into a single training set.
X_train, y_train = [], []
for i in [1, 3, 4, 5]:
    images, labels = load_images_and_labels(os.path.join(base_fold_dir, f'Fold{i}'))
    X_train.append(images)
    y_train.append(labels)
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

# === Load Validation Data (Fold 2) ===
X_val, y_val = load_images_and_labels(val_fold)

# Learning rate chosen from the sweep above.
lr = 5e-4
print(f"\n🚀 Training with Learning Rate = {lr} using Fold 2 as Validation Set")
# The input shape is taken from the loaded training images.
model = create_resnet50_model(X_train.shape[1:], lr)

callbacks = [
    # Stop once val_loss has not improved for 25 epochs.
    EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
    # Multiply the learning rate by 0.2 after 7 epochs without val_loss
    # improvement, never going below 1e-7.
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7),
    # Only the best epoch is kept in the checkpoint file for this fold.
    ModelCheckpoint(os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold2.keras'), save_best_only=True)
]

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=32,
    callbacks=callbacks,
    verbose=2
)

# === Save history ===
with open(os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold2.pkl'), 'wb') as f:
    pickle.dump(history.history, f)

# === Evaluate ===
# Threshold the sigmoid outputs at 0.5 to obtain boolean predictions.
val_preds = model.predict(X_val) > 0.5
val_f1 = f1_score(y_val, val_preds)
# Predictions are flattened so they compare element-wise against y_val.
val_acc = np.mean(val_preds.flatten() == y_val)

print(f"✅ Final Metrics for LR={lr}")
print(f"Validation Accuracy: {val_acc:.2%}")
print(f"Validation F1 Score: {val_f1:.4f}")

Fold 3

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score

# === Setup ===
# Drop Keras state left over from previously executed cells.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        # Memory growth has to be configured the same way on every visible
        # GPU, so enable it on all of them instead of only the first one.
        for gpu_device in physical_devices:
            tf.config.experimental.set_memory_growth(gpu_device, True)
    except RuntimeError as e:
        # Reported instead of raised so the rest of the cell can still run.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
# Fold3 is the validation fold of this run; models and histories go to the
# "...Fina" output folders.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold = os.path.join(base_fold_dir, 'Fold3')
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsResNetCroppedPreProcessFina'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcessFina'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Load function ===
def load_images_and_labels(image_dir, target_size=(224, 224)):
    """Load the JPEG images of one fold together with their class labels.

    Looks for ``Good`` and ``Bad`` sub-directories inside ``image_dir``. Every
    ``.jpg`` file is resized to ``target_size``, converted to an array and run
    through ResNet50's ``preprocess_input``.

    Args:
        image_dir: Fold directory holding the ``Good`` / ``Bad`` folders.
            A missing folder is skipped.
        target_size: ``(height, width)`` passed to ``load_img``.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. Labels are 1 for ``Good``
        and 0 for ``Bad``. When no image is found, ``images`` has shape
        ``(0, height, width, 3)`` (assuming 3-channel RGB loading) so it can
        still be concatenated with the arrays of other folds.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_dir = os.path.join(image_dir, subdir)
        if not os.path.isdir(full_dir):
            continue
        # Sorted so the sample order does not depend on the filesystem.
        for fname in sorted(os.listdir(full_dir)):
            if fname.lower().endswith('.jpg'):
                path = os.path.join(full_dir, fname)
                img = load_img(path, target_size=target_size)
                img_arr = preprocess_input(img_to_array(img))
                images.append(img_arr)
                labels.append(1 if subdir == 'Good' else 0)
    if not images:
        # np.array([]) would be 1-D float64 and break np.concatenate later on.
        empty_images = np.empty((0, *tuple(target_size), 3), dtype=np.float32)
        return empty_images, np.empty((0,), dtype=int)
    return np.array(images), np.array(labels, dtype=int)

# === Model builder ===
def create_resnet50_model(image_shape, learning_rate):
    """Build and compile a binary classifier on a frozen ResNet50 backbone.

    Args:
        image_shape: Input shape handed to ``ResNet50`` as ``input_shape``.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` with a single sigmoid output, trained with
        binary cross-entropy and reporting accuracy.
    """
    backbone = ResNet50(include_top=False, weights='imagenet', input_shape=image_shape)
    backbone.trainable = False  # only the new head is trained

    # Classification head, applied in order on top of the backbone output.
    head_layers = (
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    )
    tensor = backbone.output
    for head_layer in head_layers:
        tensor = head_layer(tensor)

    classifier = Model(inputs=backbone.input, outputs=tensor)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# === Load Training Data (Folds 1, 2, 4, 5) ===
# Fold3 is held out for validation; the remaining four folds are loaded one
# by one and concatenated into a single training set.
X_train, y_train = [], []
for i in [1, 2, 4, 5]:
    images, labels = load_images_and_labels(os.path.join(base_fold_dir, f'Fold{i}'))
    X_train.append(images)
    y_train.append(labels)
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

# === Load Validation Data (Fold 3) ===
X_val, y_val = load_images_and_labels(val_fold)

# Learning rate chosen from the sweep above.
lr = 5e-4
print(f"\n🚀 Training with Learning Rate = {lr} using Fold 3 as Validation Set")
# The input shape is taken from the loaded training images.
model = create_resnet50_model(X_train.shape[1:], lr)

callbacks = [
    # Stop once val_loss has not improved for 25 epochs.
    EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
    # Multiply the learning rate by 0.2 after 7 epochs without val_loss
    # improvement, never going below 1e-7.
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7),
    # Only the best epoch is kept in the checkpoint file for this fold.
    ModelCheckpoint(os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold3.keras'), save_best_only=True)
]

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=32,
    callbacks=callbacks,
    verbose=2
)

# === Save history ===
with open(os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold3.pkl'), 'wb') as f:
    pickle.dump(history.history, f)

# === Evaluate ===
# Threshold the sigmoid outputs at 0.5 to obtain boolean predictions.
val_preds = model.predict(X_val) > 0.5
val_f1 = f1_score(y_val, val_preds)
# Predictions are flattened so they compare element-wise against y_val.
val_acc = np.mean(val_preds.flatten() == y_val)

print(f"✅ Final Metrics for LR={lr}")
print(f"Validation Accuracy: {val_acc:.2%}")
print(f"Validation F1 Score: {val_f1:.4f}")

Fold 4

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score

# === Setup ===
# Drop Keras state left over from previously executed cells.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    try:
        # Memory growth has to be configured the same way on every visible
        # GPU, so enable it on all of them instead of only the first one.
        for gpu_device in physical_devices:
            tf.config.experimental.set_memory_growth(gpu_device, True)
    except RuntimeError as e:
        # Reported instead of raised so the rest of the cell can still run.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
# Fold4 is the validation fold of this run; models and histories go to the
# "...Fina" output folders.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
val_fold = os.path.join(base_fold_dir, 'Fold4')
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsResNetCroppedPreProcessFina'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcessFina'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Load function ===
def load_images_and_labels(image_dir, target_size=(224, 224)):
    """Load the JPEG images of one fold together with their class labels.

    Looks for ``Good`` and ``Bad`` sub-directories inside ``image_dir``. Every
    ``.jpg`` file is resized to ``target_size``, converted to an array and run
    through ResNet50's ``preprocess_input``.

    Args:
        image_dir: Fold directory holding the ``Good`` / ``Bad`` folders.
            A missing folder is skipped.
        target_size: ``(height, width)`` passed to ``load_img``.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. Labels are 1 for ``Good``
        and 0 for ``Bad``. When no image is found, ``images`` has shape
        ``(0, height, width, 3)`` (assuming 3-channel RGB loading) so it can
        still be concatenated with the arrays of other folds.
    """
    images, labels = [], []
    for subdir in ['Good', 'Bad']:
        full_dir = os.path.join(image_dir, subdir)
        if not os.path.isdir(full_dir):
            continue
        # Sorted so the sample order does not depend on the filesystem.
        for fname in sorted(os.listdir(full_dir)):
            if fname.lower().endswith('.jpg'):
                path = os.path.join(full_dir, fname)
                img = load_img(path, target_size=target_size)
                img_arr = preprocess_input(img_to_array(img))
                images.append(img_arr)
                labels.append(1 if subdir == 'Good' else 0)
    if not images:
        # np.array([]) would be 1-D float64 and break np.concatenate later on.
        empty_images = np.empty((0, *tuple(target_size), 3), dtype=np.float32)
        return empty_images, np.empty((0,), dtype=int)
    return np.array(images), np.array(labels, dtype=int)

# === Model builder ===
def create_resnet50_model(image_shape, learning_rate):
    """Build and compile a binary classifier on top of a frozen ResNet50.

    Args:
        image_shape: Input shape handed to ResNet50 as ``input_shape``.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` with a single sigmoid output, trained with
        binary cross-entropy and reporting accuracy.
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=image_shape)
    # Freeze the ImageNet backbone; only the head defined below is trainable.
    backbone.trainable = False

    # Classification head, applied in order to the backbone's output.
    head_layers = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    model = Model(inputs=backbone.input, outputs=tensor)
    model.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

# === Load Training Data (Folds 1, 2, 3, 5) ===
# Fold 4 is loaded separately below as the validation set, so it is left out here.
X_train, y_train = [], []
for i in [1, 2, 3, 5]:
    images, labels = load_images_and_labels(os.path.join(base_fold_dir, f'Fold{i}'))
    X_train.append(images)
    y_train.append(labels)
# Join the per-fold arrays into one training set.
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

# === Load Validation Data (Fold 4) ===
X_val, y_val = load_images_and_labels(val_fold)

lr = 5e-4
print(f"\n🚀 Training with Learning Rate = {lr} using Fold 4 as Validation Set")
# The input shape is taken from the loaded data: every axis after the sample axis.
model = create_resnet50_model(X_train.shape[1:], lr)

# EarlyStopping: stop after 25 epochs without val_loss improvement and restore the best weights.
# ReduceLROnPlateau: multiply the learning rate by 0.2 after 7 stagnant epochs, never below 1e-7.
# ModelCheckpoint: save_best_only with no explicit monitor, so the Keras default monitor applies.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7),
    ModelCheckpoint(os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold4.keras'), save_best_only=True)
]

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=32,
    callbacks=callbacks,
    verbose=2
)

# === Save history ===
# Only the plain history.history dict is pickled, not the History object itself.
with open(os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold4.pkl'), 'wb') as f:
    pickle.dump(history.history, f)

# === Evaluate ===
# Threshold the sigmoid outputs at 0.5 to obtain boolean class predictions.
val_preds = model.predict(X_val) > 0.5
val_f1 = f1_score(y_val, val_preds)
# flatten() makes the predictions 1-D before the element-wise comparison with y_val.
val_acc = np.mean(val_preds.flatten() == y_val)

print(f"✅ Final Metrics for LR={lr}")
print(f"Validation Accuracy: {val_acc:.2%}")
print(f"Validation F1 Score: {val_f1:.4f}")

Fold 5

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.regularizers import l2
from sklearn.metrics import f1_score

# === Setup ===
# Start from a clean Keras state so earlier cells do not leak into this run.
tf.keras.backend.clear_session()
physical_devices = tf.config.experimental.list_physical_devices('GPU')
# Memory growth has to be configured identically on all GPUs, so apply it to
# each visible device instead of only physical_devices[0].
for gpu in physical_devices:
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Presumably raised once the GPUs have already been initialised in
        # this session; report it and carry on.
        print(f"GPU Memory Growth Setting Error: {e}")

# === Directories ===
# Root folder containing Fold1..Fold5, each with 'Good' and 'Bad' sub-folders.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
# Fold 5 is the held-out validation fold for this run.
val_fold = os.path.join(base_fold_dir, 'Fold5')
# Output locations for the checkpointed model and the pickled training history.
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsResNetCroppedPreProcessFina'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcessFina'
os.makedirs(model_save_dir, exist_ok=True)
os.makedirs(history_save_dir, exist_ok=True)

# === Load function ===
def load_images_and_labels(image_dir):
    """Load the .jpg images of both classes below ``image_dir``.

    'Good' images receive label 1 and 'Bad' images label 0. Every image is
    resized to 224x224 on load and passed through the ResNet50
    ``preprocess_input``. Class folders that are absent are ignored.

    Returns:
        ``(images, labels)`` as NumPy arrays with one entry per image.
    """
    images = []
    labels = []
    for class_name, class_label in (('Good', 1), ('Bad', 0)):
        folder = os.path.join(image_dir, class_name)
        if not os.path.exists(folder):
            continue
        for entry in os.listdir(folder):
            if not entry.lower().endswith('.jpg'):
                continue
            raw = load_img(os.path.join(folder, entry), target_size=(224, 224))
            images.append(preprocess_input(img_to_array(raw)))
            labels.append(class_label)
    return np.array(images), np.array(labels)

# === Model builder ===
def create_resnet50_model(image_shape, learning_rate):
    """Return a compiled ResNet50-based binary classifier.

    The ImageNet-pretrained ResNet50 (without its top) is frozen and followed
    by global average pooling, a 256-unit L2-regularised ReLU layer, batch
    normalisation, 50% dropout and a single sigmoid unit.

    Args:
        image_shape: Value passed to ResNet50 as ``input_shape``.
        learning_rate: Learning rate given to Adam.
    """
    feature_extractor = ResNet50(weights='imagenet', include_top=False, input_shape=image_shape)
    feature_extractor.trainable = False

    pooled = GlobalAveragePooling2D()(feature_extractor.output)
    hidden = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(pooled)
    normalised = BatchNormalization()(hidden)
    regularised = Dropout(0.5)(normalised)
    probability = Dense(1, activation='sigmoid')(regularised)

    classifier = Model(inputs=feature_extractor.input, outputs=probability)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# === Load Training Data (Folds 1, 2, 3, 4) ===
# Fold 5 is loaded separately below as the validation set, so it is left out here.
X_train, y_train = [], []
for i in [1, 2, 3, 4]:
    images, labels = load_images_and_labels(os.path.join(base_fold_dir, f'Fold{i}'))
    X_train.append(images)
    y_train.append(labels)
# Join the per-fold arrays into one training set.
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

# === Load Validation Data (Fold 5) ===
X_val, y_val = load_images_and_labels(val_fold)

lr = 5e-4
print(f"\n🚀 Training with Learning Rate = {lr} using Fold 5 as Validation Set")
# The input shape is taken from the loaded data: every axis after the sample axis.
model = create_resnet50_model(X_train.shape[1:], lr)

# EarlyStopping: stop after 25 epochs without val_loss improvement and restore the best weights.
# ReduceLROnPlateau: multiply the learning rate by 0.2 after 7 stagnant epochs, never below 1e-7.
# ModelCheckpoint: save_best_only with no explicit monitor, so the Keras default monitor applies.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-7),
    ModelCheckpoint(os.path.join(model_save_dir, f'model_resnet_lr{lr}_fold5.keras'), save_best_only=True)
]

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=32,
    callbacks=callbacks,
    verbose=2
)

# === Save history ===
# Only the plain history.history dict is pickled, not the History object itself.
with open(os.path.join(history_save_dir, f'history_resnet_lr{lr}_fold5.pkl'), 'wb') as f:
    pickle.dump(history.history, f)

# === Evaluate ===
# Threshold the sigmoid outputs at 0.5 to obtain boolean class predictions.
val_preds = model.predict(X_val) > 0.5
val_f1 = f1_score(y_val, val_preds)
# flatten() makes the predictions 1-D before the element-wise comparison with y_val.
val_acc = np.mean(val_preds.flatten() == y_val)

print(f"✅ Final Metrics for LR={lr}")
print(f"Validation Accuracy: {val_acc:.2%}")
print(f"Validation F1 Score: {val_f1:.4f}")

### Analyze 5 folds 

In [None]:
import os
import pickle
import matplotlib.pyplot as plt

# Directories
# Folder that load_history() below reads the per-fold pickled history files from.
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcessFina'
# Fold numbers whose history files are looked up.
folds = [1, 2, 3, 4, 5]

def plot_train_val_graphs(fold_histories):
    """Plot per-fold training/validation loss and accuracy side by side.

    Args:
        fold_histories: Mapping of fold number to a history dict providing the
            'loss', 'val_loss', 'accuracy' and 'val_accuracy' series.

    The left panel shows loss, the right panel accuracy; training curves are
    solid and validation curves dashed, with one legend entry per curve.
    """
    # (train key, validation key, legend suffix, panel title, y-axis label)
    panels = [
        ('loss', 'val_loss', 'Loss', 'Training and Validation Loss Across Folds', 'Loss'),
        ('accuracy', 'val_accuracy', 'Acc', 'Training and Validation Accuracy Across Folds', 'Accuracy'),
    ]

    plt.figure(figsize=(14, 6))
    for position, (train_key, val_key, suffix, title, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        for fold, history in fold_histories.items():
            plt.plot(history[train_key], label=f'Fold {fold} Train {suffix}')
            plt.plot(history[val_key], linestyle='--', label=f'Fold {fold} Val {suffix}')
        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()
        plt.grid(alpha=0.3)

    plt.tight_layout()
    plt.show()

def load_history(fold):
    """Return the pickled training history for ``fold``, or None if absent.

    The file is looked up in ``history_save_dir`` under the name
    ``history_resnet_lr0.0005_fold<fold>.pkl``. A message is printed when the
    file does not exist.
    """
    file_path = os.path.join(history_save_dir, f'history_resnet_lr0.0005_fold{fold}.pkl')
    if not os.path.exists(file_path):
        print(f"History file not found for Fold {fold}")
        return None
    # NOTE: pickle.load must only be used on files this notebook wrote itself.
    with open(file_path, 'rb') as f:
        return pickle.load(f)

# Load histories for all folds
# Folds for which load_history() returned None (file missing) or any other
# falsy value are left out of the dict.
fold_histories = {}
for fold in folds:
    history = load_history(fold)
    if history:
        fold_histories[fold] = history

# Plot graphs
# Only plot when at least one fold history was loaded.
if fold_histories:
    plot_train_val_graphs(fold_histories)
else:
    print("No valid history files found.")

Better graph

In [None]:
import os
import pickle
import matplotlib.pyplot as plt
# Use a uniform 16 pt size for all figure text. rcParams is process-wide
# matplotlib state, so figures drawn after this cell are affected as well.
plt.rcParams.update({
    'font.size': 16,          # Base font size
    'axes.titlesize': 16,     # Title font size
    'axes.labelsize': 16,     # Axis label font size
    'xtick.labelsize': 16,    # X-tick label font size
    'ytick.labelsize': 16,    # Y-tick label font size
    'legend.fontsize': 16     # Legend font size (if you add one)
})

# Directories
# Folder that load_history() below reads the per-fold pickled history files from.
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcessFina'
# Fold numbers whose history files are looked up.
folds = [1, 2, 3, 4, 5]

def plot_train_val_graphs(fold_histories):
    """Plot loss and accuracy curves of all folds with one colour per split.

    Args:
        fold_histories: Mapping of fold number to a history dict providing the
            'loss', 'val_loss', 'accuracy' and 'val_accuracy' series.

    Training curves are solid blue and validation curves dashed red. Each
    panel gets a two-entry legend passed to ``plt.legend`` as a list of names.
    """
    train_color, val_color = 'blue', 'red'

    # (train key, val key, label suffix, panel title, y-axis label, legend names)
    panel_specs = [
        ('loss', 'val_loss', 'Loss',
         'Training and Validation Loss Across Folds', 'Loss',
         ['Train Loss', 'Val Loss']),
        ('accuracy', 'val_accuracy', 'Acc',
         'Training and Validation Accuracy Across Folds', 'Accuracy',
         ['Train Accuracy', 'Val Accuracy']),
    ]

    plt.figure(figsize=(14, 6))
    for panel_no, (train_key, val_key, tag, title, y_label, legend_names) in enumerate(panel_specs, start=1):
        plt.subplot(1, 2, panel_no)
        for fold, history in fold_histories.items():
            # Only fold 1 carries a non-empty per-line label.
            is_first = fold == 1
            plt.plot(history[train_key], color=train_color, alpha=0.6,
                     label=f'Fold {fold} Train {tag}' if is_first else "")
            plt.plot(history[val_key], linestyle='--', color=val_color, alpha=0.6,
                     label=f'Fold {fold} Val {tag}' if is_first else "")

        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend(legend_names)
        plt.grid(alpha=0.3)

    plt.tight_layout()
    plt.show()

def load_history(fold):
    """Load the stored training history of one fold.

    Looks for ``history_resnet_lr0.0005_fold<fold>.pkl`` inside
    ``history_save_dir``. Returns the unpickled object, or None (after
    printing a message) when the file does not exist.
    """
    history_file = os.path.join(history_save_dir, f'history_resnet_lr0.0005_fold{fold}.pkl')
    loaded = None
    if os.path.exists(history_file):
        # NOTE: pickle.load must only be used on files this notebook wrote itself.
        with open(history_file, 'rb') as handle:
            loaded = pickle.load(handle)
    else:
        print(f"History file not found for Fold {fold}")
    return loaded

# Load histories for all folds
# Folds for which load_history() returned None (file missing) or any other
# falsy value are left out of the dict.
fold_histories = {}
for fold in folds:
    history = load_history(fold)
    if history:
        fold_histories[fold] = history

# Plot graphs
# Only plot when at least one fold history was loaded.
if fold_histories:
    plot_train_val_graphs(fold_histories)
else:
    print("No valid history files found.")

Get metrics on the validation set

In [None]:
import os
import numpy as np
import pickle
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.resnet50 import preprocess_input

# Directories
# Root of the Fold1..Fold5 image folders and the folder holding the saved per-fold models.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsResNetCroppedPreProcessFina'
folds = [1, 2, 3, 4, 5]

# Metrics containers
# One value is appended per successfully evaluated fold.
# NOTE(review): train_accuracies, train_losses and val_losses are never filled in this cell.
train_accuracies, train_losses = [], []
val_accuracies, val_losses = [], []
recalls, f1_scores, precisions, auc_prs = [], [], [], []

def load_images_and_labels(image_dir):
    """Collect preprocessed images and binary labels from ``image_dir``.

    Scans the 'Good' (label 1) and 'Bad' (label 0) sub-folders for .jpg
    files, loads each at 224x224 and applies the ResNet50
    ``preprocess_input``. Sub-folders that do not exist are skipped.

    Returns:
        ``(images, labels)`` as NumPy arrays in matching order.
    """
    images, labels = [], []
    for subdir in ('Good', 'Bad'):
        full_dir = os.path.join(image_dir, subdir)
        if not os.path.exists(full_dir):
            continue
        jpg_paths = [
            os.path.join(full_dir, fname)
            for fname in os.listdir(full_dir)
            if fname.lower().endswith('.jpg')
        ]
        images.extend(
            preprocess_input(img_to_array(load_img(path, target_size=(224, 224))))
            for path in jpg_paths
        )
        labels.extend([int(subdir == 'Good')] * len(jpg_paths))
    return np.array(images), np.array(labels)

# Evaluate each fold's saved model on the images of that same fold and
# collect threshold-based metrics plus the area under the PR curve.
for fold in folds:
    print(f"\n📦 Processing Fold {fold}")

    # Load validation data
    val_dir = os.path.join(base_fold_dir, f'Fold{fold}')
    X_val, y_val = load_images_and_labels(val_dir)

    # Skip folds without any images instead of calling predict on empty input.
    if len(X_val) == 0:
        print(f"Fold {fold} - No validation data found.")
        continue

    # Load the model
    model_path = os.path.join(model_save_dir, f'model_resnet_lr0.0005_fold{fold}.keras')
    if not os.path.exists(model_path):
        print(f"Model for Fold {fold} not found.")
        continue

    model = load_model(model_path)

    # Predict on validation data
    # flatten() turns the model output into a 1-D array of scores.
    y_val_pred = model.predict(X_val).flatten()

    # Convert predictions to binary
    y_pred_binary = (y_val_pred > 0.5).astype(int)

    # Calculate metrics
    val_acc = accuracy_score(y_val, y_pred_binary)
    recall = recall_score(y_val, y_pred_binary)
    f1 = f1_score(y_val, y_pred_binary)
    precision = precision_score(y_val, y_pred_binary)
    # The PR curve uses the raw scores, not the thresholded labels.
    precision_curve, recall_curve, _ = precision_recall_curve(y_val, y_val_pred)
    auc_pr = auc(recall_curve, precision_curve)

    # Store fold metrics
    val_accuracies.append(val_acc)
    recalls.append(recall)
    f1_scores.append(f1)
    precisions.append(precision)
    auc_prs.append(auc_pr)

    # Print fold metrics
    print(f"Validation Accuracy: {val_acc:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"AUC-PR: {auc_pr:.4f}")

# Calculate and display averages
# Mean and standard deviation are taken over the folds that were actually evaluated.
if len(val_accuracies) > 0:
    print("\n📊 AVERAGE METRICS ACROSS FOLDS:")
    print(f"Validation Accuracy - Avg: {np.mean(val_accuracies):.4f}, Std: {np.std(val_accuracies):.4f}")
    print(f"Recall - Avg: {np.mean(recalls):.4f}, Std: {np.std(recalls):.4f}")
    print(f"F1 Score - Avg: {np.mean(f1_scores):.4f}, Std: {np.std(f1_scores):.4f}")
    print(f"Precision - Avg: {np.mean(precisions):.4f}, Std: {np.std(precisions):.4f}")
    print(f"AUC-PR - Avg: {np.mean(auc_prs):.4f}, Std: {np.std(auc_prs):.4f}")
else:
    print("No metrics available for calculation.")

Draw graph of metrics on the validation set

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc, log_loss
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.resnet50 import preprocess_input

# Directories
# Root of the Fold1..Fold5 image folders, plus the folders holding the saved
# per-fold models and pickled histories.
base_fold_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed'
model_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_ExperimentsResNetCroppedPreProcessFina'
history_save_dir = '/Users/suzetteschulenburg/Desktop/MainUse/LearningRate_HistoriesResNetCroppedPreProcessFina'
folds = [1, 2, 3, 4, 5]

# Initialize metrics container
# Column-oriented: each list receives one entry per evaluated fold and the
# dict is converted to a DataFrame once the loop below has finished.
metrics_data = {
    'Fold': [],
    'Val Accuracy': [],
    'F1 Score': [],
    'Precision': [],
    'Recall': [],
    'AUC-PR': [],
    'Val Loss': []
}

def load_images_and_labels(image_dir):
    """Load images and labels from directory.

    Reads the .jpg files in the 'Good' (label 1) and 'Bad' (label 0)
    sub-folders of ``image_dir``, resizing each to 224x224 and applying the
    ResNet50 ``preprocess_input``. Missing sub-folders are skipped.

    Returns:
        ``(images, labels)`` as NumPy arrays in matching order.
    """
    samples = []  # (preprocessed image array, label) pairs
    for subdir in ['Good', 'Bad']:
        full_dir = os.path.join(image_dir, subdir)
        if os.path.exists(full_dir):
            label = 1 if subdir == 'Good' else 0
            for fname in os.listdir(full_dir):
                if fname.lower().endswith('.jpg'):
                    loaded = load_img(os.path.join(full_dir, fname), target_size=(224, 224))
                    samples.append((preprocess_input(img_to_array(loaded)), label))
    images = [arr for arr, _ in samples]
    labels = [lbl for _, lbl in samples]
    return np.array(images), np.array(labels)

# Process each fold
# For every fold with a saved model and history file, score the model on that
# fold's images and append one row of metrics to metrics_data.
for fold in folds:
    print(f"Processing Fold {fold}...")

    val_dir = os.path.join(base_fold_dir, f'Fold{fold}')
    model_path = os.path.join(model_save_dir, f'model_resnet_lr0.0005_fold{fold}.keras')
    history_path = os.path.join(history_save_dir, f'history_resnet_lr0.0005_fold{fold}.pkl')

    if not os.path.exists(model_path) or not os.path.exists(history_path):
        print(f"Missing model or history for Fold {fold}")
        continue

    # Load the data first so an empty fold is skipped before the (slow) model
    # load and before predict() is called on an empty array.
    X_val, y_val = load_images_and_labels(val_dir)
    if len(X_val) == 0:
        print(f"Fold {fold} - No validation data found.")
        continue

    model = load_model(model_path)

    # Predict
    y_pred_probs = model.predict(X_val).flatten()
    y_pred_labels = (y_pred_probs > 0.5).astype(int)

    # Calculate metrics
    val_acc = accuracy_score(y_val, y_pred_labels)
    f1 = f1_score(y_val, y_pred_labels, zero_division=1)
    precision = precision_score(y_val, y_pred_labels, zero_division=1)
    recall = recall_score(y_val, y_pred_labels, zero_division=1)
    # The PR curve is computed from the raw scores, not the thresholded labels.
    prec_vals, rec_vals, _ = precision_recall_curve(y_val, y_pred_probs)
    auc_pr = auc(rec_vals, prec_vals)
    # Pass the label set explicitly: without it log_loss raises when a fold
    # happens to contain only one of the two classes.
    val_loss = log_loss(y_val, y_pred_probs, labels=[0, 1])

    # Store metrics
    metrics_data['Fold'].append(f'Fold {fold}')
    metrics_data['Val Accuracy'].append(val_acc)
    metrics_data['F1 Score'].append(f1)
    metrics_data['Precision'].append(precision)
    metrics_data['Recall'].append(recall)
    metrics_data['AUC-PR'].append(auc_pr)
    metrics_data['Val Loss'].append(val_loss)

# Convert to DataFrame
# One row per evaluated fold, one column per metric.
metrics_df = pd.DataFrame(metrics_data)

# === Plotting ===
fig, ax1 = plt.subplots(figsize=(10, 6))

# Plot Metrics
# The five score metrics share the left y-axis; the fold name is the x value.
ax1.plot(metrics_df['Fold'], metrics_df['Val Accuracy'], marker='o', linestyle='-', label='Val Accuracy', color='blue')
ax1.plot(metrics_df['Fold'], metrics_df['F1 Score'], marker='o', linestyle='-', label='F1 Score', color='green')
ax1.plot(metrics_df['Fold'], metrics_df['Precision'], marker='o', linestyle='-', label='Precision', color='red')
ax1.plot(metrics_df['Fold'], metrics_df['Recall'], marker='o', linestyle='-', label='Recall', color='purple')
ax1.plot(metrics_df['Fold'], metrics_df['AUC-PR'], marker='o', linestyle='-', label='AUC-PR', color='orange')

ax1.set_xlabel('Fold')
ax1.set_ylabel('Metrics')
# One tick per plotted fold, labelled with the fold name.
ax1.set_xticks(np.arange(len(metrics_df['Fold'])))
ax1.set_xticklabels(metrics_df['Fold'])
ax1.grid(alpha=0.3, linestyle='--', linewidth=0.5)
ax1.legend(loc='upper left')

# Plot Validation Loss on Secondary Y-Axis
# twinx() gives the loss its own right-hand y-axis while sharing the x-axis.
ax2 = ax1.twinx()
ax2.plot(metrics_df['Fold'], metrics_df['Val Loss'], marker='o', linestyle='--', label='Val Loss', color='black')
ax2.set_ylabel('Validation Loss')
ax2.tick_params(axis='y', labelcolor='black')
# Each axes draws its own legend, hence the separate call for the loss line.
ax2.legend(loc='upper right')

plt.title('Metrics Across Folds with Validation Loss')
plt.tight_layout()
plt.show()

# Get YOLO results

### Show images of YOLO

In [None]:
import os
import random
import matplotlib.pyplot as plt
import cv2

# === Path to processed images ===
base_path = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test'
classes = ['Good', 'Bad']

# === Collect all image paths ===
all_images = []
for cls in classes:
    cls_path = os.path.join(base_path, cls)
    # Match the extension case-insensitively so '.JPG' files are included too.
    image_files = [os.path.join(cls_path, fname) for fname in os.listdir(cls_path) if fname.lower().endswith('.jpg')]
    all_images.extend(image_files)

# === Sample up to 4 random images ===
# random.sample raises ValueError when asked for more items than exist, so cap
# the sample size at the number of images found.
num_samples = min(4, len(all_images))
sample_images = random.sample(all_images, num_samples)

# === Plot the images ===
if not sample_images:
    print(f"No .jpg images found under {base_path}")
else:
    # squeeze=False keeps axs two-dimensional even when only one image is shown.
    fig, axs = plt.subplots(1, num_samples, figsize=(5 * num_samples, 5), squeeze=False)
    for ax, img_path in zip(axs[0], sample_images):
        ax.axis('off')
        # cv2.imread returns None (no exception) when the file cannot be decoded.
        bgr = cv2.imread(img_path)
        if bgr is None:
            ax.set_title(f"Unreadable: {os.path.basename(img_path)}", fontsize=8)
            continue
        # OpenCV loads images as BGR; matplotlib expects RGB.
        img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        ax.imshow(img)
        ax.set_title(os.path.basename(img_path), fontsize=8)

    plt.tight_layout()
    plt.show()

### Do YOLO and manual segmentation for comparison

In [None]:
!pip install labelme

In [None]:
import json
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw

# === Helper: Resize with padding like YOLO output ===
def resize_with_padding(image, desired_size=224):
    """Scale ``image`` so its longer side equals ``desired_size`` and pad it square.

    The aspect ratio is preserved (nearest-neighbour interpolation) and the
    remaining border is filled with zeros, split as evenly as possible
    between the two sides of each axis.

    Args:
        image: Array whose first two axes are (height, width).
        desired_size: Side length of the square output.

    Returns:
        The resized and zero-padded image.
    """
    height, width = image.shape[:2]
    scale = float(desired_size) / max(height, width)
    scaled_h, scaled_w = int(height * scale), int(width * scale)

    # cv2.resize takes the target size as (width, height).
    resized = cv2.resize(image, (scaled_w, scaled_h), interpolation=cv2.INTER_NEAREST)

    # Whatever is missing to reach desired_size becomes a black border; an odd
    # remainder puts the extra pixel on the bottom/right.
    pad_h = desired_size - scaled_h
    pad_w = desired_size - scaled_w
    top = pad_h // 2
    left = pad_w // 2
    return cv2.copyMakeBorder(resized, top, pad_h - top, left, pad_w - left,
                              cv2.BORDER_CONSTANT, value=0)

# === Paths ===
labelme_json_path = '/Users/suzetteschulenburg/Desktop/E1886_IMG_8243.json'
yolo_output_path = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test/Bad/E1886_IMG_8243_processed.jpg'

# === Load YOLO output (already cropped and resized) ===
# cv2.imread returns None instead of raising when the file is missing or
# unreadable, so fail here with a clear message rather than inside cvtColor.
yolo_bgr = cv2.imread(yolo_output_path)
if yolo_bgr is None:
    raise FileNotFoundError(f"Could not read YOLO output image: {yolo_output_path}")
yolo_img = cv2.cvtColor(yolo_bgr, cv2.COLOR_BGR2RGB)

# === Load LabelMe JSON and original image ===
with open(labelme_json_path, 'r') as f:
    labelme_data = json.load(f)
# imagePath is resolved relative to the folder that holds the JSON file.
original_img_path = os.path.join(os.path.dirname(labelme_json_path), labelme_data['imagePath'])
# Only the dimensions are needed, so release the file handle immediately.
with Image.open(original_img_path) as original_img:
    original_size = original_img.size  # (width, height)

# === Create binary mask from LabelMe polygon ===
labelme_mask = Image.new('L', original_size, 0)
draw = ImageDraw.Draw(labelme_mask)
for shape in labelme_data['shapes']:
    if shape['shape_type'] == 'polygon':
        polygon = [(x, y) for x, y in shape['points']]
        draw.polygon(polygon, outline=1, fill=1)
# Scale the 0/1 mask to 0/255 so it can be thresholded and displayed directly.
labelme_mask = np.array(labelme_mask) * 255

# === Resize LabelMe mask to YOLO image size using padding ===
resized_mask = resize_with_padding(labelme_mask, desired_size=224)
# The pixel-wise IoU and the overlay below both need identical dimensions.
if resized_mask.shape != yolo_img.shape[:2]:
    raise ValueError(
        f"YOLO image is {yolo_img.shape[:2]} but the resized mask is {resized_mask.shape}"
    )

# === Convert both to binary masks ===
labelme_binary = (resized_mask > 127).astype(np.uint8)

# Use the YOLO image to extract a binary mask (assumes white background)
# NOTE(review): every pixel darker than 250 counts as foreground here, while
# the LabelMe mask is padded with black - confirm the YOLO cutout really has a
# white background and the same framing as the padded mask.
yolo_gray = cv2.cvtColor(yolo_img, cv2.COLOR_RGB2GRAY)
yolo_mask = (yolo_gray < 250).astype(np.uint8)

# === Compute IoU ===
intersection = np.logical_and(yolo_mask, labelme_binary).sum()
union = np.logical_or(yolo_mask, labelme_binary).sum()
iou = intersection / union if union != 0 else 0
print(f"IoU (Intersection over Union): {iou * 100:.2f}%")

# === Visualization ===
# Convert labelme mask to 3-channel for overlay
resized_mask_rgb = np.stack([resized_mask]*3, axis=-1)

overlay = cv2.addWeighted(yolo_img, 0.6, resized_mask_rgb, 0.4, 0)

# === Show side-by-side comparison ===
fig, axs = plt.subplots(1, 3, figsize=(18, 6))
axs[0].imshow(yolo_img)
axs[0].set_title("YOLO Cutout")
axs[1].imshow(resized_mask, cmap='gray')
axs[1].set_title("Manual LabelMe Mask (Resized)")
axs[2].imshow(overlay)
axs[2].set_title("Overlay Comparison")

for ax in axs:
    ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
import json
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw

# === Helper: Resize with padding like YOLO output ===
def resize_with_padding(image, desired_size=224):
    """Return ``image`` resized to fit a ``desired_size`` square, zero-padded.

    The longer side is scaled to ``desired_size`` with the aspect ratio kept
    (nearest-neighbour interpolation); the shorter side is then centred by
    adding a constant black border.

    Args:
        image: Array whose first two axes are (height, width).
        desired_size: Side length of the square output.
    """
    src_h, src_w = image.shape[:2]
    factor = float(desired_size) / max(src_h, src_w)
    dst_h = int(src_h * factor)
    dst_w = int(src_w * factor)

    # Note the (width, height) order expected by cv2.resize.
    shrunk = cv2.resize(image, (dst_w, dst_h), interpolation=cv2.INTER_NEAREST)

    # Split the missing rows/columns between both sides; the second side gets
    # the extra pixel when the gap is odd.
    gap_h, gap_w = desired_size - dst_h, desired_size - dst_w
    border_top, border_left = gap_h // 2, gap_w // 2
    border_bottom, border_right = gap_h - border_top, gap_w - border_left

    return cv2.copyMakeBorder(shrunk, border_top, border_bottom, border_left, border_right,
                              cv2.BORDER_CONSTANT, value=0)

# === Paths ===
labelme_json_path = '/Users/suzetteschulenburg/Desktop/CowsTestYOLO/YOLO/Bad/CSS19912_IMG_9248.json'
yolo_output_path = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test/Bad/CSS19912_IMG_9248_processed.jpg'

# === Load YOLO output (already cropped and resized) ===
# cv2.imread returns None instead of raising when the file is missing or
# unreadable, so fail here with a clear message rather than inside cvtColor.
yolo_bgr = cv2.imread(yolo_output_path)
if yolo_bgr is None:
    raise FileNotFoundError(f"Could not read YOLO output image: {yolo_output_path}")
yolo_img = cv2.cvtColor(yolo_bgr, cv2.COLOR_BGR2RGB)

# === Load LabelMe JSON and original image ===
with open(labelme_json_path, 'r') as f:
    labelme_data = json.load(f)
# imagePath is resolved relative to the folder that holds the JSON file.
original_img_path = os.path.join(os.path.dirname(labelme_json_path), labelme_data['imagePath'])
# Only the dimensions are needed, so release the file handle immediately.
with Image.open(original_img_path) as original_img:
    original_size = original_img.size  # (width, height)

# === Create binary mask from LabelMe polygon ===
labelme_mask = Image.new('L', original_size, 0)
draw = ImageDraw.Draw(labelme_mask)
for shape in labelme_data['shapes']:
    if shape['shape_type'] == 'polygon':
        polygon = [(x, y) for x, y in shape['points']]
        draw.polygon(polygon, outline=1, fill=1)
# Scale the 0/1 mask to 0/255 so it can be thresholded and displayed directly.
labelme_mask = np.array(labelme_mask) * 255

# === Resize LabelMe mask to YOLO image size using padding ===
resized_mask = resize_with_padding(labelme_mask, desired_size=224)
# The pixel-wise IoU and the overlay below both need identical dimensions.
if resized_mask.shape != yolo_img.shape[:2]:
    raise ValueError(
        f"YOLO image is {yolo_img.shape[:2]} but the resized mask is {resized_mask.shape}"
    )

# === Convert both to binary masks ===
labelme_binary = (resized_mask > 127).astype(np.uint8)

# Use the YOLO image to extract a binary mask (assumes white background)
# NOTE(review): every pixel darker than 250 counts as foreground here, while
# the LabelMe mask is padded with black - confirm the YOLO cutout really has a
# white background and the same framing as the padded mask.
yolo_gray = cv2.cvtColor(yolo_img, cv2.COLOR_RGB2GRAY)
yolo_mask = (yolo_gray < 250).astype(np.uint8)

# === Compute IoU ===
intersection = np.logical_and(yolo_mask, labelme_binary).sum()
union = np.logical_or(yolo_mask, labelme_binary).sum()
iou = intersection / union if union != 0 else 0
print(f"IoU (Intersection over Union): {iou * 100:.2f}%")

# === Visualization ===
# Convert labelme mask to 3-channel for overlay
resized_mask_rgb = np.stack([resized_mask]*3, axis=-1)

overlay = cv2.addWeighted(yolo_img, 0.6, resized_mask_rgb, 0.4, 0)

# === Show side-by-side comparison ===
fig, axs = plt.subplots(1, 3, figsize=(18, 6))
axs[0].imshow(yolo_img)
axs[0].set_title("YOLO Cutout")
axs[1].imshow(resized_mask, cmap='gray')
axs[1].set_title("Manual LabelMe Mask (Resized)")
axs[2].imshow(overlay)
axs[2].set_title("Overlay Comparison")

for ax in axs:
    ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
import json
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw

# === Helper: Resize with padding like YOLO output ===
def resize_with_padding(image, desired_size=224):
    """Fit ``image`` into a ``desired_size`` x ``desired_size`` canvas.

    The image is scaled (nearest neighbour, aspect ratio preserved) until its
    longer side equals ``desired_size`` and is then centred on a black canvas.

    Args:
        image: Array whose first two axes are (height, width).
        desired_size: Side length of the square output.

    Returns:
        The scaled image surrounded by a constant zero border.
    """
    rows, cols = image.shape[:2]
    ratio = float(desired_size) / max(rows, cols)
    new_rows, new_cols = (int(side * ratio) for side in (rows, cols))

    # cv2.resize wants (width, height), i.e. (cols, rows).
    resized = cv2.resize(image, (new_cols, new_rows), interpolation=cv2.INTER_NEAREST)

    # Border widths per side; bottom/right absorb the odd pixel, if any.
    extra_rows = desired_size - new_rows
    extra_cols = desired_size - new_cols
    top = extra_rows // 2
    bottom = extra_rows - top
    left = extra_cols // 2
    right = extra_cols - left

    padded = cv2.copyMakeBorder(resized, top, bottom, left, right,
                                cv2.BORDER_CONSTANT, value=0)
    return padded

# === Paths ===
labelme_json_path = '/Users/suzetteschulenburg/Desktop/CSS19912_IMG_9248.json'
yolo_output_path = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test/Bad/CSS19912_IMG_9248_processed.jpg'

# === Load YOLO output (already cropped and resized) ===
# cv2.imread returns None instead of raising when the file is missing or
# unreadable, so fail here with a clear message rather than inside cvtColor.
yolo_bgr = cv2.imread(yolo_output_path)
if yolo_bgr is None:
    raise FileNotFoundError(f"Could not read YOLO output image: {yolo_output_path}")
yolo_img = cv2.cvtColor(yolo_bgr, cv2.COLOR_BGR2RGB)

# === Load LabelMe JSON and original image ===
with open(labelme_json_path, 'r') as f:
    labelme_data = json.load(f)
# imagePath is resolved relative to the folder that holds the JSON file.
original_img_path = os.path.join(os.path.dirname(labelme_json_path), labelme_data['imagePath'])
# Only the dimensions are needed, so release the file handle immediately.
with Image.open(original_img_path) as original_img:
    original_size = original_img.size  # (width, height)

# === Create binary mask from LabelMe polygon ===
labelme_mask = Image.new('L', original_size, 0)
draw = ImageDraw.Draw(labelme_mask)
for shape in labelme_data['shapes']:
    if shape['shape_type'] == 'polygon':
        polygon = [(x, y) for x, y in shape['points']]
        draw.polygon(polygon, outline=1, fill=1)
# Scale the 0/1 mask to 0/255 so it can be thresholded and displayed directly.
labelme_mask = np.array(labelme_mask) * 255

# === Resize LabelMe mask to YOLO image size using padding ===
resized_mask = resize_with_padding(labelme_mask, desired_size=224)
# The pixel-wise IoU and the overlay below both need identical dimensions.
if resized_mask.shape != yolo_img.shape[:2]:
    raise ValueError(
        f"YOLO image is {yolo_img.shape[:2]} but the resized mask is {resized_mask.shape}"
    )

# === Convert both to binary masks ===
labelme_binary = (resized_mask > 127).astype(np.uint8)

# Use the YOLO image to extract a binary mask (assumes white background)
# NOTE(review): every pixel darker than 250 counts as foreground here, while
# the LabelMe mask is padded with black - confirm the YOLO cutout really has a
# white background and the same framing as the padded mask.
yolo_gray = cv2.cvtColor(yolo_img, cv2.COLOR_RGB2GRAY)
yolo_mask = (yolo_gray < 250).astype(np.uint8)

# === Compute IoU ===
intersection = np.logical_and(yolo_mask, labelme_binary).sum()
union = np.logical_or(yolo_mask, labelme_binary).sum()
iou = intersection / union if union != 0 else 0
print(f"IoU (Intersection over Union): {iou * 100:.2f}%")

# === Visualization ===
# Convert labelme mask to 3-channel for overlay
resized_mask_rgb = np.stack([resized_mask]*3, axis=-1)

overlay = cv2.addWeighted(yolo_img, 0.6, resized_mask_rgb, 0.4, 0)

# === Show side-by-side comparison ===
fig, axs = plt.subplots(1, 3, figsize=(18, 6))
axs[0].imshow(yolo_img)
axs[0].set_title("YOLO Cutout")
axs[1].imshow(resized_mask, cmap='gray')
axs[1].set_title("Manual LabelMe Mask (Resized)")
axs[2].imshow(overlay)
axs[2].set_title("Overlay Comparison")

for ax in axs:
    ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
import json
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw

# === Helper: Resize with padding like YOLO output ===
def resize_with_padding(image, desired_size=224):
    """Letterbox ``image`` into a square of side ``desired_size``.

    Scales by ``desired_size / max(height, width)`` using nearest-neighbour
    interpolation, then pads the result with zeros to the square size.

    Args:
        image: Array whose first two axes are (height, width).
        desired_size: Side length of the square output.

    Returns:
        The padded image.
    """
    original_hw = image.shape[:2]
    scale = float(desired_size) / max(original_hw)
    target_h = int(original_hw[0] * scale)
    target_w = int(original_hw[1] * scale)

    # Target size is given to OpenCV as (width, height).
    scaled = cv2.resize(image, (target_w, target_h), interpolation=cv2.INTER_NEAREST)

    # Centre the scaled image: half of the slack goes before, the rest after.
    slack_h = desired_size - target_h
    slack_w = desired_size - target_w
    return cv2.copyMakeBorder(
        scaled,
        slack_h // 2, slack_h - slack_h // 2,
        slack_w // 2, slack_w - slack_w // 2,
        cv2.BORDER_CONSTANT, value=0,
    )

# === Paths ===
labelme_json_path = '/Users/suzetteschulenburg/Desktop/AK2015_IMG_9690.json'
yolo_output_path = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test/Good/AK2015_IMG_9690_processed.jpg'

# === Load YOLO output (already cropped and resized) ===
# cv2.imread returns None instead of raising when the file is missing or
# unreadable, so fail here with a clear message rather than inside cvtColor.
yolo_bgr = cv2.imread(yolo_output_path)
if yolo_bgr is None:
    raise FileNotFoundError(f"Could not read YOLO output image: {yolo_output_path}")
yolo_img = cv2.cvtColor(yolo_bgr, cv2.COLOR_BGR2RGB)

# === Load LabelMe JSON and original image ===
with open(labelme_json_path, 'r') as f:
    labelme_data = json.load(f)
# imagePath is resolved relative to the folder that holds the JSON file.
original_img_path = os.path.join(os.path.dirname(labelme_json_path), labelme_data['imagePath'])
# Only the dimensions are needed, so release the file handle immediately.
with Image.open(original_img_path) as original_img:
    original_size = original_img.size  # (width, height)

# === Create binary mask from LabelMe polygon ===
labelme_mask = Image.new('L', original_size, 0)
draw = ImageDraw.Draw(labelme_mask)
for shape in labelme_data['shapes']:
    if shape['shape_type'] == 'polygon':
        polygon = [(x, y) for x, y in shape['points']]
        draw.polygon(polygon, outline=1, fill=1)
# Scale the 0/1 mask to 0/255 so it can be thresholded and displayed directly.
labelme_mask = np.array(labelme_mask) * 255

# === Resize LabelMe mask to YOLO image size using padding ===
resized_mask = resize_with_padding(labelme_mask, desired_size=224)
# The pixel-wise IoU and the overlay below both need identical dimensions.
if resized_mask.shape != yolo_img.shape[:2]:
    raise ValueError(
        f"YOLO image is {yolo_img.shape[:2]} but the resized mask is {resized_mask.shape}"
    )

# === Convert both to binary masks ===
labelme_binary = (resized_mask > 127).astype(np.uint8)

# Use the YOLO image to extract a binary mask (assumes white background)
# NOTE(review): every pixel darker than 250 counts as foreground here, while
# the LabelMe mask is padded with black - confirm the YOLO cutout really has a
# white background and the same framing as the padded mask.
yolo_gray = cv2.cvtColor(yolo_img, cv2.COLOR_RGB2GRAY)
yolo_mask = (yolo_gray < 250).astype(np.uint8)

# === Compute IoU ===
intersection = np.logical_and(yolo_mask, labelme_binary).sum()
union = np.logical_or(yolo_mask, labelme_binary).sum()
iou = intersection / union if union != 0 else 0
print(f"IoU (Intersection over Union): {iou * 100:.2f}%")

# === Visualization ===
# Convert labelme mask to 3-channel for overlay
resized_mask_rgb = np.stack([resized_mask]*3, axis=-1)

overlay = cv2.addWeighted(yolo_img, 0.6, resized_mask_rgb, 0.4, 0)

# === Show side-by-side comparison ===
fig, axs = plt.subplots(1, 3, figsize=(18, 6))
axs[0].imshow(yolo_img)
axs[0].set_title("YOLO Cutout")
axs[1].imshow(resized_mask, cmap='gray')
axs[1].set_title("Manual LabelMe Mask (Resized)")
axs[2].imshow(overlay)
axs[2].set_title("Overlay Comparison")

for ax in axs:
    ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
import json
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw

# === Helper: Resize with padding like YOLO output ===
def resize_with_padding(image, desired_size=224):
    """Letterbox `image` into a `desired_size` x `desired_size` square.

    The image is scaled with nearest-neighbour interpolation (so a binary mask
    stays binary) until its longer side is about `desired_size`, then centred
    on a black (0) canvas.

    Args:
        image: NumPy array whose first two axes are (height, width).
        desired_size: Side length, in pixels, of the square output.

    Returns:
        The resized and zero-padded array.

    Raises:
        ValueError: If `image` has a zero-length height or width.
    """
    old_h, old_w = image.shape[:2]
    if old_h == 0 or old_w == 0:
        raise ValueError(f"Cannot resize an empty image of shape {image.shape}")

    ratio = float(desired_size) / max(old_h, old_w)
    # Truncate as before, but never let the short side collapse to 0 px:
    # cv2.resize rejects a zero-sized target for very elongated inputs.
    new_h = max(1, int(old_h * ratio))
    new_w = max(1, int(old_w * ratio))

    # cv2.resize takes its target size as (width, height).
    resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_NEAREST)

    # Split the leftover space evenly; an odd pixel goes to the bottom/right.
    delta_w = desired_size - new_w
    delta_h = desired_size - new_h
    top, bottom = delta_h // 2, delta_h - delta_h // 2
    left, right = delta_w // 2, delta_w - delta_w // 2

    padded = cv2.copyMakeBorder(resized, top, bottom, left, right,
                                cv2.BORDER_CONSTANT, value=0)  # black background
    return padded

# === Paths ===
labelme_json_path = '/Users/suzetteschulenburg/Desktop/JH1673_IMG_9572.json'
yolo_output_path = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test/Bad/JH1673_IMG_9572_processed.jpg'

# === Load YOLO output (already cropped and resized) ===
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable, so fail here with a clear message instead of inside cvtColor.
yolo_bgr = cv2.imread(yolo_output_path)
if yolo_bgr is None:
    raise FileNotFoundError(f"Could not read YOLO image: {yolo_output_path}")
yolo_img = cv2.cvtColor(yolo_bgr, cv2.COLOR_BGR2RGB)

# === Load LabelMe JSON and original image ===
with open(labelme_json_path, 'r') as f:
    labelme_data = json.load(f)
# 'imagePath' is resolved relative to the JSON file's own directory.
original_img_path = os.path.join(os.path.dirname(labelme_json_path), labelme_data['imagePath'])
original_img = Image.open(original_img_path)

# === Create binary mask from LabelMe polygon ===
labelme_mask = Image.new('L', original_img.size, 0)
draw = ImageDraw.Draw(labelme_mask)
for shape in labelme_data.get('shapes', []):
    if shape.get('shape_type') != 'polygon':
        continue
    polygon = [(x, y) for x, y in shape.get('points', [])]
    # A polygon needs at least three vertices to enclose any area.
    if len(polygon) >= 3:
        draw.polygon(polygon, outline=1, fill=1)
# Scale 0/1 to 0/255 so the "> 127" threshold below separates the two values.
labelme_mask = np.array(labelme_mask) * 255

# === Resize LabelMe mask to YOLO image size using padding ===
resized_mask = resize_with_padding(labelme_mask, desired_size=224)
# The IoU and the overlay are element-wise, so both inputs must share a shape.
if resized_mask.shape[:2] != yolo_img.shape[:2]:
    raise ValueError(
        f"Shape mismatch: YOLO image is {yolo_img.shape[:2]}, "
        f"resized LabelMe mask is {resized_mask.shape[:2]}"
    )

# === Convert both to binary masks ===
labelme_binary = (resized_mask > 127).astype(np.uint8)

# Use the YOLO image to extract a binary mask (assumes white background):
# every pixel whose grayscale value is below 250 is counted as foreground.
yolo_gray = cv2.cvtColor(yolo_img, cv2.COLOR_RGB2GRAY)
yolo_mask = (yolo_gray < 250).astype(np.uint8)

# === Compute IoU ===
# An empty union (both masks empty) is reported as 0 rather than dividing by zero.
intersection = np.logical_and(yolo_mask, labelme_binary).sum()
union = np.logical_or(yolo_mask, labelme_binary).sum()
iou = intersection / union if union != 0 else 0
print(f"IoU (Intersection over Union): {iou * 100:.2f}%")

# === Visualization ===
# Convert labelme mask to 3-channel for overlay
resized_mask_rgb = np.stack([resized_mask] * 3, axis=-1)

# Blend: 60% YOLO cutout, 40% manual mask.
overlay = cv2.addWeighted(yolo_img, 0.6, resized_mask_rgb, 0.4, 0)

# === Show side-by-side comparison ===
fig, axs = plt.subplots(1, 3, figsize=(18, 6))
axs[0].imshow(yolo_img)
axs[0].set_title("YOLO Cutout")
axs[1].imshow(resized_mask, cmap='gray')
axs[1].set_title("Manual LabelMe Mask (Resized)")
axs[2].imshow(overlay)
axs[2].set_title("Overlay Comparison")

for ax in axs:
    ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
import json
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw

# === Helper: Resize with padding like YOLO output ===
def resize_with_padding(image, desired_size=224):
    """Letterbox `image` into a `desired_size` x `desired_size` square.

    The image is scaled with nearest-neighbour interpolation (so a binary mask
    stays binary) until its longer side is about `desired_size`, then centred
    on a black (0) canvas.

    Args:
        image: NumPy array whose first two axes are (height, width).
        desired_size: Side length, in pixels, of the square output.

    Returns:
        The resized and zero-padded array.

    Raises:
        ValueError: If `image` has a zero-length height or width.
    """
    old_h, old_w = image.shape[:2]
    if old_h == 0 or old_w == 0:
        raise ValueError(f"Cannot resize an empty image of shape {image.shape}")

    ratio = float(desired_size) / max(old_h, old_w)
    # Truncate as before, but never let the short side collapse to 0 px:
    # cv2.resize rejects a zero-sized target for very elongated inputs.
    new_h = max(1, int(old_h * ratio))
    new_w = max(1, int(old_w * ratio))

    # cv2.resize takes its target size as (width, height).
    resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_NEAREST)

    # Split the leftover space evenly; an odd pixel goes to the bottom/right.
    delta_w = desired_size - new_w
    delta_h = desired_size - new_h
    top, bottom = delta_h // 2, delta_h - delta_h // 2
    left, right = delta_w // 2, delta_w - delta_w // 2

    padded = cv2.copyMakeBorder(resized, top, bottom, left, right,
                                cv2.BORDER_CONSTANT, value=0)  # black background
    return padded

# === Paths ===
labelme_json_path = '/Users/suzetteschulenburg/Desktop/WA1646_IMG_9669.json'
yolo_output_path = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test/Good/WA1646_IMG_9669_processed.jpg'

# === Load YOLO output (already cropped and resized) ===
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable, so fail here with a clear message instead of inside cvtColor.
yolo_bgr = cv2.imread(yolo_output_path)
if yolo_bgr is None:
    raise FileNotFoundError(f"Could not read YOLO image: {yolo_output_path}")
yolo_img = cv2.cvtColor(yolo_bgr, cv2.COLOR_BGR2RGB)

# === Load LabelMe JSON and original image ===
with open(labelme_json_path, 'r') as f:
    labelme_data = json.load(f)
# 'imagePath' is resolved relative to the JSON file's own directory.
original_img_path = os.path.join(os.path.dirname(labelme_json_path), labelme_data['imagePath'])
original_img = Image.open(original_img_path)

# === Create binary mask from LabelMe polygon ===
labelme_mask = Image.new('L', original_img.size, 0)
draw = ImageDraw.Draw(labelme_mask)
for shape in labelme_data.get('shapes', []):
    if shape.get('shape_type') != 'polygon':
        continue
    polygon = [(x, y) for x, y in shape.get('points', [])]
    # A polygon needs at least three vertices to enclose any area.
    if len(polygon) >= 3:
        draw.polygon(polygon, outline=1, fill=1)
# Scale 0/1 to 0/255 so the "> 127" threshold below separates the two values.
labelme_mask = np.array(labelme_mask) * 255

# === Resize LabelMe mask to YOLO image size using padding ===
resized_mask = resize_with_padding(labelme_mask, desired_size=224)
# The IoU and the overlay are element-wise, so both inputs must share a shape.
if resized_mask.shape[:2] != yolo_img.shape[:2]:
    raise ValueError(
        f"Shape mismatch: YOLO image is {yolo_img.shape[:2]}, "
        f"resized LabelMe mask is {resized_mask.shape[:2]}"
    )

# === Convert both to binary masks ===
labelme_binary = (resized_mask > 127).astype(np.uint8)

# Use the YOLO image to extract a binary mask (assumes white background):
# every pixel whose grayscale value is below 250 is counted as foreground.
yolo_gray = cv2.cvtColor(yolo_img, cv2.COLOR_RGB2GRAY)
yolo_mask = (yolo_gray < 250).astype(np.uint8)

# === Compute IoU ===
# An empty union (both masks empty) is reported as 0 rather than dividing by zero.
intersection = np.logical_and(yolo_mask, labelme_binary).sum()
union = np.logical_or(yolo_mask, labelme_binary).sum()
iou = intersection / union if union != 0 else 0
print(f"IoU (Intersection over Union): {iou * 100:.2f}%")

# === Visualization ===
# Convert labelme mask to 3-channel for overlay
resized_mask_rgb = np.stack([resized_mask] * 3, axis=-1)

# Blend: 60% YOLO cutout, 40% manual mask.
overlay = cv2.addWeighted(yolo_img, 0.6, resized_mask_rgb, 0.4, 0)

# === Show side-by-side comparison ===
fig, axs = plt.subplots(1, 3, figsize=(18, 6))
axs[0].imshow(yolo_img)
axs[0].set_title("YOLO Cutout")
axs[1].imshow(resized_mask, cmap='gray')
axs[1].set_title("Manual LabelMe Mask (Resized)")
axs[2].imshow(overlay)
axs[2].set_title("Overlay Comparison")

for ax in axs:
    ax.axis('off')

plt.tight_layout()
plt.show()

Full set

In [None]:
import json
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw

# === Helper: Resize with padding like YOLO output ===
def resize_with_padding(image, desired_size=224):
    """Letterbox `image` into a `desired_size` x `desired_size` square.

    The image is scaled with nearest-neighbour interpolation (so a binary mask
    stays binary) until its longer side is about `desired_size`, then centred
    on a black (0) canvas.

    Args:
        image: NumPy array whose first two axes are (height, width).
        desired_size: Side length, in pixels, of the square output.

    Returns:
        The resized and zero-padded array.

    Raises:
        ValueError: If `image` has a zero-length height or width.
    """
    old_h, old_w = image.shape[:2]
    if old_h == 0 or old_w == 0:
        raise ValueError(f"Cannot resize an empty image of shape {image.shape}")

    ratio = float(desired_size) / max(old_h, old_w)
    # Truncate as before, but never let the short side collapse to 0 px:
    # cv2.resize rejects a zero-sized target for very elongated inputs.
    new_h = max(1, int(old_h * ratio))
    new_w = max(1, int(old_w * ratio))

    # cv2.resize takes its target size as (width, height).
    resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_NEAREST)

    # Split the leftover space evenly; an odd pixel goes to the bottom/right.
    delta_w = desired_size - new_w
    delta_h = desired_size - new_h
    top, bottom = delta_h // 2, delta_h - delta_h // 2
    left, right = delta_w // 2, delta_w - delta_w // 2

    padded = cv2.copyMakeBorder(resized, top, bottom, left, right,
                                cv2.BORDER_CONSTANT, value=0)  # black background
    return padded

# === Paths ===
labelme_json_path = '/Users/suzetteschulenburg/Desktop/CowsTestYOLO/YOLO/Bad/CSS19912_IMG_9248.json'
yolo_output_path = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test/Bad/CSS19912_IMG_9248_processed.jpg'

# === Load YOLO output (already cropped and resized) ===
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable, so fail here with a clear message instead of inside cvtColor.
yolo_bgr = cv2.imread(yolo_output_path)
if yolo_bgr is None:
    raise FileNotFoundError(f"Could not read YOLO image: {yolo_output_path}")
yolo_img = cv2.cvtColor(yolo_bgr, cv2.COLOR_BGR2RGB)

# === Load LabelMe JSON and original image ===
with open(labelme_json_path, 'r') as f:
    labelme_data = json.load(f)
# 'imagePath' is resolved relative to the JSON file's own directory.
original_img_path = os.path.join(os.path.dirname(labelme_json_path), labelme_data['imagePath'])
original_img = Image.open(original_img_path)

# === Create binary mask from LabelMe polygon ===
labelme_mask = Image.new('L', original_img.size, 0)
draw = ImageDraw.Draw(labelme_mask)
for shape in labelme_data.get('shapes', []):
    if shape.get('shape_type') != 'polygon':
        continue
    polygon = [(x, y) for x, y in shape.get('points', [])]
    # A polygon needs at least three vertices to enclose any area.
    if len(polygon) >= 3:
        draw.polygon(polygon, outline=1, fill=1)
# Scale 0/1 to 0/255 so the "> 127" threshold below separates the two values.
labelme_mask = np.array(labelme_mask) * 255

# === Resize LabelMe mask to YOLO image size using padding ===
resized_mask = resize_with_padding(labelme_mask, desired_size=224)
# The IoU and the overlay are element-wise, so both inputs must share a shape.
if resized_mask.shape[:2] != yolo_img.shape[:2]:
    raise ValueError(
        f"Shape mismatch: YOLO image is {yolo_img.shape[:2]}, "
        f"resized LabelMe mask is {resized_mask.shape[:2]}"
    )

# === Convert both to binary masks ===
labelme_binary = (resized_mask > 127).astype(np.uint8)

# Use the YOLO image to extract a binary mask (assumes white background):
# every pixel whose grayscale value is below 250 is counted as foreground.
yolo_gray = cv2.cvtColor(yolo_img, cv2.COLOR_RGB2GRAY)
yolo_mask = (yolo_gray < 250).astype(np.uint8)

# === Compute IoU ===
# An empty union (both masks empty) is reported as 0 rather than dividing by zero.
intersection = np.logical_and(yolo_mask, labelme_binary).sum()
union = np.logical_or(yolo_mask, labelme_binary).sum()
iou = intersection / union if union != 0 else 0
print(f"IoU (Intersection over Union): {iou * 100:.2f}%")

# === Visualization ===
# Convert labelme mask to 3-channel for overlay
resized_mask_rgb = np.stack([resized_mask] * 3, axis=-1)

# Blend: 60% YOLO cutout, 40% manual mask.
overlay = cv2.addWeighted(yolo_img, 0.6, resized_mask_rgb, 0.4, 0)

# === Show side-by-side comparison ===
fig, axs = plt.subplots(1, 3, figsize=(18, 6))
axs[0].imshow(yolo_img)
axs[0].set_title("YOLO Cutout")
axs[1].imshow(resized_mask, cmap='gray')
axs[1].set_title("Manual LabelMe Mask (Resized)")
axs[2].imshow(overlay)
axs[2].set_title("Overlay Comparison")

for ax in axs:
    ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
import json
import os
import numpy as np
import cv2
# import matplotlib.pyplot as plt # Kept commented out as you requested no display
from PIL import Image, ImageDraw

# === Helper: Resize with padding like YOLO output ===
def resize_with_padding(image, desired_size=224):
    """Letterbox `image` into a `desired_size` x `desired_size` square.

    The image is scaled with nearest-neighbour interpolation (so a binary mask
    stays binary) until its longer side is about `desired_size`, then centred
    on a black (0) canvas.

    Args:
        image: NumPy array whose first two axes are (height, width).
        desired_size: Side length, in pixels, of the square output.

    Returns:
        The resized and zero-padded array.

    Raises:
        ValueError: If `image` has a zero-length height or width.
    """
    old_h, old_w = image.shape[:2]
    if old_h == 0 or old_w == 0:
        raise ValueError(f"Cannot resize an empty image of shape {image.shape}")

    ratio = float(desired_size) / max(old_h, old_w)
    # Truncate as before, but never let the short side collapse to 0 px:
    # cv2.resize rejects a zero-sized target for very elongated inputs.
    new_h = max(1, int(old_h * ratio))
    new_w = max(1, int(old_w * ratio))

    # cv2.resize takes its target size as (width, height).
    resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_NEAREST)

    # Split the leftover space evenly; an odd pixel goes to the bottom/right.
    delta_w = desired_size - resized.shape[1]
    delta_h = desired_size - resized.shape[0]
    top, bottom = delta_h // 2, delta_h - delta_h // 2
    left, right = delta_w // 2, delta_w - delta_w // 2

    padded = cv2.copyMakeBorder(resized, top, bottom, left, right,
                                cv2.BORDER_CONSTANT, value=0)  # black background
    return padded

# --- Configuration Paths ---
# !!! IMPORTANT: Set your base directories here !!!
labelme_base_dir = '/Users/suzetteschulenburg/Desktop/CowsTestYOLO/YOLO/Bad/'
yolo_processed_base_dir = '/Users/suzetteschulenburg/Desktop/MainUseProcessed/Test/Bad/'
original_images_root_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test/Bad' # This is the explicit root for original images

# Base name (JSON file name without extension) that gets extra diagnostic output.
diagnostic_base_name = "CSS19912_IMG_9249"

# --- Processing Loop Initialization ---
# Sorted so the per-file log comes out in a reproducible order.
json_files = sorted(
    f for f in os.listdir(labelme_base_dir)
    if os.path.isfile(os.path.join(labelme_base_dir, f)) and f.endswith('.json')
)

print(f"Found {len(json_files)} LabelMe JSON files in '{labelme_base_dir}'.")
print(f"Attempting to match them with YOLO images in '{yolo_processed_base_dir}'.")
print(f"Original images will be looked for in: '{original_images_root_dir}'.")
print("\n--- NOTE: All images and masks WILL BE RESIZED to 224x224 for consistent IoU computation. ---")
print(f"--- PAY ATTENTION to 'DIAGNOSTIC PATHS' below for {diagnostic_base_name} ---")


processed_files_count = 0
skipped_files_count = 0
all_ious = []

# --- Main Loop ---
# One iteration per LabelMe JSON; every failure path bumps skipped_files_count
# and moves on, so a single bad file never aborts the batch.
for json_file_name in json_files:
    labelme_json_path = os.path.join(labelme_base_dir, json_file_name)
    base_name = os.path.splitext(json_file_name)[0]
    yolo_image_name = f"{base_name}_processed.jpg"
    yolo_output_path = os.path.join(yolo_processed_base_dir, yolo_image_name)
    is_diagnostic = base_name == diagnostic_base_name

    print(f"\n--- Processing: '{base_name}' ---")
    print(f"  LabelMe JSON: {labelme_json_path}")
    print(f"  YOLO Image:   {yolo_output_path}") # This already prints the YOLO path

    try:
        # 1. Load YOLO output image
        if not os.path.exists(yolo_output_path):
            print(f"  SKIPPING: Corresponding YOLO image NOT FOUND at {yolo_output_path}")
            skipped_files_count += 1
            continue
        yolo_img_raw = cv2.imread(yolo_output_path)
        if yolo_img_raw is None:
            print(f"  SKIPPING: Could not read YOLO image file: {yolo_output_path}")
            skipped_files_count += 1
            continue
        yolo_img_resized = resize_with_padding(cv2.cvtColor(yolo_img_raw, cv2.COLOR_BGR2RGB), desired_size=224)

        # 2. Load LabelMe JSON and locate the original image
        with open(labelme_json_path, 'r') as f:
            labelme_data = json.load(f)

        # `or ''` also covers an explicit null 'imagePath' in the JSON.
        original_img_filename = os.path.basename(labelme_data.get('imagePath') or '')

        if not original_img_filename:
            print(f"  SKIPPING: 'imagePath' field is missing or empty in LabelMe JSON for {base_name}.")
            skipped_files_count += 1
            continue

        original_img_full_path = os.path.join(original_images_root_dir, original_img_filename)

        # Diagnostic paths are printed BEFORE the existence check so that a
        # missing original image still shows which path was tried.
        if is_diagnostic:
            print(f"  *** DIAGNOSTIC PATHS for {base_name} ***")
            print(f"  YOLO Path:        {yolo_output_path}")
            print(f"  Original Img Path: {original_img_full_path}")

        if not os.path.exists(original_img_full_path):
            print(f"  SKIPPING: Original image referenced in JSON NOT FOUND at {original_img_full_path}")
            skipped_files_count += 1
            continue

        # Only the size is needed, so read it from the header instead of
        # decoding the whole photo, and close the file handle straight away.
        with Image.open(original_img_full_path) as original_img_pil:
            original_w, original_h = original_img_pil.size

        # 3. Create binary mask from LabelMe polygon on the ORIGINAL image size
        labelme_mask_pil = Image.new('L', (original_w, original_h), 0)
        draw = ImageDraw.Draw(labelme_mask_pil)
        found_polygon = False
        for shape in labelme_data.get('shapes', []):
            if shape.get('shape_type') == 'polygon' and 'points' in shape:
                polygon = [(x, y) for x, y in shape['points']]
                if len(polygon) >= 3:
                    draw.polygon(polygon, outline=1, fill=1)
                    found_polygon = True
        if not found_polygon:
            print(f"  SKIPPING: No valid polygon (with 3+ points) found in LabelMe JSON for {base_name}.")
            skipped_files_count += 1
            continue
        labelme_mask_raw = np.array(labelme_mask_pil) * 255

        # 4. Resize LabelMe mask to match the desired_size (224x224)
        resized_labelme_mask = resize_with_padding(labelme_mask_raw, desired_size=224)

        # 5. Convert LabelMe mask to binary (0 or 1)
        labelme_binary = (resized_labelme_mask > 127).astype(np.uint8)

        # 6. Create binary mask from YOLO image (already resized to 224x224)
        # NOTE(review): an adaptive (local-mean) threshold reacts to local
        # contrast, so this may mark edges/texture rather than a filled
        # silhouette — confirm this is the intended foreground definition.
        yolo_gray = cv2.cvtColor(yolo_img_resized, cv2.COLOR_RGB2GRAY)
        yolo_mask = cv2.adaptiveThreshold(yolo_gray, 255,
                                          cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                          cv2.THRESH_BINARY_INV,
                                          11,
                                          2)
        yolo_mask = (yolo_mask / 255).astype(np.uint8)

        # Diagnostic mask sums reuse the masks computed for THIS file. The
        # previous version rebuilt them from `original_img_np` before it was
        # assigned in this iteration (NameError on the first file, a stale
        # size from the previous image afterwards).
        if is_diagnostic:
            print(f"  YOLO Mask Sum (pixels): {yolo_mask.sum()}")
            print(f"  LabelMe Mask Sum (pixels): {labelme_binary.sum()}")
            print("  *** END DIAGNOSTIC ***")

        # 7. Compute IoU
        if yolo_mask.shape != labelme_binary.shape:
            print(f"  CRITICAL ERROR: Internal mask shape mismatch after resizing for {base_name}! YOLO: {yolo_mask.shape}, LabelMe: {labelme_binary.shape}. Skipping.")
            skipped_files_count += 1
            continue

        intersection = np.logical_and(yolo_mask, labelme_binary).sum()
        union = np.logical_or(yolo_mask, labelme_binary).sum()
        iou = intersection / union if union != 0 else 0
        print(f"  Calculated IoU: {iou * 100:.2f}%")
        processed_files_count += 1
        all_ious.append(iou)

    except FileNotFoundError as e:
        print(f"  ERROR: File not found for {base_name} - {e}. Skipping.")
        skipped_files_count += 1
    except json.JSONDecodeError:
        print(f"  ERROR: Could not decode JSON for {base_name}. Skipping.")
        skipped_files_count += 1
    except Exception as e:
        # Last-resort boundary: report the problem and keep the batch going.
        print(f"  An unexpected error occurred while processing {base_name}: {e}. Skipping.")
        skipped_files_count += 1

# --- Final Summary ---
print("\n" + "="*60)
print("--- Processing Complete ---")
print(f"Total LabelMe JSON files found: {len(json_files)}")
print(f"Successfully processed: {processed_files_count} file pairs.")
print(f"Skipped due to errors, missing files, or invalid data: {skipped_files_count} file pairs.")

if all_ious:
    average_iou = np.mean(all_ious)
    print(f"\nOverall Average IoU across successfully processed files: {average_iou * 100:.2f}%")
else:
    print("\nNo IoU values were calculated successfully.")
print("="*60)

In [None]:
import json
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw
from ultralytics import YOLO # Import YOLO model

# --- Paths for the specific image to test ---
# Please double-check these paths are correct for your system
labelme_json_path = '/Users/suzetteschulenburg/Desktop/CowsTestYOLO/YOLO/Bad/CSS19912_IMG_9248.json'
# We will NOT use yolo_output_path directly for mask extraction anymore.
# We'll run YOLO on the original image.
original_image_manual_path = '/Users/suzetteschulenburg/Desktop/MainUse/Test/Bad/CSS19912_IMG_9248.jpg'

# === Load YOLO segmentation model ===
# Fatal setup errors raise SystemExit explicitly: `exit()` is the interactive
# helper installed by the `site` module and is not meant for program code.
try:
    model = YOLO("yolov8s-seg.pt") # Ensure you have this model downloaded or accessible
except Exception as e:
    print(f"Error loading YOLO model: {e}")
    print("Please ensure 'yolov8s-seg.pt' is in your current directory or specified path.")
    raise SystemExit(1) from e

# --- Load LabelMe JSON and original image ---
try:
    with open(labelme_json_path, 'r') as f:
        labelme_data = json.load(f)

    original_img_path = original_image_manual_path
    if not os.path.exists(original_img_path):
        raise FileNotFoundError(f"Original image NOT FOUND at {original_img_path}. Please check the path.")

    original_img_pil = Image.open(original_img_path).convert('RGB') # Ensure RGB for YOLO
    original_img_np = np.array(original_img_pil) # Original image as NumPy array
    original_h, original_w = original_img_np.shape[:2]

except FileNotFoundError as e:
    print(f"Error loading LabelMe data or original image: {e}")
    raise SystemExit(1) from e
except json.JSONDecodeError as e:
    print(f"Error: Could not decode LabelMe JSON from {labelme_json_path}. Check file format.")
    raise SystemExit(1) from e
except Exception as e:
    print(f"An unexpected error occurred: {e}")
    raise SystemExit(1) from e

# --- Create binary mask from LabelMe polygon (at original image resolution) ---
labelme_mask_pil = Image.new('L', (original_w, original_h), 0)
draw = ImageDraw.Draw(labelme_mask_pil)
found_polygon = False
labelme_polygon_points = []
labelme_polygons = []  # every valid polygon (integer vertices), reused for the outline plot
for shape in labelme_data.get('shapes', []):
    if shape.get('shape_type') == 'polygon' and 'points' in shape:
        polygon = [(x, y) for x, y in shape['points']]
        if len(polygon) >= 3:
            # Ensure points are integers for ImageDraw
            int_polygon = [(int(px), int(py)) for px, py in polygon]
            draw.polygon(int_polygon, outline=1, fill=1)
            labelme_polygons.append(int_polygon)
            labelme_polygon_points.extend(polygon)
            found_polygon = True

if not found_polygon:
    print(f"Warning: No valid polygon found in LabelMe JSON for {os.path.basename(labelme_json_path)}. IoU will be 0.")
    # Create an empty mask if no polygon is found
    labelme_binary_original_res = np.zeros((original_h, original_w), dtype=np.uint8)
else:
    # Convert PIL mask to a binary NumPy array (0 or 1) at original resolution
    labelme_binary_original_res = (np.array(labelme_mask_pil) > 0).astype(np.uint8)

# --- Run YOLO segmentation on the ORIGINAL image ---
yolo_binary_original_res = np.zeros((original_h, original_w), dtype=np.uint8) # Initialize empty YOLO mask
yolo_confidence = 0.0

try:
    results = model(original_img_np, verbose=False) # Run inference, suppress verbose output
    masks = results[0].masks
    boxes = results[0].boxes
    names = results[0].names

    if masks is None or len(masks.data) == 0:
        print(f"❌ No cow mask found by YOLO in: {os.path.basename(original_img_path)}")
    else:
        # Find the largest 'cow' mask (or similar animal class) by box area
        best_index = None
        largest_area = 0
        for i, cls_id in enumerate(boxes.cls.cpu().numpy()):
            name = names[int(cls_id)]
            if name in ['cow', 'bull', 'animal', 'cattle']: # Adjust class names if needed
                x1, y1, x2, y2 = map(int, boxes.xyxy[i].cpu().numpy())
                area = (x2 - x1) * (y2 - y1)
                if area > largest_area:
                    best_index = i
                    largest_area = area

        if best_index is not None:
            # Get the mask data, resize to original image dimensions
            # NOTE(review): masks.data may be at the model's padded inference
            # resolution; a plain resize would then be slightly misaligned.
            # Verify against the Ultralytics docs (e.g. retina_masks / masks.xy).
            yolo_mask_data = masks.data[best_index].cpu().numpy()
            yolo_mask_resized = cv2.resize(yolo_mask_data, (original_w, original_h), interpolation=cv2.INTER_NEAREST)
            yolo_binary_original_res = (yolo_mask_resized > 0.5).astype(np.uint8) # Threshold to binary
            yolo_confidence = float(boxes.conf[best_index].cpu().numpy())
            print(f"✅ YOLO detected a '{names[int(boxes.cls[best_index])]}' with confidence: {yolo_confidence:.2f}")
        else:
            print(f"❌ No valid 'cow' class detected by YOLO in: {os.path.basename(original_img_path)}")

except Exception as e:
    # Inference problems are reported; the IoU below is then computed against an empty mask.
    print(f"An error occurred during YOLO inference: {e}")

# --- Compute IoU (both masks are at original image resolution) ---
if yolo_binary_original_res.shape != labelme_binary_original_res.shape:
    print(f"Error: Mask shapes do not match for IoU calculation! YOLO: {yolo_binary_original_res.shape}, LabelMe: {labelme_binary_original_res.shape}")
    iou = 0.0
else:
    intersection = np.logical_and(yolo_binary_original_res, labelme_binary_original_res).sum()
    union = np.logical_or(yolo_binary_original_res, labelme_binary_original_res).sum()
    iou = intersection / union if union != 0 else 0

print(f"IoU (Intersection over Union): {iou * 100:.2f}%")

# --- Visualization ---
fig_title = (f"IoU: {iou * 100:.2f}% | YOLO Conf: {yolo_confidence:.2f} "
             f"for {os.path.basename(original_img_path)} "
             f"(Comparing Full-Image YOLO to Manual Label)")

fig, axs = plt.subplots(1, 3, figsize=(28, 10)) # Adjusted figsize for potentially large images

# Plot 1: Original Image with Manual Label (Ground Truth)
original_img_display_gt = original_img_np.copy()
if found_polygon:
    temp_pil_gt = Image.fromarray(original_img_display_gt)
    temp_draw_gt = ImageDraw.Draw(temp_pil_gt)
    # Outline EVERY labelled polygon, not just the last one seen by the loop above.
    for int_polygon in labelme_polygons:
        temp_draw_gt.polygon(int_polygon, outline=(255, 0, 0), width=5) # Red outline
    axs[0].imshow(temp_pil_gt)
else:
    axs[0].imshow(original_img_display_gt)
axs[0].set_title("Original Image with Manual Label (Ground Truth)")
axs[0].axis('off')

# Plot 2: Original Image with YOLO Segmentation Mask
yolo_mask_colored = np.zeros_like(original_img_np, dtype=np.uint8)
yolo_mask_colored[yolo_binary_original_res > 0] = [0, 255, 0] # Green for YOLO mask

overlay_yolo_on_original = cv2.addWeighted(original_img_np, 0.7, yolo_mask_colored, 0.3, 0)
axs[1].imshow(overlay_yolo_on_original)
axs[1].set_title("Original Image with YOLO Prediction")
axs[1].axis('off')

# Plot 3: Combined Overlay (Manual Red + YOLO Green + Overlap Blue)
# Work on boolean masks: `~` on a uint8 0/1 array yields 255/254 (always
# truthy), so the negation must be logical rather than bitwise on uint8.
label_bool = labelme_binary_original_res.astype(bool)
yolo_bool = yolo_binary_original_res.astype(bool)
combined_mask_colored = np.zeros_like(original_img_np, dtype=np.uint8)
# Red: LabelMe only
combined_mask_colored[label_bool & ~yolo_bool] = [255, 0, 0]
# Green: YOLO only
combined_mask_colored[~label_bool & yolo_bool] = [0, 255, 0]
# Blue: Overlap
combined_mask_colored[label_bool & yolo_bool] = [0, 0, 255]

final_overlay_display = cv2.addWeighted(original_img_np, 0.7, combined_mask_colored, 0.3, 0)
axs[2].imshow(final_overlay_display)
axs[2].set_title("Overlay: Manual (Red) + YOLO (Green) + Overlap (Blue)")
axs[2].axis('off')

plt.suptitle(fig_title, fontsize=18, y=0.98) # Adjusted y for title
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()

In [None]:
import os
import json
import numpy as np
from PIL import Image, ImageDraw
import cv2
from ultralytics import YOLO

# === Paths ===
json_dir = '/Users/suzetteschulenburg/Desktop/CowsTestYOLO/YOLO/Good'
image_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test/Good'

# === Load YOLO model ===
model = YOLO("yolov8s-seg.pt")

# Accumulators: one IoU per matched file, names of images that were not found,
# and the number of files that made it through the whole pipeline.
ious = []
missing_images = []
processed = 0

for json_file in os.listdir(json_dir):
    if not json_file.endswith('.json'):
        continue

    # The image is expected to share the JSON's base name, with a .jpg extension.
    json_path = os.path.join(json_dir, json_file)
    image_name = f"{os.path.splitext(json_file)[0]}.jpg"
    image_path = os.path.join(image_dir, image_name)

    if not os.path.exists(image_path):
        missing_images.append(image_name)
        continue

    try:
        # --- Inputs: annotation JSON and the RGB image as an array ---
        with open(json_path, 'r') as fh:
            labelme_data = json.load(fh)

        img_np = np.array(Image.open(image_path).convert('RGB'))
        h, w = img_np.shape[0], img_np.shape[1]

        # --- Manual mask: fill every polygon with 3+ vertices ---
        labelme_mask = Image.new('L', (w, h), 0)
        draw = ImageDraw.Draw(labelme_mask)
        found_polygon = False
        for shape in labelme_data.get('shapes', []):
            if not (shape['shape_type'] == 'polygon' and 'points' in shape):
                continue
            polygon = [(int(x), int(y)) for x, y in shape['points']]
            if len(polygon) < 3:
                continue
            draw.polygon(polygon, outline=1, fill=1)
            found_polygon = True

        label_mask = (
            (np.array(labelme_mask) > 0).astype(np.uint8)
            if found_polygon
            else np.zeros((h, w), dtype=np.uint8)
        )

        # --- YOLO mask: cow-like detection with the largest bounding box ---
        yolo_result = model(img_np, verbose=False)[0]
        yolo_mask_bin = np.zeros((h, w), dtype=np.uint8)

        if yolo_result.masks and len(yolo_result.masks.data) > 0:
            best_index, max_area = -1, 0
            for i, cls in enumerate(yolo_result.boxes.cls.cpu().numpy()):
                if yolo_result.names[int(cls)] not in ['cow', 'bull', 'animal', 'cattle']:
                    continue
                x1, y1, x2, y2 = map(int, yolo_result.boxes.xyxy[i].cpu().numpy())
                area = (x2 - x1) * (y2 - y1)
                if area > max_area:
                    best_index, max_area = i, area
            if best_index != -1:
                yolo_mask = yolo_result.masks.data[best_index].cpu().numpy()
                yolo_mask_resized = cv2.resize(yolo_mask, (w, h), interpolation=cv2.INTER_NEAREST)
                yolo_mask_bin = (yolo_mask_resized > 0.5).astype(np.uint8)

        # --- IoU: an empty union counts as 0 ---
        intersection = np.logical_and(yolo_mask_bin, label_mask).sum()
        union = np.logical_or(yolo_mask_bin, label_mask).sum()
        if union != 0:
            iou = intersection / union
        else:
            iou = 0
        ious.append(iou)
        processed += 1

    except Exception as e:
        print(f"Error processing {json_file}: {e}")

# === Final Output ===
print(f"\nProcessed {processed} images.")
if missing_images:
    print("Missing image files for:")
    for img in missing_images:
        print(f" - {img}")

if not ious:
    print("❌ No IoU values computed (check files).")
else:
    avg_iou = sum(ious) / len(ious)
    print(f"\n✅ Average IoU across all matched files: {avg_iou * 100:.2f}%")

In [None]:
import os
import json
import numpy as np
from PIL import Image, ImageDraw
import cv2
from ultralytics import YOLO

# === Base paths ===
# compute_avg_iou() joins a subfolder name onto each of these to find the
# LabelMe JSON files and the matching .jpg images.
base_json_dir = '/Users/suzetteschulenburg/Desktop/CowsTestYOLO/YOLO'
base_image_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test'

# === Load YOLO model ===
# Loaded once at module level and shared by every compute_avg_iou() call.
model = YOLO("yolov8s-seg.pt")

def compute_avg_iou(subfolder, top_fraction=0.7):
    """Print the average YOLO-vs-LabelMe IoU for one class subfolder.

    For every `*.json` in `base_json_dir/subfolder` that has a same-named
    `.jpg` in `base_image_dir/subfolder`, a manual mask is rasterised from the
    LabelMe polygons, a YOLO mask is taken from the cow-like detection with
    the largest bounding box, and the IoU of the two is computed on the top
    `top_fraction` of the image rows. Files that raise are reported and left
    out of the average.

    Args:
        subfolder: Name of the class directory (e.g. 'Good' or 'Bad') below
            both base directories.
        top_fraction: Fraction of image rows, counted from the top, that is
            kept in BOTH masks before computing IoU. Defaults to 0.7.

    Returns:
        None. The summary is printed.
    """
    json_dir = os.path.join(base_json_dir, subfolder)
    image_dir = os.path.join(base_image_dir, subfolder)
    cow_like_names = {'cow', 'bull', 'animal', 'cattle'}

    ious = []
    missing_images = 0

    for json_file in sorted(os.listdir(json_dir)):
        if not json_file.endswith('.json'):
            continue

        json_path = os.path.join(json_dir, json_file)
        image_name = os.path.splitext(json_file)[0] + '.jpg'
        image_path = os.path.join(image_dir, image_name)

        if not os.path.exists(image_path):
            missing_images += 1
            continue

        try:
            with open(json_path, 'r') as f:
                labelme_data = json.load(f)

            img_pil = Image.open(image_path).convert('RGB')
            img_np = np.array(img_pil)
            h, w = img_np.shape[:2]

            # Manual mask. Per the original author's note the labels already
            # exclude the bottom 30% of the image (not verifiable from here).
            labelme_mask = Image.new('L', (w, h), 0)
            draw = ImageDraw.Draw(labelme_mask)
            for shape in labelme_data.get('shapes', []):
                if shape['shape_type'] == 'polygon' and 'points' in shape:
                    polygon = [(int(x), int(y)) for x, y in shape['points']]
                    if len(polygon) >= 3:
                        draw.polygon(polygon, outline=1, fill=1)

            label_mask = (np.array(labelme_mask) > 0).astype(np.uint8)

            # YOLO: keep the cow-like detection with the largest box area.
            yolo_result = model(img_np, verbose=False)[0]
            yolo_mask_bin = np.zeros((h, w), dtype=np.uint8)

            if yolo_result.masks and len(yolo_result.masks.data) > 0:
                best_index = -1
                max_area = 0
                for i, cls in enumerate(yolo_result.boxes.cls.cpu().numpy()):
                    if yolo_result.names[int(cls)] in cow_like_names:
                        x1, y1, x2, y2 = map(int, yolo_result.boxes.xyxy[i].cpu().numpy())
                        area = (x2 - x1) * (y2 - y1)
                        if area > max_area:
                            best_index = i
                            max_area = area

                if best_index != -1:
                    yolo_mask = yolo_result.masks.data[best_index].cpu().numpy()
                    yolo_mask_resized = cv2.resize(yolo_mask, (w, h), interpolation=cv2.INTER_NEAREST)
                    yolo_mask_bin = (yolo_mask_resized > 0.5).astype(np.uint8)

            # Restrict BOTH masks to the top rows so they stay the same shape.
            cutoff = int(top_fraction * h)
            yolo_mask_bin = yolo_mask_bin[:cutoff, :]
            label_mask_cropped = label_mask[:cutoff, :]

            # IoU; an empty union (both masks empty) counts as 0.
            intersection = np.logical_and(yolo_mask_bin, label_mask_cropped).sum()
            union = np.logical_or(yolo_mask_bin, label_mask_cropped).sum()
            iou = intersection / union if union != 0 else 0
            ious.append(iou)

        except Exception as e:
            # Report and skip this file so one bad input does not abort the folder.
            print(f"Error processing {json_file} in {subfolder}: {e}")

    avg_iou = (sum(ious) / len(ious)) if ious else 0.0
    print(f"\n📁 {subfolder}:")
    print(f"Processed: {len(ious)} files")
    if missing_images > 0:
        print(f"Missing image files: {missing_images}")
    print(f"✅ Average IoU: {avg_iou * 100:.2f}%")

# === Run for both folders
# Each call prints its own per-folder summary (processed count, missing images, average IoU).
compute_avg_iou('Good')
compute_avg_iou('Bad')

In [None]:
import os
import json
import numpy as np
from PIL import Image, ImageDraw
import cv2
from ultralytics import YOLO

# === Paths for Bad ===
json_dir = '/Users/suzetteschulenburg/Desktop/CowsTestYOLO/YOLO/Bad'
image_dir = '/Users/suzetteschulenburg/Desktop/MainUse/Test/Bad'

# === Load YOLO model ===
model = YOLO("yolov8s-seg.pt")

results = []  # (filename, IoU)
missing_images = 0
total_files = 0

for json_file in sorted(os.listdir(json_dir)):
    if not json_file.endswith('.json'):
        continue

    json_path = os.path.join(json_dir, json_file)
    image_name = os.path.splitext(json_file)[0] + '.jpg'
    image_path = os.path.join(image_dir, image_name)

    # An annotation without its image cannot be scored; count it and move on.
    if not os.path.exists(image_path):
        missing_images += 1
        continue

    try:
        with open(json_path, 'r') as f:
            labelme_data = json.load(f)

        img_pil = Image.open(image_path).convert('RGB')
        img_np = np.array(img_pil)
        h, w = img_np.shape[:2]

        # Manual mask: rasterise every labelme polygon onto a full-size (h, w) canvas.
        labelme_mask = Image.new('L', (w, h), 0)
        draw = ImageDraw.Draw(labelme_mask)
        for shape in labelme_data.get('shapes', []):
            if shape['shape_type'] == 'polygon' and 'points' in shape:
                polygon = [(int(x), int(y)) for x, y in shape['points']]
                if len(polygon) >= 3:
                    draw.polygon(polygon, outline=1, fill=1)
        label_mask = (np.array(labelme_mask) > 0).astype(np.uint8)

        # YOLO prediction; the mask stays all-zero when no matching animal is found.
        yolo_result = model(img_np, verbose=False)[0]
        yolo_mask_bin = np.zeros((h, w), dtype=np.uint8)

        if yolo_result.masks is not None and len(yolo_result.masks.data) > 0:
            # Find the cow with the highest confidence
            best_index = -1
            highest_conf = 0.0
            for i, cls in enumerate(yolo_result.boxes.cls.cpu().numpy()):
                name = yolo_result.names[int(cls)]
                if name in ['cow', 'bull', 'animal', 'cattle']:
                    conf = float(yolo_result.boxes.conf[i].cpu().numpy())
                    if conf > highest_conf:
                        best_index = i
                        highest_conf = conf

            if best_index != -1:
                yolo_mask = yolo_result.masks.data[best_index].cpu().numpy()
                yolo_mask_resized = cv2.resize(yolo_mask, (w, h), interpolation=cv2.INTER_NEAREST)
                yolo_mask_bin = (yolo_mask_resized > 0.5).astype(np.uint8)

        # Restrict the YOLO mask to the top 70% of the image. The rows are blanked
        # rather than sliced off so the array keeps the (h, w) shape of the manual
        # mask; slicing produced mismatched shapes and made the IoU step raise for
        # every file.
        cutoff = int(0.7 * h)
        yolo_mask_bin[cutoff:, :] = 0

        # Keep manual mask as-is (annotated on the already-cropped region)
        label_mask_cropped = label_mask

        # Compute IoU (defined as 0 when both masks are empty)
        intersection = np.logical_and(yolo_mask_bin, label_mask_cropped).sum()
        union = np.logical_or(yolo_mask_bin, label_mask_cropped).sum()
        iou = intersection / union if union != 0 else 0
        results.append((json_file, iou))
        total_files += 1

    except Exception as e:
        # Best effort: report the file and continue; failed files are not scored.
        print(f"Error processing {json_file}: {e}")

# === Sort by IoU (ascending)
results_sorted = sorted(results, key=lambda x: x[1])
avg_iou = sum(iou for _, iou in results_sorted) / len(results_sorted) if results_sorted else 0.0

# === Print results
print("\n📁 Bad")
print(f"Processed: {total_files} files")
print(f"Missing image files: {missing_images}")
print(f"✅ Average IoU: {avg_iou * 100:.2f}%")

print("\n❌ 5 Lowest IoU Samples:")
for filename, iou in results_sorted[:5]:
    print(f"{filename} → IoU: {iou * 100:.2f}%")

In [None]:
# Side-by-side view of the manual mask, the resized YOLO mask and the source image,
# using whatever label_mask / yolo_mask_resized / img_np currently hold.
fig, axs = plt.subplots(1, 3, figsize=(18, 5))
panels = [
    (label_mask, "Manual Mask", {'cmap': 'gray'}),
    (yolo_mask_resized, "YOLO Mask (Resized)", {'cmap': 'gray'}),
    (img_np, "Original Image", {}),
]
for ax, (panel_data, panel_title, imshow_kwargs) in zip(axs, panels):
    ax.imshow(panel_data, **imshow_kwargs)
    ax.set_title(panel_title)
    ax.axis('off')
plt.tight_layout()
plt.show()


# Visualize architecture (Grad-CAM)

In [None]:
import numpy as np
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_curve, auc

# === Paths ===
model_path = '/Users/suzetteschulenburg/Desktop/Bulls/Trained_Bull_Models_FineTuneSweep/bull_model_lr1e-06_unf40.keras'
test_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed/Test'

# === Load model ===
model = load_model(model_path)

# === Find the last Conv2D layer by scanning from the output side (adapt if different) ===
last_conv_layer_name = next(
    (lyr.name for lyr in reversed(model.layers) if isinstance(lyr, tf.keras.layers.Conv2D)),
    None,
)
if not last_conv_layer_name:
    raise ValueError("No Conv2D layer found in the model.")

# === Helper: Load test images and labels ===
def load_images_and_labels(image_dir):
    """Load every .jpg found under image_dir/Good and image_dir/Bad.

    Each image is loaded at 224x224 and divided by 255. Good is labelled 1 and
    Bad 0. A missing subfolder is skipped; a file that fails to load is
    reported and skipped.

    Returns:
        (images, labels, paths): two NumPy arrays and a list of file paths,
        all in the same order.
    """
    images, labels, paths = [], [], []
    for label_value, subdir in ((1, 'Good'), (0, 'Bad')):
        folder = os.path.join(image_dir, subdir)
        if not os.path.exists(folder):
            continue
        jpg_names = [name for name in os.listdir(folder) if name.lower().endswith('.jpg')]
        for fname in jpg_names:
            try:
                path = os.path.join(folder, fname)
                pixels = img_to_array(load_img(path, target_size=(224, 224))) / 255.0
                images.append(pixels)
                labels.append(label_value)
                paths.append(path)
            except Exception as e:
                print(f"⚠️ {fname} skipped due to error: {e}")
    return np.array(images), np.array(labels), paths

# Load the test set and run the model once over all images.
X_test, y_test, image_paths = load_images_and_labels(test_dir)
y_probs = model.predict(X_test)
# Threshold the scores at 0.5 to get hard 0/1 predictions (same shape as y_probs).
y_pred = (y_probs > 0.5).astype(int)

# === Grad-CAM Function ===
def generate_gradcam(model, img_array, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap for a single image.

    Args:
        model: Keras model to explain.
        img_array: one image without a batch axis (the batch axis is added here).
        last_conv_layer_name: name of the layer whose activations are weighted.
        pred_index: output index to explain; defaults to the argmax of the
            model's prediction for this image.

    Returns:
        NumPy array of non-negative values, scaled so its maximum is 1. If the
        layer contributes nothing positive the map is returned as all zeros
        instead of being divided by zero.
    """
    grad_model = Model(
        [model.inputs],
        [model.get_layer(last_conv_layer_name).output, model.output]
    )

    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(np.expand_dims(img_array, axis=0))
        if pred_index is None:
            pred_index = tf.argmax(predictions[0])
        class_channel = predictions[:, pred_index]

    # Gradient of the chosen output w.r.t. the layer activations, averaged per channel.
    grads = tape.gradient(class_channel, conv_outputs)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
    conv_outputs = conv_outputs[0]

    # Channel-weighted sum of the activations, then keep only positive evidence.
    heatmap = conv_outputs @ pooled_grads[..., tf.newaxis]
    heatmap = np.maximum(tf.squeeze(heatmap).numpy(), 0)

    # Normalise only when there is something to normalise; 0/0 would give NaNs
    # that corrupt the uint8 conversion done by the plotting helper.
    peak = np.max(heatmap)
    if peak > 0:
        heatmap /= peak
    return heatmap

# === Plotting Helper ===
def display_gradcam(img_array, heatmap, alpha=0.4):
    """Blend a jet-coloured heatmap over an image and return the PIL result.

    img_array is multiplied by 255 and cast to uint8; the heatmap is resized to
    224x224 before colouring. alpha is passed to Image.blend as the weight of
    the heatmap layer.
    """
    base_pixels = np.uint8(255 * img_array)
    resized = tf.image.resize(heatmap[..., np.newaxis], (224, 224))
    heat_u8 = np.uint8(255 * resized.numpy().squeeze())
    jet_rgb = plt.cm.jet(heat_u8 / 255.0)[..., :3]
    heat_layer = tf.keras.preprocessing.image.array_to_img(jet_rgb).convert('RGBA')
    base_layer = Image.fromarray(base_pixels.astype('uint8')).convert('RGBA')
    return Image.blend(base_layer, heat_layer, alpha=alpha)

# === Show Grad-CAM for 2 TPs and 2 FPs ===
from PIL import Image

print("\n📸 Showing Grad-CAMs:")

# Collect indices of true positives (Good predicted Good) and false positives
# (Bad predicted Good) in a single pass.
tp_idx, fp_idx = [], []
for position, (truth, guess) in enumerate(zip(y_test, y_pred)):
    if truth == 1 and guess == 1:
        tp_idx.append(position)
    elif truth == 0 and guess == 1:
        fp_idx.append(position)

selected_indices = tp_idx[:2] + fp_idx[:2]


def _label_name(flag):
    # Map a 0/1 label to its display name.
    return "Good" if flag == 1 else "Bad"


plt.figure(figsize=(12, 6))
for slot, idx in enumerate(selected_indices, start=1):
    img = X_test[idx]
    blended = display_gradcam(img, generate_gradcam(model, img, last_conv_layer_name))

    plt.subplot(1, 4, slot)
    plt.imshow(blended)
    plt.title(f"True: {_label_name(y_test[idx])}\nPred: {_label_name(y_pred[idx])}")
    plt.axis('off')

plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from PIL import Image
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, SeparableConv2D

# === Paths ===
model_path = '/Users/suzetteschulenburg/Desktop/Bulls/Trained_Bull_Models_FineTuneSweep/bull_model_lr1e-06_unf40.keras'
test_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed/Test'

# === Load model ===
model = load_model(model_path)

# === Names of all convolution-type layers, in model order ===
conv_layer_types = (Conv2D, DepthwiseConv2D, SeparableConv2D)
conv_layer_names = []
for layer in model.layers:
    if isinstance(layer, conv_layer_types):
        conv_layer_names.append(layer.name)

# Keep only the last 4 conv-type layers for visualization clarity
selected_layers = conv_layer_names[-4:]

# === Load test images and labels ===
def load_images_and_labels(image_dir):
    """Collect the .jpg images in image_dir/Good and image_dir/Bad.

    Images are loaded at 224x224 and divided by 255; Good maps to label 1 and
    Bad to label 0. Subfolders that do not exist are skipped, and any file
    that raises while loading is printed and left out.

    Returns:
        (images, labels, paths): NumPy array of images, NumPy array of labels
        and the list of source paths, index-aligned.
    """
    label_of = {'Good': 1, 'Bad': 0}
    images, labels, paths = [], [], []
    for subdir, label_value in label_of.items():
        class_dir = os.path.join(image_dir, subdir)
        if os.path.exists(class_dir):
            for fname in os.listdir(class_dir):
                if not fname.lower().endswith('.jpg'):
                    continue
                try:
                    path = os.path.join(class_dir, fname)
                    loaded = load_img(path, target_size=(224, 224))
                    images.append(img_to_array(loaded) / 255.0)
                    labels.append(label_value)
                    paths.append(path)
                except Exception as e:
                    print(f"⚠️ {fname} skipped due to error: {e}")
    return np.array(images), np.array(labels), paths

# Load the test set and run the model once over all images.
X_test, y_test, image_paths = load_images_and_labels(test_dir)
y_probs = model.predict(X_test)
# Threshold the scores at 0.5 to get hard 0/1 predictions (same shape as y_probs).
y_pred = (y_probs > 0.5).astype(int)

# === Grad-CAM Function ===
def generate_gradcam(model, img_array, layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap of one image for the given layer.

    Args:
        model: Keras model to explain.
        img_array: one image without a batch axis (the batch axis is added here).
        layer_name: name of the layer whose activations are weighted.
        pred_index: output index to explain; defaults to the argmax of the
            model's prediction for this image.

    Returns:
        NumPy array of non-negative values, scaled so its maximum is 1, or all
        zeros when the layer contributes nothing positive.
    """
    grad_model = Model([model.inputs], [model.get_layer(layer_name).output, model.output])
    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(np.expand_dims(img_array, axis=0))
        if pred_index is None:
            pred_index = tf.argmax(predictions[0])
        class_channel = predictions[:, pred_index]

    # Gradient of the chosen output w.r.t. the layer activations, averaged per channel.
    grads = tape.gradient(class_channel, conv_outputs)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
    conv_outputs = conv_outputs[0]

    # Stay in TensorFlow ops until the final conversion: mixing in np.maximum
    # turned the tensor into an ndarray, which has no .numpy() for the
    # all-zero early return.
    heatmap = tf.squeeze(conv_outputs @ pooled_grads[..., tf.newaxis])
    heatmap = tf.maximum(heatmap, 0)
    peak = tf.reduce_max(heatmap)
    if peak > 0:
        heatmap = heatmap / peak
    return heatmap.numpy()

# === Overlay Function ===
def display_gradcam(img_array, heatmap, alpha=0.4):
    """Return a PIL image of the jet-coloured heatmap blended over img_array.

    img_array is multiplied by 255 and cast to uint8; the heatmap is resized to
    224x224, quantised to uint8 and coloured with matplotlib's jet colormap.
    alpha is passed to Image.blend as the weight of the heatmap layer.
    """
    base_pixels = np.uint8(255 * img_array)
    resized = tf.image.resize(heatmap[..., np.newaxis], (224, 224))
    heat_u8 = np.uint8(255 * resized.numpy().squeeze())
    jet_rgb = plt.cm.jet(heat_u8 / 255.0)[..., :3]
    heat_layer = Image.fromarray((jet_rgb * 255).astype('uint8')).convert('RGBA')
    base_layer = Image.fromarray(base_pixels.astype('uint8')).convert('RGBA')
    return Image.blend(base_layer, heat_layer, alpha=alpha)

# === Select 2 TPs + 2 FPs ===
# True positives: Good predicted Good. False positives: Bad predicted Good.
tp_idx, fp_idx = [], []
for position, (truth, guess) in enumerate(zip(y_test, y_pred)):
    if truth == 1 and guess == 1:
        tp_idx.append(position)
    elif truth == 0 and guess == 1:
        fp_idx.append(position)
selected_indices = tp_idx[:2] + fp_idx[:2]

# === Plot heatmaps per image per layer ===
# One figure per selected image, one panel per selected layer.
n_layers = len(selected_layers)
for image_number, idx in enumerate(selected_indices, start=1):
    img = X_test[idx]
    if y_test[idx] == 1:
        true_label = "Good"
    else:
        true_label = "Bad"
    if y_pred[idx] == 1:
        pred_label = "Good"
    else:
        pred_label = "Bad"

    plt.figure(figsize=(4 * n_layers, 4))
    for panel, layer_name in enumerate(selected_layers, start=1):
        blended = display_gradcam(img, generate_gradcam(model, img, layer_name))

        plt.subplot(1, n_layers, panel)
        plt.imshow(blended)
        plt.title(f"{layer_name}", fontsize=9)
        plt.axis('off')

    plt.suptitle(f"Image {image_number}: True={true_label}, Pred={pred_label}", fontsize=14)
    plt.tight_layout(rect=[0, 0, 1, 0.93])
    plt.show()

In [None]:
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import make_interp_spline

# === Learning rates to include (5e-4 up to 2e-2) ===
learning_rates = [
    5e-4, 7e-4, 1e-3, 1.5e-3, 2e-3, 2.5e-3, 3e-3, 4e-3, 5e-3,
    6e-3, 7e-3, 8e-3, 1e-2, 2e-2
]
history_base_dir = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments30/Histories'

val_accuracies, val_losses = [], []

# The banner is derived from the list so it cannot drift from the rates actually scanned.
print(f"📊 Validation Results (LR {min(learning_rates):.0e} to {max(learning_rates):.0e}):")
print(f"{'LR':>8} | {'Best Val Acc':>13} | {'Best Val Loss':>13}")
print("-" * 40)

# NOTE(review): the folder tag keeps one significant digit, so 1.5e-3 and 2.5e-3
# resolve to the same folder as a neighbouring rate. The tag format is left
# untouched because it has to match the folders that already exist on disk;
# collisions are reported below so duplicated points are not read as separate runs.
lr_by_tag = {}
tag_collisions = []

for lr in learning_rates:
    tag = f"lr_{lr:.0e}".replace("-", "")
    if tag in lr_by_tag:
        tag_collisions.append((lr, lr_by_tag[tag], tag))
    else:
        lr_by_tag[tag] = lr
    history_path = os.path.join(history_base_dir, tag, 'history.pkl')

    if os.path.exists(history_path):
        # pickle can execute code while loading; only read histories produced locally.
        with open(history_path, 'rb') as f:
            hist = pickle.load(f)

        best_val_acc = max(hist['val_accuracy'])
        best_val_loss = min(hist['val_loss'])

        val_accuracies.append(best_val_acc)
        val_losses.append(best_val_loss)

        # One decimal so 1.5e-3 / 2.5e-3 are not displayed as a neighbouring rate.
        print(f"{lr:>8.1e} | {best_val_acc:>13.4f} | {best_val_loss:>13.4f}")
    else:
        print(f"{lr:>8.1e} | {'MISSING':>13} | {'MISSING':>13}")
        val_accuracies.append(None)
        val_losses.append(None)

for lr, first_lr, tag in tag_collisions:
    print(f"⚠️ LR {lr:.1e} reads the same folder '{tag}' as LR {first_lr:.1e}")

# === Filter out missing entries ===
found = [
    (lr, acc, loss)
    for lr, acc, loss in zip(learning_rates, val_accuracies, val_losses)
    if acc is not None and loss is not None
]
if not found:
    raise FileNotFoundError(f"No history.pkl files found under {history_base_dir}")

filtered_lrs, filtered_accs, filtered_losses = (np.array(column) for column in zip(*found))

# === Convert learning rates to log10 space ===
log_lrs = np.log10(filtered_lrs)

# === Smooth interpolation ===
# A quadratic spline needs at least 3 points; with fewer, plot the raw points joined by lines.
if len(log_lrs) >= 3:
    x_smooth = np.linspace(log_lrs.min(), log_lrs.max(), 300)
    acc_smooth = make_interp_spline(log_lrs, filtered_accs, k=2)(x_smooth)
    loss_smooth = make_interp_spline(log_lrs, filtered_losses, k=2)(x_smooth)
else:
    x_smooth, acc_smooth, loss_smooth = log_lrs, filtered_accs, filtered_losses

# === Plot ===
fig, ax1 = plt.subplots(figsize=(14, 8))

# Accuracy plot
ax1.set_xlabel('Learning Rate', fontsize=22)
ax1.set_ylabel('Best Validation Accuracy', color='blue', fontsize=22)
ax1.plot(10**x_smooth, acc_smooth, color='blue', label='Val Accuracy')
ax1.scatter(filtered_lrs, filtered_accs, color='blue')
ax1.set_xscale('log')

# === Unique exponents only: one tick per power of ten covered by the data ===
exponents = sorted(set(int(np.floor(np.log10(lr))) for lr in filtered_lrs))
xticks = [10**e for e in exponents]
xtick_labels = [f"$10^{{{e}}}$" for e in exponents]

ax1.set_xticks(xticks)
ax1.set_xticklabels(xtick_labels, fontsize=22)
ax1.tick_params(axis='y', labelcolor='blue', labelsize=22)
ax1.tick_params(axis='x', labelsize=22)

# Loss plot on a second y-axis sharing the same x-axis
ax2 = ax1.twinx()
ax2.set_ylabel('Best Validation Loss', color='red', fontsize=22)
ax2.plot(10**x_smooth, loss_smooth, color='red', label='Val Loss')
ax2.scatter(filtered_lrs, filtered_losses, color='red')
ax2.tick_params(axis='y', labelcolor='red', labelsize=22)

# Title and layout
plt.title('Validation Accuracy and Loss vs Learning Rate (Transfer to Bulls)', fontsize=22)
plt.grid(True)
fig.tight_layout()
plt.show()

In [None]:
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import make_interp_spline

# === Learning rates to include (5e-4 up to 2e-2) ===
learning_rates = [
    5e-4, 7e-4, 1e-3, 1.5e-3, 2e-3, 2.5e-3, 3e-3, 4e-3, 5e-3,
    6e-3, 7e-3, 8e-3, 1e-2, 2e-2
]

history_base_dir = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments30/Histories'

val_accuracies, val_losses = [], []

# The banner is derived from the list so it cannot drift from the rates actually scanned.
print(f"📊 Validation Results (LR {min(learning_rates):.0e} to {max(learning_rates):.0e}):")
print(f"{'LR':>8} | {'Best Val Acc':>13} | {'Avg Val Loss':>13}")
print("-" * 40)

# NOTE(review): the folder tag keeps one significant digit, so 1.5e-3 and 2.5e-3
# resolve to the same folder as a neighbouring rate. The tag format is left
# untouched because it has to match the folders that already exist on disk;
# collisions are reported below so duplicated points are not read as separate runs.
lr_by_tag = {}
tag_collisions = []

for lr in learning_rates:
    tag = f"lr_{lr:.0e}".replace("-", "")
    if tag in lr_by_tag:
        tag_collisions.append((lr, lr_by_tag[tag], tag))
    else:
        lr_by_tag[tag] = lr
    history_path = os.path.join(history_base_dir, tag, 'history.pkl')

    if os.path.exists(history_path):
        # pickle can execute code while loading; only read histories produced locally.
        with open(history_path, 'rb') as f:
            hist = pickle.load(f)

        best_val_acc = max(hist['val_accuracy'])
        # Mean over all recorded epochs, not the minimum.
        avg_val_loss = np.mean(hist['val_loss'])

        val_accuracies.append(best_val_acc)
        val_losses.append(avg_val_loss)

        # One decimal so 1.5e-3 / 2.5e-3 are not displayed as a neighbouring rate.
        print(f"{lr:>8.1e} | {best_val_acc:>13.4f} | {avg_val_loss:>13.4f}")
    else:
        print(f"{lr:>8.1e} | {'MISSING':>13} | {'MISSING':>13}")
        val_accuracies.append(None)
        val_losses.append(None)

for lr, first_lr, tag in tag_collisions:
    print(f"⚠️ LR {lr:.1e} reads the same folder '{tag}' as LR {first_lr:.1e}")

# === Filter out missing entries ===
found = [
    (lr, acc, loss)
    for lr, acc, loss in zip(learning_rates, val_accuracies, val_losses)
    if acc is not None and loss is not None
]
if not found:
    raise FileNotFoundError(f"No history.pkl files found under {history_base_dir}")

filtered_lrs, filtered_accs, filtered_losses = (np.array(column) for column in zip(*found))

# === Convert learning rates to log10 space ===
log_lrs = np.log10(filtered_lrs)

# === Smooth interpolation ===
# A quadratic spline needs at least 3 points; with fewer, plot the raw points joined by lines.
if len(log_lrs) >= 3:
    x_smooth = np.linspace(log_lrs.min(), log_lrs.max(), 300)
    acc_smooth = make_interp_spline(log_lrs, filtered_accs, k=2)(x_smooth)
    loss_smooth = make_interp_spline(log_lrs, filtered_losses, k=2)(x_smooth)
else:
    x_smooth, acc_smooth, loss_smooth = log_lrs, filtered_accs, filtered_losses

# === Plot ===
fig, ax1 = plt.subplots(figsize=(14, 8))

# Accuracy plot
ax1.set_xlabel('Learning Rate', fontsize=22)
ax1.set_ylabel('Best Validation Accuracy', color='blue', fontsize=22)
ax1.plot(10**x_smooth, acc_smooth, color='blue', label='Val Accuracy')
ax1.scatter(filtered_lrs, filtered_accs, color='blue')
ax1.set_xscale('log')

# Set x-tick labels using powers of 10 (one tick per decade covered by the data)
exponents = sorted(set(int(np.floor(np.log10(lr))) for lr in filtered_lrs))
xticks = [10**e for e in exponents]
xtick_labels = [f"$10^{{{e}}}$" for e in exponents]

ax1.set_xticks(xticks)
ax1.set_xticklabels(xtick_labels, fontsize=22)
ax1.tick_params(axis='y', labelcolor='blue', labelsize=22)
ax1.tick_params(axis='x', labelsize=22)

# Loss plot on a second y-axis sharing the same x-axis
ax2 = ax1.twinx()
ax2.set_ylabel('Avg Validation Loss', color='red', fontsize=22)
ax2.plot(10**x_smooth, loss_smooth, color='red', label='Avg Val Loss')
ax2.scatter(filtered_lrs, filtered_losses, color='red')
ax2.tick_params(axis='y', labelcolor='red', labelsize=22)

# Title and layout
plt.title('Validation Accuracy and Avg Loss vs Learning Rate (Transfer to Bulls)', fontsize=22)
plt.grid(True)
fig.tight_layout()
plt.show()

In [None]:
import os
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, precision_recall_curve, auc

# === Paths ===
# test_dir is scanned for Good/ and Bad/ subfolders; model_path is the saved
# model loaded for evaluation (the lr_1e03 run).
test_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed/Test_Copy1'
model_path = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments30/Models/lr_1e03/bull_transfer_model.keras'


# === Load test data ===
def load_images_and_labels(image_dir):
    """Load the .jpg images under image_dir/Good and image_dir/Bad.

    Each image is loaded at 224x224 and divided by 255. Good is labelled 1 and
    Bad 0; a subfolder that does not exist is skipped.

    Returns:
        (images, labels) as two index-aligned NumPy arrays.
    """
    label_of = {'Good': 1, 'Bad': 0}
    images, labels = [], []
    for subdir, label_value in label_of.items():
        class_dir = os.path.join(image_dir, subdir)
        if os.path.exists(class_dir):
            for fname in os.listdir(class_dir):
                if not fname.lower().endswith('.jpg'):
                    continue
                loaded = load_img(os.path.join(class_dir, fname), target_size=(224, 224))
                images.append(img_to_array(loaded) / 255.0)
                labels.append(label_value)
    return np.array(images), np.array(labels)

X_test, y_test = load_images_and_labels(test_dir)

# === Load model and predict ===
model = load_model(model_path)
y_pred_probs = model.predict(X_test)
# Threshold at 0.5 and flatten so the predictions are 1-D like y_test.
y_pred = (y_pred_probs > 0.5).astype(int).flatten()

# === Compute metrics ===
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
conf_mat = confusion_matrix(y_test, y_pred)
# Area under the precision-recall curve, computed from the raw scores.
prec_curve, recall_curve, _ = precision_recall_curve(y_test, y_pred_probs)
auc_pr = auc(recall_curve, prec_curve)

# === Display ===
# Take the run tag from the folder holding the model so the banner always names
# the model that was actually evaluated (the old hard-coded "1e-5" did not).
lr_tag = os.path.basename(os.path.dirname(model_path))
print(f"📊 Test Evaluation for model {lr_tag}")
print(f"Accuracy     : {acc:.4f}")
print(f"F1 Score     : {f1:.4f}")
print(f"Precision    : {precision:.4f}")
print(f"Recall       : {recall:.4f}")
print(f"AUC-PR       : {auc_pr:.4f}")
print("\nConfusion Matrix:")
print(conf_mat)

In [None]:
import os
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, precision_recall_curve, auc

# === Paths ===
# test_dir is scanned for Good/ and Bad/ subfolders; model_path is the saved
# model loaded for evaluation (the lr_1e03 run).
test_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed/Test_Copy1'
model_path = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments30/Models/lr_1e03/bull_transfer_model.keras'


# === Load test data ===
def load_images_and_labels(image_dir):
    """Read all .jpg files from the Good and Bad subfolders of image_dir.

    Images are loaded at 224x224 and divided by 255; labels are 1 for Good and
    0 for Bad. Subfolders that are absent are skipped.

    Returns:
        (images, labels) as two index-aligned NumPy arrays.
    """
    images, labels = [], []
    for label_value, subdir in ((1, 'Good'), (0, 'Bad')):
        folder = os.path.join(image_dir, subdir)
        if not os.path.exists(folder):
            continue
        jpg_names = [name for name in os.listdir(folder) if name.lower().endswith('.jpg')]
        for fname in jpg_names:
            picture = load_img(os.path.join(folder, fname), target_size=(224, 224))
            images.append(img_to_array(picture) / 255.0)
            labels.append(label_value)
    return np.array(images), np.array(labels)

X_test, y_test = load_images_and_labels(test_dir)

# === Load model and predict ===
model = load_model(model_path)
y_pred_probs = model.predict(X_test)
# Threshold at 0.5 and flatten so the predictions are 1-D like y_test.
y_pred = (y_pred_probs > 0.5).astype(int).flatten()

# === Compute metrics ===
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
conf_mat = confusion_matrix(y_test, y_pred)
# Area under the precision-recall curve, computed from the raw scores.
prec_curve, recall_curve, _ = precision_recall_curve(y_test, y_pred_probs)
auc_pr = auc(recall_curve, prec_curve)

# === Display ===
# Take the run tag from the folder holding the model so the banner always names
# the model that was actually evaluated (the old hard-coded "1e-5" did not).
lr_tag = os.path.basename(os.path.dirname(model_path))
print(f"📊 Test Evaluation for model {lr_tag}")
print(f"Accuracy     : {acc:.4f}")
print(f"F1 Score     : {f1:.4f}")
print(f"Precision    : {precision:.4f}")
print(f"Recall       : {recall:.4f}")
print(f"AUC-PR       : {auc_pr:.4f}")
print("\nConfusion Matrix:")
print(conf_mat)

In [None]:
import os
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, precision_recall_curve, auc

# === Paths ===
# test_dir is scanned for Good/ and Bad/ subfolders; model_path is the saved
# model loaded for evaluation (the lr_8e03 run).
test_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed/Test_Copy1'
model_path = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments30/Models/lr_8e03/bull_transfer_model.keras'


# === Load test data ===
def load_images_and_labels(image_dir):
    """Load the .jpg images under image_dir/Good and image_dir/Bad.

    Each image is loaded at 224x224 and divided by 255. Good is labelled 1 and
    Bad 0; a subfolder that does not exist is skipped.

    Returns:
        (images, labels) as two index-aligned NumPy arrays.
    """
    label_of = {'Good': 1, 'Bad': 0}
    images, labels = [], []
    for subdir, label_value in label_of.items():
        class_dir = os.path.join(image_dir, subdir)
        if os.path.exists(class_dir):
            for fname in os.listdir(class_dir):
                if not fname.lower().endswith('.jpg'):
                    continue
                loaded = load_img(os.path.join(class_dir, fname), target_size=(224, 224))
                images.append(img_to_array(loaded) / 255.0)
                labels.append(label_value)
    return np.array(images), np.array(labels)

X_test, y_test = load_images_and_labels(test_dir)

# === Load model and predict ===
model = load_model(model_path)
y_pred_probs = model.predict(X_test)
# Threshold at 0.5 and flatten so the predictions are 1-D like y_test.
y_pred = (y_pred_probs > 0.5).astype(int).flatten()

# === Compute metrics ===
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
conf_mat = confusion_matrix(y_test, y_pred)
# Area under the precision-recall curve, computed from the raw scores.
prec_curve, recall_curve, _ = precision_recall_curve(y_test, y_pred_probs)
auc_pr = auc(recall_curve, prec_curve)

# === Display ===
# Take the run tag from the folder holding the model so the banner always names
# the model that was actually evaluated (the old hard-coded "1e-5" did not).
lr_tag = os.path.basename(os.path.dirname(model_path))
print(f"📊 Test Evaluation for model {lr_tag}")
print(f"Accuracy     : {acc:.4f}")
print(f"F1 Score     : {f1:.4f}")
print(f"Precision    : {precision:.4f}")
print(f"Recall       : {recall:.4f}")
print(f"AUC-PR       : {auc_pr:.4f}")
print("\nConfusion Matrix:")
print(conf_mat)

In [None]:
import os
import numpy as np
from collections import defaultdict, Counter
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, precision_recall_curve, auc

# === Paths ===
# test_dir must contain Good/ and Bad/ subfolders; model_path is the saved
# model loaded for evaluation (the lr_8e03 run).
test_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed/Test_Copy1'
model_path = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments30/Models/lr_8e03/bull_transfer_model.keras'

# === Load test images and IDs ===
def load_images_labels_ids(image_dir):
    """Load the .jpg images under image_dir/Good and image_dir/Bad with their IDs.

    Files are visited in sorted order within each subfolder. Each image is
    loaded at 224x224 and divided by 255; Good is labelled 1 and Bad 0. The ID
    is the part of the filename before the first underscore
    (e.g. 'ADC123_1.jpg' → 'ADC123').

    Returns:
        (images, labels, ids): two NumPy arrays and a list of ID strings,
        index-aligned.
    """
    images, labels, ids = [], [], []
    for label_value, subdir in ((1, 'Good'), (0, 'Bad')):
        folder = os.path.join(image_dir, subdir)
        for fname in sorted(os.listdir(folder)):
            if not fname.lower().endswith('.jpg'):
                continue
            picture = load_img(os.path.join(folder, fname), target_size=(224, 224))
            images.append(img_to_array(picture) / 255.0)
            labels.append(label_value)
            ids.append(fname.split('_')[0])
    return np.array(images), np.array(labels), ids

X_test, y_test, ids = load_images_labels_ids(test_dir)

# === Load model and predict ===
model = load_model(model_path)
y_pred_probs = model.predict(X_test)
y_pred = (y_pred_probs > 0.5).astype(int).flatten()

# === Group predictions by ID and majority vote ===
votes = defaultdict(list)
true_labels = {}

for pred, label, id_ in zip(y_pred, y_test, ids):
    votes[id_].append(pred)
    if id_ not in true_labels:
        true_labels[id_] = label  # Assumes all images of same cow have same label

# An individual is predicted Good (1) only when strictly more than half of its
# images are predicted Good; ties go to Bad (0). This matches the previous
# fixed threshold of 3 for individuals with 5 images and stays a true majority
# for any other image count.
majority_preds = {}
for id_, preds in votes.items():
    good_votes = Counter(preds)[1]
    majority_preds[id_] = 1 if 2 * good_votes > len(preds) else 0

# === Extract final predictions and true labels (ordered by ID) ===
final_ids = sorted(majority_preds.keys())
final_preds = [majority_preds[i] for i in final_ids]
final_trues = [true_labels[i] for i in final_ids]

# === Compute metrics ===
acc = accuracy_score(final_trues, final_preds)
f1 = f1_score(final_trues, final_preds)
precision = precision_score(final_trues, final_preds)
recall = recall_score(final_trues, final_preds)
conf_mat = confusion_matrix(final_trues, final_preds)

# === Display ===
print(f"📊 Majority Vote Test Evaluation (per individual)")
print(f"Total Individuals: {len(final_ids)}")
print(f"Accuracy         : {acc:.4f}")
print(f"F1 Score         : {f1:.4f}")
print(f"Precision        : {precision:.4f}")
print(f"Recall           : {recall:.4f}")
print("\nConfusion Matrix:")
print(conf_mat)

In [None]:
import os
import numpy as np
from collections import defaultdict, Counter
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, precision_recall_curve, auc

# === Paths ===
# test_dir must contain Good/ and Bad/ subfolders; model_path is the saved
# model loaded for evaluation (the lr_1e03 run).
test_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed/Test_Copy1'
model_path = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments30/Models/lr_1e03/bull_transfer_model.keras'

# === Load test images and IDs ===
def load_images_labels_ids(image_dir):
    """Read the Good and Bad .jpg images of image_dir together with their IDs.

    Within each subfolder the files are processed in sorted order. Images are
    loaded at 224x224 and divided by 255; labels are 1 for Good and 0 for Bad.
    The ID is everything before the first underscore in the filename
    (e.g. 'ADC123_1.jpg' → 'ADC123').

    Returns:
        (images, labels, ids): two NumPy arrays and a list of ID strings,
        index-aligned.
    """
    label_of = {'Good': 1, 'Bad': 0}
    images, labels, ids = [], [], []
    for subdir, label_value in label_of.items():
        class_dir = os.path.join(image_dir, subdir)
        jpg_names = [name for name in sorted(os.listdir(class_dir)) if name.lower().endswith('.jpg')]
        for fname in jpg_names:
            loaded = load_img(os.path.join(class_dir, fname), target_size=(224, 224))
            images.append(img_to_array(loaded) / 255.0)
            labels.append(label_value)
            ids.append(fname.split('_')[0])
    return np.array(images), np.array(labels), ids

X_test, y_test, ids = load_images_labels_ids(test_dir)

# === Load model and predict ===
model = load_model(model_path)
y_pred_probs = model.predict(X_test)
y_pred = (y_pred_probs > 0.5).astype(int).flatten()

# === Group predictions by ID and majority vote ===
votes = defaultdict(list)
true_labels = {}

for pred, label, id_ in zip(y_pred, y_test, ids):
    votes[id_].append(pred)
    if id_ not in true_labels:
        true_labels[id_] = label  # Assumes all images of same cow have same label

# An individual is predicted Good (1) only when strictly more than half of its
# images are predicted Good; ties go to Bad (0). This matches the previous
# fixed threshold of 3 for individuals with 5 images and stays a true majority
# for any other image count.
majority_preds = {}
for id_, preds in votes.items():
    good_votes = Counter(preds)[1]
    majority_preds[id_] = 1 if 2 * good_votes > len(preds) else 0

# === Extract final predictions and true labels (ordered by ID) ===
final_ids = sorted(majority_preds.keys())
final_preds = [majority_preds[i] for i in final_ids]
final_trues = [true_labels[i] for i in final_ids]

# === Compute metrics ===
acc = accuracy_score(final_trues, final_preds)
f1 = f1_score(final_trues, final_preds)
precision = precision_score(final_trues, final_preds)
recall = recall_score(final_trues, final_preds)
conf_mat = confusion_matrix(final_trues, final_preds)

# === Display ===
print(f"📊 Majority Vote Test Evaluation (per individual)")
print(f"Total Individuals: {len(final_ids)}")
print(f"Accuracy         : {acc:.4f}")
print(f"F1 Score         : {f1:.4f}")
print(f"Precision        : {precision:.4f}")
print(f"Recall           : {recall:.4f}")
print("\nConfusion Matrix:")
print(conf_mat)

In [None]:
import os
import pickle
import matplotlib.pyplot as plt

# === Path to history file for LR = 1e-3 ===
history_path = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments30/Histories/lr_1e03/history.pkl'

# === Load history ===
with open(history_path, 'rb') as f:
    history = pickle.load(f)

# === Plot ===
# Two panels side by side: accuracy on the left, loss on the right, each
# showing the training and validation curves per epoch.
plt.figure(figsize=(12, 5))

for panel, metric_name in enumerate(('Accuracy', 'Loss'), start=1):
    metric_key = metric_name.lower()
    plt.subplot(1, 2, panel)
    plt.plot(history[metric_key], label=f'Train {metric_name}')
    plt.plot(history[f'val_{metric_key}'], label=f'Validation {metric_name}')
    plt.xlabel('Epochs')
    plt.ylabel(metric_name)
    plt.title(f'{metric_name} (LR = 1e-3)')
    plt.legend()

plt.tight_layout()
plt.show()

In [None]:
import os
import pickle
import matplotlib.pyplot as plt

# === Path to history file for LR = 8e-3 ===
history_path = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments30/Histories/lr_8e03/history.pkl'

# === Load history ===
with open(history_path, 'rb') as f:
    history = pickle.load(f)

# === Plot ===
plt.figure(figsize=(12, 5))

# Accuracy Plot (training vs validation, per epoch)
plt.subplot(1, 2, 1)
plt.plot(history['accuracy'], label='Train Accuracy')
plt.plot(history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
# The title names the run that was loaded (lr_8e03), matching the loss panel.
plt.title('Accuracy (LR = 8e-3)')
plt.legend()

# Loss Plot (training vs validation, per epoch)
plt.subplot(1, 2, 2)
plt.plot(history['loss'], label='Train Loss')
plt.plot(history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Loss (LR = 8e-3)')
plt.legend()

plt.tight_layout()
plt.show()

5-fold cross-validation for bulls using LR 1e-03 and 30 layers (transfer learning)

In [None]:
import os
import numpy as np
import pickle
import time
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc

# === Paths ===
# Saved Keras model that the fold loop below reloads with load_model() for every fold.
cow_model_path = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4/model_fold2345_val1_frozen.keras'
# Root folder from which the Fold1..Fold5 subfolders are read.
bull_base_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed'
# Root folder under which Models/<fold> and Histories/<fold> are created.
output_base = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments_1e3Final5Folds'

# === Learning rate ===
# Passed to Adam(learning_rate=lr) when the model is compiled.
lr = 1e-3

# === Image Loader ===
def load_images_and_labels(image_dir):
    """Read the .jpg files in image_dir/Good and image_dir/Bad.

    Each image is loaded at 224x224 and its pixel array divided by 255.0.
    A class subfolder that does not exist is skipped. 'Good' images get
    label 1 and 'Bad' images get label 0.

    Returns:
        (images, labels): two NumPy arrays in matching order.
    """
    pixel_arrays = []
    class_labels = []
    for class_name, class_label in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if os.path.exists(class_dir):
            # Extension check is case-insensitive, so '.JPG' is accepted too
            jpg_names = [name for name in os.listdir(class_dir)
                         if name.lower().endswith('.jpg')]
            for file_name in jpg_names:
                picture = load_img(os.path.join(class_dir, file_name),
                                   target_size=(224, 224))
                pixel_arrays.append(img_to_array(picture) / 255.0)
                class_labels.append(class_label)
    return np.array(pixel_arrays), np.array(class_labels)

# === 5-Fold Cross Validation ===
# Every pass holds out Fold{val_fold} for validation, fine-tunes a freshly
# loaded copy of the saved model on the remaining folds, then prints the
# validation metrics for that fold.
for val_fold in range(1, 6):
    print(f"\n🚀 Fold {val_fold}: Training on all except Fold{val_fold}, validating on Fold{val_fold}")

    # Prepare training data: collect every fold except the held-out one,
    # then stack the per-fold arrays into single training arrays.
    X_train, y_train = [], []
    for fold in range(1, 6):
        if fold == val_fold:
            continue
        fold_dir = os.path.join(bull_base_dir, f'Fold{fold}')
        X, y = load_images_and_labels(fold_dir)
        X_train.append(X)
        y_train.append(y)
    X_train = np.concatenate(X_train)
    y_train = np.concatenate(y_train)

    # Validation data
    val_dir = os.path.join(bull_base_dir, f'Fold{val_fold}')
    X_val, y_val = load_images_and_labels(val_dir)

    # Load and modify model: the model is reloaded from disk on every pass,
    # so no weights carry over between folds. Only the last 30 layers are
    # left trainable.
    model = load_model(cow_model_path)
    for layer in model.layers[:-30]:
        layer.trainable = False
    for layer in model.layers[-30:]:
        layer.trainable = True

    model.compile(optimizer=Adam(learning_rate=lr),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # Create output directories (one Models/ and one Histories/ folder per fold)
    fold_tag = f'fold{val_fold}'
    model_dir = os.path.join(output_base, 'Models', fold_tag)
    history_dir = os.path.join(output_base, 'Histories', fold_tag)
    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(history_dir, exist_ok=True)

    # The LR-reduction patience (25) is shorter than the early-stopping
    # patience (35), so the learning rate is halved before training is stopped.
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=35, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=25, min_lr=1e-7),
        ModelCheckpoint(os.path.join(model_dir, 'bull_transfer_model.keras'), save_best_only=True),
        TerminateOnNaN()
    ]

    # Train (epochs=1000 is an upper bound; the callbacks end training earlier)
    start = time.time()
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=1000,
        batch_size=32,
        callbacks=callbacks,
        verbose=2
    )
    elapsed = time.time() - start
    print(f"⏱️ Training time: {elapsed:.2f}s")

    # Save training history (only the history dict, not the History object)
    with open(os.path.join(history_dir, 'history.pkl'), 'wb') as f:
        pickle.dump(history.history, f)

    # Evaluate on validation set
    # ravel() makes the predictions 1-D like y_val, as the test-set cells do
    # with flatten(). Assumes one output value per image.
    y_pred_probs = model.predict(X_val).ravel()
    y_pred = (y_pred_probs > 0.5).astype(int)

    f1 = f1_score(y_val, y_pred, zero_division=1)
    precision = precision_score(y_val, y_pred, zero_division=1)
    recall = recall_score(y_val, y_pred, zero_division=1)
    accuracy = accuracy_score(y_val, y_pred)
    precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
    auc_pr = auc(recall_vals, precision_vals)

    # Print the learning rate actually used instead of a hard-coded label
    print(f"📊 Fold {val_fold} Results (LR = {lr}):")
    print(f"Accuracy:  {accuracy:.4f}")
    print(f"F1 Score:  {f1:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"AUC-PR:    {auc_pr:.4f}")

In [None]:
import os
import numpy as np
import pickle
import time
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc

# === Paths ===
# Saved Keras model that the fold loop below reloads with load_model() for every fold.
cow_model_path = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4/model_fold2345_val1_frozen.keras'
# Root folder from which the Fold1..Fold5 subfolders are read.
bull_base_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed'
# Root folder under which Models/<fold> and Histories/<fold> are created.
output_base = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments_1e3Final5Folds'

# === Learning rate ===
# Passed to Adam(learning_rate=lr) when the model is compiled.
lr = 1e-3

# === Image Loader ===
def load_images_and_labels(image_dir):
    """Read the .jpg files in image_dir/Good and image_dir/Bad.

    Each image is loaded at 224x224 and its pixel array divided by 255.0.
    A class subfolder that does not exist is skipped. 'Good' images get
    label 1 and 'Bad' images get label 0.

    Returns:
        (images, labels): two NumPy arrays in matching order.
    """
    pixel_arrays = []
    class_labels = []
    for class_name, class_label in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if os.path.exists(class_dir):
            # Extension check is case-insensitive, so '.JPG' is accepted too
            jpg_names = [name for name in os.listdir(class_dir)
                         if name.lower().endswith('.jpg')]
            for file_name in jpg_names:
                picture = load_img(os.path.join(class_dir, file_name),
                                   target_size=(224, 224))
                pixel_arrays.append(img_to_array(picture) / 255.0)
                class_labels.append(class_label)
    return np.array(pixel_arrays), np.array(class_labels)

# === Cross validation, validation folds 3-5 only ===
# range(3, 6) skips validation folds 1 and 2 (presumably already trained in
# an earlier run - confirm). Training data still comes from all four
# non-held-out folds, including Fold1 and Fold2.
for val_fold in range(3, 6):
    print(f"\n🚀 Fold {val_fold}: Training on all except Fold{val_fold}, validating on Fold{val_fold}")

    # Prepare training data: collect every fold except the held-out one,
    # then stack the per-fold arrays into single training arrays.
    X_train, y_train = [], []
    for fold in range(1, 6):
        if fold == val_fold:
            continue
        fold_dir = os.path.join(bull_base_dir, f'Fold{fold}')
        X, y = load_images_and_labels(fold_dir)
        X_train.append(X)
        y_train.append(y)
    X_train = np.concatenate(X_train)
    y_train = np.concatenate(y_train)

    # Validation data
    val_dir = os.path.join(bull_base_dir, f'Fold{val_fold}')
    X_val, y_val = load_images_and_labels(val_dir)

    # Load and modify model: the model is reloaded from disk on every pass,
    # so no weights carry over between folds. Only the last 30 layers are
    # left trainable.
    model = load_model(cow_model_path)
    for layer in model.layers[:-30]:
        layer.trainable = False
    for layer in model.layers[-30:]:
        layer.trainable = True

    model.compile(optimizer=Adam(learning_rate=lr),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # Create output directories (one Models/ and one Histories/ folder per fold)
    fold_tag = f'fold{val_fold}'
    model_dir = os.path.join(output_base, 'Models', fold_tag)
    history_dir = os.path.join(output_base, 'Histories', fold_tag)
    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(history_dir, exist_ok=True)

    # The LR-reduction patience (25) is shorter than the early-stopping
    # patience (35), so the learning rate is halved before training is stopped.
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=35, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=25, min_lr=1e-7),
        ModelCheckpoint(os.path.join(model_dir, 'bull_transfer_model.keras'), save_best_only=True),
        TerminateOnNaN()
    ]

    # Train (epochs=1000 is an upper bound; the callbacks end training earlier)
    start = time.time()
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=1000,
        batch_size=32,
        callbacks=callbacks,
        verbose=2
    )
    elapsed = time.time() - start
    print(f"⏱️ Training time: {elapsed:.2f}s")

    # Save training history (only the history dict, not the History object)
    with open(os.path.join(history_dir, 'history.pkl'), 'wb') as f:
        pickle.dump(history.history, f)

    # Evaluate on validation set
    # ravel() makes the predictions 1-D like y_val, as the test-set cells do
    # with flatten(). Assumes one output value per image.
    y_pred_probs = model.predict(X_val).ravel()
    y_pred = (y_pred_probs > 0.5).astype(int)

    f1 = f1_score(y_val, y_pred, zero_division=1)
    precision = precision_score(y_val, y_pred, zero_division=1)
    recall = recall_score(y_val, y_pred, zero_division=1)
    accuracy = accuracy_score(y_val, y_pred)
    precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
    auc_pr = auc(recall_vals, precision_vals)

    # Print the learning rate actually used instead of a hard-coded label
    print(f"📊 Fold {val_fold} Results (LR = {lr}):")
    print(f"Accuracy:  {accuracy:.4f}")
    print(f"F1 Score:  {f1:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"AUC-PR:    {auc_pr:.4f}")

In [None]:
import os
import numpy as np
import pickle
import time
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc

# === Paths ===
# Saved Keras model that the script below loads with load_model().
cow_model_path = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4/model_fold2345_val1_frozen.keras'
# Root folder from which the Fold1..Fold5 subfolders are read.
bull_base_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed'
# Root folder under which Models/<fold> and Histories/<fold> are created.
output_base = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments_1e3Final5Folds'

# === Learning rate ===
# Passed to Adam(learning_rate=lr) when the model is compiled.
lr = 1e-3

# === Image Loader ===
def load_images_and_labels(image_dir):
    """Read the .jpg files in image_dir/Good and image_dir/Bad, skipping failures.

    Each image is loaded at 224x224 and its pixel array divided by 255.0.
    A class subfolder that does not exist is skipped. An image that raises
    while loading is reported on stdout and left out. 'Good' images get
    label 1 and 'Bad' images get label 0.

    Returns:
        (images, labels): two NumPy arrays in matching order.
    """
    pixel_arrays = []
    class_labels = []
    for class_name, class_label in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if os.path.exists(class_dir):
            # Extension check is case-insensitive, so '.JPG' is accepted too
            jpg_names = [name for name in os.listdir(class_dir)
                         if name.lower().endswith('.jpg')]
            for file_name in jpg_names:
                img_path = os.path.join(class_dir, file_name)
                try:
                    picture = load_img(img_path, target_size=(224, 224))
                    pixel_arrays.append(img_to_array(picture) / 255.0)
                    class_labels.append(class_label)
                except Exception as e:
                    # Best effort: one unreadable file must not abort the run
                    print(f"Error loading {img_path}: {e}")
    return np.array(pixel_arrays), np.array(class_labels)

# === Single run: Fold 5 is held out for validation ===
val_fold = 5
print(f"\n🚀 Fold {val_fold}: Training on all except Fold{val_fold}, validating on Fold{val_fold}")

# === Training set: Folds 1-4 loaded one by one, then stacked ===
X_train, y_train = [], []
for fold in range(1, 5):
    X, y = load_images_and_labels(os.path.join(bull_base_dir, f'Fold{fold}'))
    X_train.append(X)
    y_train.append(y)
X_train, y_train = np.concatenate(X_train), np.concatenate(y_train)

# === Validation set: Fold 5 ===
X_val, y_val = load_images_and_labels(os.path.join(bull_base_dir, f'Fold{val_fold}'))

# === Model: load the saved model and leave only its last 30 layers trainable ===
model = load_model(cow_model_path)
first_trainable = len(model.layers) - 30
for position, layer in enumerate(model.layers):
    layer.trainable = position >= first_trainable

model.compile(
    optimizer=Adam(learning_rate=lr),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# === Output folders for this fold ===
fold_tag = f'fold{val_fold}'
model_dir, history_dir = [
    os.path.join(output_base, kind, fold_tag) for kind in ('Models', 'Histories')
]
for out_dir in (model_dir, history_dir):
    os.makedirs(out_dir, exist_ok=True)

# === Callbacks: early stop, LR halving, best-only checkpoint, NaN guard ===
checkpoint_path = os.path.join(model_dir, 'bull_transfer_model.keras')
callbacks = [
    EarlyStopping(monitor='val_loss', patience=35, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=25, min_lr=1e-7),
    ModelCheckpoint(checkpoint_path, save_best_only=True),
    TerminateOnNaN(),
]

# === Timed training ===
fit_options = dict(
    validation_data=(X_val, y_val),
    epochs=1000,
    batch_size=32,
    callbacks=callbacks,
    verbose=2,
)
start = time.time()
history = model.fit(X_train, y_train, **fit_options)
elapsed = time.time() - start
print(f"⏱️ Training time: {elapsed:.2f}s")

# === Persist the history dict ===
history_file = os.path.join(history_dir, 'history.pkl')
with open(history_file, 'wb') as handle:
    pickle.dump(history.history, handle)

# === Validation metrics at a 0.5 decision threshold ===
y_pred_probs = model.predict(X_val)
y_pred = (y_pred_probs > 0.5).astype(int)

f1 = f1_score(y_val, y_pred, zero_division=1)
precision = precision_score(y_val, y_pred, zero_division=1)
recall = recall_score(y_val, y_pred, zero_division=1)
accuracy = accuracy_score(y_val, y_pred)
precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
auc_pr = auc(recall_vals, precision_vals)

print("\n".join([
    f"\n📊 Fold {val_fold} Results (LR = {lr}):",
    f"Accuracy:  {accuracy:.4f}",
    f"F1 Score:  {f1:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall:    {recall:.4f}",
    f"AUC-PR:    {auc_pr:.4f}",
]))

Test set

In [None]:
import os
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, precision_recall_curve, auc

# === Paths ===
# Folder with one subfolder per fold (fold1..fold5); the loop below looks for
# bull_transfer_model.keras inside each.
base_model_dir = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments_1e3Final5Folds/Models'
# Root of the test images; the loader below reads its Good/ and Bad/ subfolders.
test_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed/Test_Copy1'

# === Load test images ===
def load_images_and_labels(image_dir):
    """Read the .jpg files in image_dir/Good and image_dir/Bad.

    Each image is loaded at 224x224 and its pixel array divided by 255.0.
    A class subfolder that does not exist is skipped. 'Good' images get
    label 1 and 'Bad' images get label 0.

    Returns:
        (images, labels): two NumPy arrays in matching order.
    """
    pixel_arrays = []
    class_labels = []
    for class_name, class_label in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if os.path.exists(class_dir):
            # Extension check is case-insensitive, so '.JPG' is accepted too
            jpg_names = [name for name in os.listdir(class_dir)
                         if name.lower().endswith('.jpg')]
            for file_name in jpg_names:
                picture = load_img(os.path.join(class_dir, file_name),
                                   target_size=(224, 224))
                pixel_arrays.append(img_to_array(picture) / 255.0)
                class_labels.append(class_label)
    return np.array(pixel_arrays), np.array(class_labels)

# Load the test images once; every fold's model is scored on the same arrays
X_test, y_test = load_images_and_labels(test_dir)

# === Score each fold's saved model on the test set ===
for fold in range(1, 6):
    print(f"\n🧪 Evaluating Fold {fold} model on test set")

    model_path = os.path.join(base_model_dir, f'fold{fold}', 'bull_transfer_model.keras')
    if os.path.exists(model_path):
        model = load_model(model_path)
    else:
        # A fold without a saved model is reported and skipped
        print(f"⚠️ Model not found for Fold {fold}: {model_path}")
        continue

    # Class predictions use a 0.5 threshold on the model output
    y_pred_probs = model.predict(X_test, verbose=0)
    y_pred = (y_pred_probs > 0.5).astype(int).flatten()

    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    prec_curve, recall_curve, _ = precision_recall_curve(y_test, y_pred_probs)
    auc_pr = auc(recall_curve, prec_curve)
    conf_mat = confusion_matrix(y_test, y_pred)

    print("\n".join([
        f"📊 Fold {fold} Test Metrics:",
        f"Accuracy     : {acc:.4f}",
        f"F1 Score     : {f1:.4f}",
        f"Precision    : {precision:.4f}",
        f"Recall       : {recall:.4f}",
        f"AUC-PR       : {auc_pr:.4f}",
        f"Confusion Matrix:\n{conf_mat}",
    ]))


In [None]:
import os
import numpy as np
from collections import defaultdict, Counter
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, precision_recall_curve, auc

# === Paths ===
# Folder with one subfolder per fold (fold1..fold5); the loop below looks for
# bull_transfer_model.keras inside each.
base_model_dir = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments_1e3Final5Folds/Models'
# Root of the test images; the loader below reads its Good/ and Bad/ subfolders.
test_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed/Test_Copy1'

# === Load test images with filenames and IDs ===
def load_images_labels_and_ids(image_dir):
    """Read the .jpg files in image_dir/Good and image_dir/Bad with their IDs.

    Each image is loaded at 224x224 and its pixel array divided by 255.0.
    A class subfolder that does not exist is skipped. 'Good' images get
    label 1 and 'Bad' images get label 0. The ID of an image is the part of
    its file name before the first underscore (abc123_1.jpg -> abc123).

    Returns:
        (images, labels, filenames, ids): four NumPy arrays in matching order.
    """
    pixel_arrays, class_labels, file_names, individual_ids = [], [], [], []
    for class_name, class_label in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if os.path.exists(class_dir):
            # Extension check is case-insensitive, so '.JPG' is accepted too
            jpg_names = [name for name in os.listdir(class_dir)
                         if name.lower().endswith('.jpg')]
            for file_name in jpg_names:
                picture = load_img(os.path.join(class_dir, file_name),
                                   target_size=(224, 224))
                pixel_arrays.append(img_to_array(picture) / 255.0)
                class_labels.append(class_label)
                file_names.append(file_name)
                individual_ids.append(file_name.split('_')[0])
    return (
        np.array(pixel_arrays),
        np.array(class_labels),
        np.array(file_names),
        np.array(individual_ids),
    )

# Load the test images once; every fold's model is scored on the same arrays
X_test, y_test, filenames, ids = load_images_labels_and_ids(test_dir)

# === Evaluate each fold's model with majority voting per individual ===
# Image-level predictions are grouped by the ID parsed from the file name and
# reduced to one prediction per individual before the metrics are computed.
for fold in range(1, 6):
    print(f"\n🧪 Evaluating Fold {fold} with majority vote per individual")

    model_path = os.path.join(base_model_dir, f'fold{fold}', 'bull_transfer_model.keras')
    if not os.path.exists(model_path):
        print(f"⚠️ Model not found for Fold {fold}: {model_path}")
        continue

    model = load_model(model_path)
    y_pred_probs = model.predict(X_test, verbose=0).flatten()
    y_pred = (y_pred_probs > 0.5).astype(int)

    # === Group predictions, probabilities and labels by ID ===
    id_to_preds = defaultdict(list)
    id_to_probs = defaultdict(list)  # kept so tied votes can be resolved below
    id_to_true = {}
    conflicting_ids = set()

    for pred, prob, true, cow_id in zip(y_pred, y_pred_probs, y_test, ids):
        id_to_preds[cow_id].append(int(pred))
        id_to_probs[cow_id].append(float(prob))
        # All images of one individual are expected to share a label. The last
        # label seen is kept, and any disagreement is recorded for reporting.
        if cow_id in id_to_true and id_to_true[cow_id] != true:
            conflicting_ids.add(cow_id)
        id_to_true[cow_id] = true

    if conflicting_ids:
        print("⚠️ IDs with both Good and Bad images (last label seen is used): "
              + ", ".join(sorted(conflicting_ids)))

    # === Apply majority vote per individual ===
    y_true_individuals = []
    y_pred_individuals = []

    for cow_id in sorted(id_to_preds.keys()):
        preds = id_to_preds[cow_id]
        positive_votes = sum(preds)
        negative_votes = len(preds) - positive_votes
        if positive_votes != negative_votes:
            vote = int(positive_votes > negative_votes)
        else:
            # Tied vote: decide by the mean probability, so the outcome does
            # not depend on the order in which the files were listed.
            vote = int(np.mean(id_to_probs[cow_id]) > 0.5)
        y_pred_individuals.append(vote)
        y_true_individuals.append(id_to_true[cow_id])

    # === Metrics (one sample per individual) ===
    acc = accuracy_score(y_true_individuals, y_pred_individuals)
    f1 = f1_score(y_true_individuals, y_pred_individuals)
    precision = precision_score(y_true_individuals, y_pred_individuals)
    recall = recall_score(y_true_individuals, y_pred_individuals)
    conf_mat = confusion_matrix(y_true_individuals, y_pred_individuals)

    print(f"📊 Fold {fold} Test Metrics (Majority Vote per Individual):")
    print(f"Individuals Evaluated: {len(y_true_individuals)}")
    print(f"Accuracy     : {acc:.4f}")
    print(f"F1 Score     : {f1:.4f}")
    print(f"Precision    : {precision:.4f}")
    print(f"Recall       : {recall:.4f}")
    print(f"Confusion Matrix:\n{conf_mat}")

5-fold cross-validation for bulls using LR = 8e-3

In [None]:
import os
import numpy as np
import pickle
import time
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc

# === Paths ===
# Saved Keras model that the fold loop below reloads with load_model() for every fold.
cow_model_path = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4/model_fold2345_val1_frozen.keras'
# Root folder from which the Fold1..Fold5 subfolders are read.
bull_base_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed'
# Root folder under which Models/<fold> and Histories/<fold> are created.
output_base = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments_8e3Final5Folds'

# === Learning rate ===
# Passed to Adam(learning_rate=lr) when the model is compiled.
lr = 8e-3

# === Image Loader ===
def load_images_and_labels(image_dir):
    """Read the .jpg files in image_dir/Good and image_dir/Bad.

    Each image is loaded at 224x224 and its pixel array divided by 255.0.
    A class subfolder that does not exist is skipped. 'Good' images get
    label 1 and 'Bad' images get label 0.

    Returns:
        (images, labels): two NumPy arrays in matching order.
    """
    pixel_arrays = []
    class_labels = []
    for class_name, class_label in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if os.path.exists(class_dir):
            # Extension check is case-insensitive, so '.JPG' is accepted too
            jpg_names = [name for name in os.listdir(class_dir)
                         if name.lower().endswith('.jpg')]
            for file_name in jpg_names:
                picture = load_img(os.path.join(class_dir, file_name),
                                   target_size=(224, 224))
                pixel_arrays.append(img_to_array(picture) / 255.0)
                class_labels.append(class_label)
    return np.array(pixel_arrays), np.array(class_labels)

# === 5-Fold Cross Validation ===
# Each pass validates on one fold and trains on the other four.
for val_fold in range(1, 6):
    print(f"\n🚀 Fold {val_fold}: Training on all except Fold{val_fold}, validating on Fold{val_fold}")

    # Training set: every fold except the held-out one, stacked together
    X_train, y_train = [], []
    for fold in (k for k in range(1, 6) if k != val_fold):
        X, y = load_images_and_labels(os.path.join(bull_base_dir, f'Fold{fold}'))
        X_train.append(X)
        y_train.append(y)
    X_train, y_train = np.concatenate(X_train), np.concatenate(y_train)

    # Validation set: the held-out fold
    X_val, y_val = load_images_and_labels(os.path.join(bull_base_dir, f'Fold{val_fold}'))

    # Model: reload the saved model and leave only its last 30 layers trainable
    model = load_model(cow_model_path)
    first_trainable = len(model.layers) - 30
    for position, layer in enumerate(model.layers):
        layer.trainable = position >= first_trainable

    model.compile(
        optimizer=Adam(learning_rate=lr),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    # Output folders for this fold
    fold_tag = f'fold{val_fold}'
    model_dir, history_dir = [
        os.path.join(output_base, kind, fold_tag) for kind in ('Models', 'Histories')
    ]
    for out_dir in (model_dir, history_dir):
        os.makedirs(out_dir, exist_ok=True)

    # Callbacks: early stop, LR halving, best-only checkpoint, NaN guard
    checkpoint_path = os.path.join(model_dir, 'bull_transfer_model.keras')
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=35, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=25, min_lr=1e-7),
        ModelCheckpoint(checkpoint_path, save_best_only=True),
        TerminateOnNaN(),
    ]

    # Timed training
    fit_options = dict(
        validation_data=(X_val, y_val),
        epochs=1000,
        batch_size=32,
        callbacks=callbacks,
        verbose=2,
    )
    start = time.time()
    history = model.fit(X_train, y_train, **fit_options)
    elapsed = time.time() - start
    print(f"⏱️ Training time: {elapsed:.2f}s")

    # Persist the history dict
    history_file = os.path.join(history_dir, 'history.pkl')
    with open(history_file, 'wb') as handle:
        pickle.dump(history.history, handle)

    # Validation metrics at a 0.5 decision threshold
    y_pred_probs = model.predict(X_val)
    y_pred = (y_pred_probs > 0.5).astype(int)

    f1 = f1_score(y_val, y_pred, zero_division=1)
    precision = precision_score(y_val, y_pred, zero_division=1)
    recall = recall_score(y_val, y_pred, zero_division=1)
    accuracy = accuracy_score(y_val, y_pred)
    precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
    auc_pr = auc(recall_vals, precision_vals)

    print("\n".join([
        f"📊 Fold {val_fold} Results (LR = 8e-3):",
        f"Accuracy:  {accuracy:.4f}",
        f"F1 Score:  {f1:.4f}",
        f"Precision: {precision:.4f}",
        f"Recall:    {recall:.4f}",
        f"AUC-PR:    {auc_pr:.4f}",
    ]))

In [None]:
import os
import numpy as np
import pickle
import time
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc

# === Paths ===
# Saved Keras model that the fold loop below reloads with load_model() for every fold.
cow_model_path = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4/model_fold2345_val1_frozen.keras'
# Root folder from which the Fold1..Fold5 subfolders are read.
bull_base_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed'
# Root folder under which Models/<fold> and Histories/<fold> are created.
output_base = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments_8e3Final5Folds'

# === Learning rate ===
# Passed to Adam(learning_rate=lr) when the model is compiled.
lr = 8e-3

# === Image Loader ===
def load_images_and_labels(image_dir):
    """Read the .jpg files in image_dir/Good and image_dir/Bad.

    Each image is loaded at 224x224 and its pixel array divided by 255.0.
    A class subfolder that does not exist is skipped. 'Good' images get
    label 1 and 'Bad' images get label 0.

    Returns:
        (images, labels): two NumPy arrays in matching order.
    """
    pixel_arrays = []
    class_labels = []
    for class_name, class_label in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if os.path.exists(class_dir):
            # Extension check is case-insensitive, so '.JPG' is accepted too
            jpg_names = [name for name in os.listdir(class_dir)
                         if name.lower().endswith('.jpg')]
            for file_name in jpg_names:
                picture = load_img(os.path.join(class_dir, file_name),
                                   target_size=(224, 224))
                pixel_arrays.append(img_to_array(picture) / 255.0)
                class_labels.append(class_label)
    return np.array(pixel_arrays), np.array(class_labels)

# === Validation folds 3, 4 and 5 only ===
# Folds 1 and 2 are never held out here, but they are still used for training.
for val_fold in [3, 4, 5]:
    print(f"\n🚀 Fold {val_fold}: Training on all except Fold{val_fold}, validating on Fold{val_fold}")

    # Training set: every fold except the held-out one, stacked together
    X_train, y_train = [], []
    for fold in (k for k in [1, 2, 3, 4, 5] if k != val_fold):
        X, y = load_images_and_labels(os.path.join(bull_base_dir, f'Fold{fold}'))
        X_train.append(X)
        y_train.append(y)
    X_train, y_train = np.concatenate(X_train), np.concatenate(y_train)

    # Validation set: the held-out fold
    X_val, y_val = load_images_and_labels(os.path.join(bull_base_dir, f'Fold{val_fold}'))

    # Model: reload the saved model and leave only its last 30 layers trainable
    model = load_model(cow_model_path)
    first_trainable = len(model.layers) - 30
    for position, layer in enumerate(model.layers):
        layer.trainable = position >= first_trainable

    model.compile(
        optimizer=Adam(learning_rate=lr),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    # Output folders for this fold
    fold_tag = f'fold{val_fold}'
    model_dir, history_dir = [
        os.path.join(output_base, kind, fold_tag) for kind in ('Models', 'Histories')
    ]
    for out_dir in (model_dir, history_dir):
        os.makedirs(out_dir, exist_ok=True)

    # Callbacks: early stop, LR halving, best-only checkpoint, NaN guard
    checkpoint_path = os.path.join(model_dir, 'bull_transfer_model.keras')
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=35, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=25, min_lr=1e-7),
        ModelCheckpoint(checkpoint_path, save_best_only=True),
        TerminateOnNaN(),
    ]

    # Timed training
    fit_options = dict(
        validation_data=(X_val, y_val),
        epochs=1000,
        batch_size=32,
        callbacks=callbacks,
        verbose=2,
    )
    start = time.time()
    history = model.fit(X_train, y_train, **fit_options)
    elapsed = time.time() - start
    print(f"⏱️ Training time: {elapsed:.2f}s")

    # Persist the history dict
    history_file = os.path.join(history_dir, 'history.pkl')
    with open(history_file, 'wb') as handle:
        pickle.dump(history.history, handle)

    # Validation metrics at a 0.5 decision threshold
    y_pred_probs = model.predict(X_val)
    y_pred = (y_pred_probs > 0.5).astype(int)

    f1 = f1_score(y_val, y_pred, zero_division=1)
    precision = precision_score(y_val, y_pred, zero_division=1)
    recall = recall_score(y_val, y_pred, zero_division=1)
    accuracy = accuracy_score(y_val, y_pred)
    precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
    auc_pr = auc(recall_vals, precision_vals)

    print("\n".join([
        f"📊 Fold {val_fold} Results (LR = {lr}):",
        f"Accuracy:  {accuracy:.4f}",
        f"F1 Score:  {f1:.4f}",
        f"Precision: {precision:.4f}",
        f"Recall:    {recall:.4f}",
        f"AUC-PR:    {auc_pr:.4f}",
    ]))

In [None]:
import os
import numpy as np
import pickle
import time
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc

# === Paths ===
# Saved Keras model that the fold loop below reloads with load_model().
cow_model_path = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4/model_fold2345_val1_frozen.keras'
# Root folder from which the Fold1..Fold5 subfolders are read.
bull_base_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed'
# Root folder under which Models/<fold> and Histories/<fold> are created.
output_base = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments_8e3Final5Folds'

# === Learning rate ===
# Passed to Adam(learning_rate=lr) when the model is compiled.
lr = 8e-3

# === Image Loader ===
def load_images_and_labels(image_dir):
    """Read the .jpg files in image_dir/Good and image_dir/Bad.

    Each image is loaded at 224x224 and its pixel array divided by 255.0.
    A class subfolder that does not exist is skipped. 'Good' images get
    label 1 and 'Bad' images get label 0.

    Returns:
        (images, labels): two NumPy arrays in matching order.
    """
    pixel_arrays = []
    class_labels = []
    for class_name, class_label in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if os.path.exists(class_dir):
            # Extension check is case-insensitive, so '.JPG' is accepted too
            jpg_names = [name for name in os.listdir(class_dir)
                         if name.lower().endswith('.jpg')]
            for file_name in jpg_names:
                picture = load_img(os.path.join(class_dir, file_name),
                                   target_size=(224, 224))
                pixel_arrays.append(img_to_array(picture) / 255.0)
                class_labels.append(class_label)
    return np.array(pixel_arrays), np.array(class_labels)

# === Validation fold 5 only ===
# The list holds a single fold, so the loop body runs once: train on
# Folds 1-4 and validate on Fold 5.
for val_fold in [5]:
    print(f"\n🚀 Fold {val_fold}: Training on all except Fold{val_fold}, validating on Fold{val_fold}")

    # Training set: every fold except the held-out one, stacked together
    X_train, y_train = [], []
    for fold in (k for k in [1, 2, 3, 4, 5] if k != val_fold):
        X, y = load_images_and_labels(os.path.join(bull_base_dir, f'Fold{fold}'))
        X_train.append(X)
        y_train.append(y)
    X_train, y_train = np.concatenate(X_train), np.concatenate(y_train)

    # Validation set: the held-out fold
    X_val, y_val = load_images_and_labels(os.path.join(bull_base_dir, f'Fold{val_fold}'))

    # Model: reload the saved model and leave only its last 30 layers trainable
    model = load_model(cow_model_path)
    first_trainable = len(model.layers) - 30
    for position, layer in enumerate(model.layers):
        layer.trainable = position >= first_trainable

    model.compile(
        optimizer=Adam(learning_rate=lr),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    # Output folders for this fold
    fold_tag = f'fold{val_fold}'
    model_dir, history_dir = [
        os.path.join(output_base, kind, fold_tag) for kind in ('Models', 'Histories')
    ]
    for out_dir in (model_dir, history_dir):
        os.makedirs(out_dir, exist_ok=True)

    # Callbacks: early stop, LR halving, best-only checkpoint, NaN guard
    checkpoint_path = os.path.join(model_dir, 'bull_transfer_model.keras')
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=35, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=25, min_lr=1e-7),
        ModelCheckpoint(checkpoint_path, save_best_only=True),
        TerminateOnNaN(),
    ]

    # Timed training
    fit_options = dict(
        validation_data=(X_val, y_val),
        epochs=1000,
        batch_size=32,
        callbacks=callbacks,
        verbose=2,
    )
    start = time.time()
    history = model.fit(X_train, y_train, **fit_options)
    elapsed = time.time() - start
    print(f"⏱️ Training time: {elapsed:.2f}s")

    # Persist the history dict
    history_file = os.path.join(history_dir, 'history.pkl')
    with open(history_file, 'wb') as handle:
        pickle.dump(history.history, handle)

    # Validation metrics at a 0.5 decision threshold
    y_pred_probs = model.predict(X_val)
    y_pred = (y_pred_probs > 0.5).astype(int)

    f1 = f1_score(y_val, y_pred, zero_division=1)
    precision = precision_score(y_val, y_pred, zero_division=1)
    recall = recall_score(y_val, y_pred, zero_division=1)
    accuracy = accuracy_score(y_val, y_pred)
    precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
    auc_pr = auc(recall_vals, precision_vals)

    print("\n".join([
        f"📊 Fold {val_fold} Results (LR = {lr}):",
        f"Accuracy:  {accuracy:.4f}",
        f"F1 Score:  {f1:.4f}",
        f"Precision: {precision:.4f}",
        f"Recall:    {recall:.4f}",
        f"AUC-PR:    {auc_pr:.4f}",
    ]))

In [None]:
import os
import numpy as np
import pickle
import time
import tensorflow as tf
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TerminateOnNaN
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, precision_recall_curve, auc

# === Paths ===
# Saved Keras model that the script below loads with load_model() as base_model.
cow_model_path = '/Users/suzetteschulenburg/Desktop/MainUse/MobileNet2_Frozen_AllLayersMinus4/model_fold2345_val1_frozen.keras'
# Root folder from which the Fold1..Fold5 subfolders are read.
bull_base_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed'
# Root folder under which Models/<fold> and Histories/<fold> paths are built.
output_base = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments_8e3Final5Folds'

# === Learning rate ===
# Passed to Adam(learning_rate=lr) when the model is compiled.
lr = 8e-3

# === Image Loader ===
def load_images_and_labels(image_dir):
    """Read the labelled .jpg images stored under ``image_dir``.

    The ``Good`` folder is scanned first (label 1), then ``Bad`` (label 0).
    A class folder that does not exist is skipped. Each matching file is
    loaded with ``load_img`` at ``target_size=(224, 224)``, converted with
    ``img_to_array`` and divided by 255.0.

    Args:
        image_dir: Directory expected to contain ``Good`` and/or ``Bad``.

    Returns:
        Tuple ``(images, labels)`` of numpy arrays built from the collected
        lists; both are empty arrays when no .jpg file was found.
    """
    pixel_arrays = []
    targets = []
    for class_name, class_label in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if os.path.exists(class_dir):
            # Extension match is case-insensitive, so '.JPG' is accepted too.
            jpg_names = [name for name in os.listdir(class_dir)
                         if name.lower().endswith('.jpg')]
            for jpg_name in jpg_names:
                picture = load_img(os.path.join(class_dir, jpg_name), target_size=(224, 224))
                pixel_arrays.append(img_to_array(picture) / 255.0)
                targets.append(class_label)
    return np.array(pixel_arrays), np.array(targets)

# === Fold 5 Training ===
val_fold = 5
print(f"\n🚀 Fold {val_fold}: Training on all except Fold{val_fold}, validating on Fold{val_fold}")

# Prepare training data from remaining folds
X_train, y_train = [], []
for fold in [1, 2, 3, 4, 5]:
    if fold == val_fold:
        continue
    fold_dir = os.path.join(bull_base_dir, f'Fold{fold}')
    X, y = load_images_and_labels(fold_dir)
    X_train.append(X)
    y_train.append(y)
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

# Validation data
val_dir = os.path.join(bull_base_dir, f'Fold{val_fold}')
X_val, y_val = load_images_and_labels(val_dir)

# === Load and modify model ===
# Start from the model saved at cow_model_path; it is fine-tuned below on the
# bull folds loaded from bull_base_dir.
base_model = load_model(cow_model_path)

# Freeze most layers
# Everything except the last 30 layers is marked non-trainable; the last 30
# layers are explicitly marked trainable.
for layer in base_model.layers[:-30]:
    layer.trainable = False
for layer in base_model.layers[-30:]:
    layer.trainable = True

# ✅ Add sigmoid activation to final output
# NOTE(review): this applies a sigmoid to whatever base_model already outputs.
# If the saved model's last layer is itself a sigmoid, the output would be
# squashed twice — confirm the checkpoint ends in raw logits.
x = base_model.output
output = Activation('sigmoid')(x)
model = Model(inputs=base_model.input, outputs=output)

# ✅ Build model to finalize variables (prevents TFLite crash)
# One forward pass on an all-zero batch of shape (1, 224, 224, 3).
model(tf.zeros((1, 224, 224, 3)))

# === Compile ===
# Uses the learning rate `lr` defined at the top of this cell.
model.compile(optimizer=Adam(learning_rate=lr),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# === Create output directories ===
fold_tag = f'fold{val_fold}'
model_dir = os.path.join(output_base, 'Models', fold_tag)
history_dir = os.path.join(output_base, 'Histories', fold_tag)
os.makedirs(model_dir, exist_ok=True)
os.makedirs(history_dir, exist_ok=True)

# === Callbacks ===
callbacks = [
    EarlyStopping(monitor='val_loss', patience=35, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=25, min_lr=1e-7),
    ModelCheckpoint(os.path.join(model_dir, 'bull_transfer_model_built.keras'), save_best_only=True),
    TerminateOnNaN()
]

# === Train ===
start = time.time()
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=1000,
    batch_size=32,
    callbacks=callbacks,
    verbose=2
)
elapsed = time.time() - start
print(f"⏱️ Training time: {elapsed:.2f}s")

# === Save training history ===
with open(os.path.join(history_dir, 'history_built.pkl'), 'wb') as f:
    pickle.dump(history.history, f)

# === Evaluate ===
# Score the validation fold at a 0.5 decision threshold.
# The predictions are flattened to 1-D (presumably the model emits one column
# per sample — confirm) so that y_pred has the same shape as y_val, matching
# the test-set cells in this notebook that call .flatten() on predictions.
y_pred_probs = model.predict(X_val).ravel()
y_pred = (y_pred_probs > 0.5).astype(int)

# zero_division=1 is passed to all three ratio metrics so a fold with no
# predicted or no actual positives is scored the same way for each of them
# (recall previously omitted it, unlike f1 and precision).
f1 = f1_score(y_val, y_pred, zero_division=1)
precision = precision_score(y_val, y_pred, zero_division=1)
recall = recall_score(y_val, y_pred, zero_division=1)
accuracy = accuracy_score(y_val, y_pred)
# Area under the precision-recall curve, computed from the raw probabilities.
precision_vals, recall_vals, _ = precision_recall_curve(y_val, y_pred_probs)
auc_pr = auc(recall_vals, precision_vals)

print(f"\n📊 Fold {val_fold} Results (LR = {lr}):")
print(f"Accuracy:  {accuracy:.4f}")
print(f"F1 Score:  {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"AUC-PR:    {auc_pr:.4f}")

Analyze graph

In [None]:
import os
import pickle
import matplotlib.pyplot as plt

# === Font settings
plt.rcParams.update({
    'font.size': 18,
    'axes.titlesize': 18,
    'axes.labelsize': 18,
    'xtick.labelsize': 18,
    'ytick.labelsize': 18,
    'legend.fontsize': 18
})

# === Directory and LR
base_history_dir = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments_8e3Final5Folds/Histories'
chosen_lr = 8e-3
folds = [1, 2, 3, 4, 5]

# === Function to load history file
def load_history(fold):
    """Return the unpickled history stored for ``fold``, or None if absent.

    The file is looked up at ``<base_history_dir>/fold<fold>/history.pkl``.
    When it does not exist a message naming the missing path is printed and
    None is returned.
    """
    history_path = os.path.join(base_history_dir, f'fold{fold}', 'history.pkl')

    # Guard clause: report the missing file and bail out early.
    if not os.path.exists(history_path):
        print(f"❌ History not found for Fold {fold}: {history_path}")
        return None

    with open(history_path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

# === Load all fold histories
fold_histories = {}
for fold in folds:
    hist = load_history(fold)
    if hist:
        fold_histories[fold] = hist

# === Plotting function
def plot_train_val_graphs(fold_histories):
    """Plot loss and accuracy curves of all folds on one two-panel figure.

    Args:
        fold_histories: Mapping of fold number to a history dict that is
            indexed with 'loss', 'val_loss', 'accuracy' and 'val_accuracy'.

    Training curves are drawn solid blue and validation curves dashed red for
    every fold. Each panel's legend is given an explicit two-entry label list.
    """
    # One row per panel: subplot slot, history keys, per-line label suffixes,
    # title, y-axis label and the legend entries.
    panels = [
        (1, 'loss', 'val_loss', 'Train Loss', 'Val Loss',
         'Training and Validation Loss Across Folds', 'Loss',
         ['Train Loss', 'Val Loss']),
        (2, 'accuracy', 'val_accuracy', 'Train Acc', 'Val Acc',
         'Training and Validation Accuracy Across Folds (LR=8e-3)', 'Accuracy',
         ['Train Accuracy', 'Val Accuracy']),
    ]

    plt.figure(figsize=(14, 6))

    for slot, train_key, val_key, train_tag, val_tag, title, y_label, legend_text in panels:
        plt.subplot(1, 2, slot)
        for fold_no, hist in fold_histories.items():
            # Only fold 1 receives a per-line label; other folds pass "".
            train_label = f'Fold {fold_no} {train_tag}' if fold_no == 1 else ""
            val_label = f'Fold {fold_no} {val_tag}' if fold_no == 1 else ""
            plt.plot(hist[train_key], color='blue', alpha=0.5, label=train_label)
            plt.plot(hist[val_key], linestyle='--', color='red', alpha=0.5, label=val_label)
        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend(legend_text, loc='upper left', bbox_to_anchor=(0.01, 0.99), frameon=True)
        plt.grid(alpha=0.3)

    plt.tight_layout()
    plt.show()

# === Run plotting
if fold_histories:
    plot_train_val_graphs(fold_histories)
else:
    print("🚫 No valid history files found.")

Last 10 epochs

In [None]:
import os
import pickle
import math
import numpy as np
import matplotlib.pyplot as plt

# === Font settings (large & clean) ===
plt.rcParams.update({
    'font.size': 18,
    'axes.titlesize': 18,
    'axes.labelsize': 18,
    'xtick.labelsize': 16,
    'ytick.labelsize': 16,
    'legend.fontsize': 16
})

# === Config ===
# load_history() below reads <base_history_dir>/fold<k>/history.pkl for each k in folds.
base_history_dir = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments_8e3Final5Folds/Histories'
folds = [1, 2, 3, 4, 5]
# NOTE(review): these histories sit under BullsTransfer/LR_Experiments_8e3Final5Folds,
# the output folder of the transfer-learning cell whose starting checkpoint path
# contains 'MobileNet2'. 'VGG16' looks like a stale label — confirm before the
# plot titles are used anywhere.
model_label = 'VGG16 (LR=8e-3)'  # <- change if you want on the titles
show_ema = True                   # toggle smoothing overlay on/off
ema_alpha = 0.2                   # lower = more smoothing

# === Utilities ===
def load_history(fold):
    """Unpickle the training history saved for ``fold``.

    Looks for ``<base_history_dir>/fold<fold>/history.pkl``. If the file is
    missing, prints the path and returns None. If the unpickled object has a
    ``history`` attribute, that attribute is returned; otherwise the object
    itself is returned.
    """
    pkl_path = os.path.join(base_history_dir, f'fold{fold}', 'history.pkl')
    if os.path.exists(pkl_path):
        with open(pkl_path, 'rb') as stream:
            payload = pickle.load(stream)
        # Normalize: unwrap objects exposing `.history`, pass others through.
        return getattr(payload, 'history', payload)
    print(f"❌ Not found: {pkl_path}")
    return None

def get_metric(history, key, default_key=None):
    """Return the series stored under ``key`` as a list, or None if absent.

    Lookup order: ``key`` itself, then ``default_key`` (when given), then a
    few respellings of ``key`` — lower-cased, and with 'accuracy' /
    'val_accuracy' shortened to 'acc' / 'val_acc'.
    """
    direct_names = [key] if not default_key else [key, default_key]
    for name in direct_names:
        if name in history:
            return list(history[name])

    # Respellings are only built once the direct names have missed.
    respelled = (
        key,
        key.lower(),
        key.replace('accuracy', 'acc'),
        key.replace('val_accuracy', 'val_acc'),
    )
    return next((list(history[alt]) for alt in respelled if alt in history), None)

def exponential_moving_average(x, alpha=0.2):
    """Smooth a sequence with an exponential moving average.

    The first output equals ``x[0]``; every later output is
    ``alpha * sample + (1 - alpha) * previous_output``. ``None`` or an empty
    sequence is returned unchanged.
    """
    if x is None or len(x) == 0:
        return x
    previous = x[0]
    smoothed = [previous]
    for sample in x[1:]:
        previous = alpha * sample + (1 - alpha) * previous
        smoothed.append(previous)
    return smoothed

# === Load histories ===
fold_histories = {}
for f in folds:
    h = load_history(f)
    if h:
        fold_histories[f] = h

if not fold_histories:
    raise SystemExit("🚫 No valid history files found.")

# === Collect best epoch stats per fold ===
summary = []  # each item: dict with fold, best_epoch, val_loss, val_acc, train_loss, train_acc
for f, hist in fold_histories.items():
    val_loss = get_metric(hist, 'val_loss')
    if not val_loss:
        print(f"⚠️ Fold {f}: no 'val_loss' found; skipping.")
        continue
    loss = get_metric(hist, 'loss')
    val_acc = get_metric(hist, 'val_accuracy', default_key='val_acc')
    acc = get_metric(hist, 'accuracy', default_key='acc')

    best_idx = int(np.argmin(val_loss))
    row = {
        'fold': f,
        'best_epoch_idx': best_idx,
        'best_epoch_number': best_idx + 1,  # human-friendly 1-based
        'best_val_loss': float(val_loss[best_idx]),
        'best_val_acc': float(val_acc[best_idx]) if val_acc else float('nan'),
        'train_loss_at_best': float(loss[best_idx]) if loss else float('nan'),
        'train_acc_at_best': float(acc[best_idx]) if acc else float('nan'),
        'num_epochs': len(val_loss)
    }
    summary.append(row)

# Sort by fold for nice printing
summary = sorted(summary, key=lambda d: d['fold'])

# === Print a compact table ===
print("\n📋 Best-epoch summary (per fold):")
print(f"{'Fold':>4} | {'Epoch':>5} | {'Val Loss':>9} | {'Val Acc':>8} | {'Train Loss':>10} | {'Train Acc':>9} | {'#Ep':>4}")
print("-" * 64)
for r in summary:
    print(f"{r['fold']:>4} | {r['best_epoch_number']:>5} | {r['best_val_loss']:>9.4f} | "
          f"{(r['best_val_acc'] if not math.isnan(r['best_val_acc']) else float('nan')):>8.4f} | "
          f"{(r['train_loss_at_best'] if not math.isnan(r['train_loss_at_best']) else float('nan')):>10.4f} | "
          f"{(r['train_acc_at_best'] if not math.isnan(r['train_acc_at_best']) else float('nan')):>9.4f} | "
          f"{r['num_epochs']:>4}")

# === Aggregate stats across folds ===
valid_accs = [r['best_val_acc'] for r in summary if not math.isnan(r['best_val_acc'])]
avg_best_val_acc = float(np.mean(valid_accs)) if valid_accs else float('nan')
avg_best_val_loss = float(np.mean([r['best_val_loss'] for r in summary]))
print(f"\n🔢 Cross-fold averages — Best Val Acc: {avg_best_val_acc:.4f} | Best Val Loss: {avg_best_val_loss:.4f}\n")

# === Plot 1: Curves with best-epoch markers (val_loss & val_accuracy) ===
fig, axes = plt.subplots(1, 2, figsize=(18, 7))

# --- Loss curves
ax = axes[0]
for f, hist in fold_histories.items():
    loss = get_metric(hist, 'loss')
    val_loss = get_metric(hist, 'val_loss')
    if not val_loss: 
        continue
    epochs = np.arange(1, len(val_loss) + 1)
    if show_ema:
        loss_sm = exponential_moving_average(loss, ema_alpha) if loss else None
        val_loss_sm = exponential_moving_average(val_loss, ema_alpha)
        if loss_sm:
            ax.plot(epochs, loss_sm, alpha=0.4, label=f'Fold {f} Train (EMA)')
        ax.plot(epochs, val_loss_sm, '--', alpha=0.9, label=f'Fold {f} Val (EMA)')
    else:
        if loss:
            ax.plot(epochs, loss, alpha=0.4, label=f'Fold {f} Train')
        ax.plot(epochs, val_loss, '--', alpha=0.9, label=f'Fold {f} Val')

    # best epoch marker
    best_idx = int(np.argmin(val_loss))
    ax.axvline(best_idx + 1, color='gray', linestyle=':', alpha=0.6)

ax.set_title(f'Loss Curves with Best-Epoch Markers\n{model_label}')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.grid(alpha=0.3)
ax.legend(ncol=2, frameon=True, fontsize=12)

# --- Accuracy curves
ax = axes[1]
for f, hist in fold_histories.items():
    acc = get_metric(hist, 'accuracy', default_key='acc')
    val_acc = get_metric(hist, 'val_accuracy', default_key='val_acc')
    if not val_acc:
        continue
    epochs = np.arange(1, len(val_acc) + 1)
    if show_ema:
        acc_sm = exponential_moving_average(acc, ema_alpha) if acc else None
        val_acc_sm = exponential_moving_average(val_acc, ema_alpha)
        if acc_sm:
            ax.plot(epochs, acc_sm, alpha=0.4, label=f'Fold {f} Train (EMA)')
        ax.plot(epochs, val_acc_sm, '--', alpha=0.9, label=f'Fold {f} Val (EMA)')
    else:
        if acc:
            ax.plot(epochs, acc, alpha=0.4, label=f'Fold {f} Train')
        ax.plot(epochs, val_acc, '--', alpha=0.9, label=f'Fold {f} Val')

    # best epoch marker (by val_loss)
    val_loss = get_metric(hist, 'val_loss')
    if val_loss:
        best_idx = int(np.argmin(val_loss))
        ax.axvline(best_idx + 1, color='gray', linestyle=':', alpha=0.6)

ax.set_title(f'Accuracy Curves with Best-Epoch Markers\n{model_label}')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.grid(alpha=0.3)
ax.legend(ncol=2, frameon=True, fontsize=12)

plt.tight_layout()
plt.show()

# === Plot 2: Bar chart of best val_accuracy per fold + average line ===
fold_ids = [r['fold'] for r in summary]
best_accs = [r['best_val_acc'] for r in summary]

plt.figure(figsize=(12, 6))
bars = plt.bar([str(f) for f in fold_ids], best_accs)
for b, v in zip(bars, best_accs):
    plt.text(b.get_x() + b.get_width()/2, v + 0.002, f"{v:.3f}", ha='center', va='bottom', fontsize=14)

if not math.isnan(avg_best_val_acc):
    plt.axhline(avg_best_val_acc, linestyle='--', alpha=0.8)
    plt.text(len(fold_ids)-0.4, avg_best_val_acc + 0.002, f"Avg {avg_best_val_acc:.3f}", fontsize=14)

plt.title(f'Best Validation Accuracy per Fold\n{model_label}')
plt.xlabel('Fold')
plt.ylabel('Best Validation Accuracy')
plt.ylim(0, 1)
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()

No transfer learning

In [None]:
import os
import pickle
import math
import numpy as np
import matplotlib.pyplot as plt

# === Font settings (consistent) ===
plt.rcParams.update({
    'font.size': 18,
    'axes.titlesize': 18,
    'axes.labelsize': 18,
    'xtick.labelsize': 18,
    'ytick.labelsize': 18,
    'legend.fontsize': 16
})

# === Paths (your corrected set) ===
base_history_dir = '/Users/suzetteschulenburg/Desktop/Bulls/History_MobileNet2_LRBUllsMore'
chosen_lr = 2e-3
folds = [1, 2, 3, 4, 5]
model_label = f'MobileNetV2 (LR={chosen_lr:.0e})'

# === Options ===
show_ema = True     # overlay smoothing to calm noisy curves
ema_alpha = 0.2     # lower = more smoothing
zoom_last_n = 10    # how many epochs to zoom into at the end

# ---------- Utilities ----------
def lr_tag_variants(lr):
    """Return two spellings of ``lr`` used when probing history folder names.

    The first is ``lr`` in zero-decimal scientific notation (0.002 ->
    '2e-03'); the second is the same text with 'e-0' replaced by 'e-'
    ('2e-3'). The two are identical when the text contains no 'e-0'.
    """
    padded = format(lr, '.0e')
    compact = padded.replace('e-0', 'e-')
    return [padded, compact]

def load_history(fold):
    """Find and unpickle ``history.pkl`` for ``fold`` under several folder tags.

    For every LR spelling from ``lr_tag_variants(chosen_lr)`` the folder
    ``LR_<spelling>_Fold<fold>`` is probed first, then the same name with all
    '-' characters removed. The first existing file wins; if the unpickled
    object has a ``history`` attribute that attribute is returned, otherwise
    the object itself. When nothing is found, every probed path is printed
    and None is returned.
    """
    attempted = []
    for lr_text in lr_tag_variants(chosen_lr):
        dashed_tag = f"LR_{lr_text}_Fold{fold}"
        # Probe the tag as written, then the variant without any minus sign.
        for folder in (dashed_tag, dashed_tag.replace('-', '')):
            candidate = os.path.join(base_history_dir, folder, 'history.pkl')
            attempted.append(candidate)
            if not os.path.exists(candidate):
                continue
            with open(candidate, 'rb') as stream:
                loaded = pickle.load(stream)
            return getattr(loaded, 'history', loaded)  # normalize to dict
    print(f"❌ History not found for Fold {fold}. Tried:\n  - " + "\n  - ".join(attempted))
    return None

def get_metric(history, key, fallback=None):
    """Return the series for ``key`` from ``history`` as a list, else None.

    A falsy ``history`` yields None. Otherwise the lookup tries ``key``, then
    ``fallback`` (when given), then respellings of ``key``: lower-cased, and
    with 'accuracy' / 'val_accuracy' / 'Accuracy' shortened to the 'acc' form.
    """
    if not history:
        return None

    if key in history:
        chosen = key
    elif fallback and fallback in history:
        chosen = fallback
    else:
        spellings = (
            key.lower(),
            key.replace('accuracy', 'acc'),
            key.replace('val_accuracy', 'val_acc'),
            key.replace('Accuracy', 'acc'),
        )
        hits = [name for name in spellings if name in history]
        if not hits:
            return None
        chosen = hits[0]
    return list(history[chosen])

def ema(x, alpha=0.2):
    """Exponentially smooth ``x``; None or an empty sequence is returned as-is.

    The output starts with ``x[0]``; each following value is
    ``alpha * current + (1 - alpha) * last_output``.
    """
    if x is None or len(x) == 0:
        return x
    result = [x[0]]
    for current in x[1:]:
        result.append(alpha * current + (1 - alpha) * result[-1])
    return result

# ---------- Load histories ----------
fold_histories = {}
for fold in folds:
    h = load_history(fold)
    if h:
        fold_histories[fold] = h

if not fold_histories:
    raise SystemExit("🚫 No valid history files found.")

# ---------- Summarize best epoch (by val_loss) ----------
summary = []
for f, hist in fold_histories.items():
    vloss = get_metric(hist, 'val_loss')
    if not vloss:
        print(f"⚠️ Fold {f}: missing 'val_loss'; skipping.")
        continue
    loss = get_metric(hist, 'loss')
    vacc = get_metric(hist, 'val_accuracy', fallback='val_acc')
    acc  = get_metric(hist, 'accuracy', fallback='acc')

    best_idx = int(np.argmin(vloss))
    summary.append({
        'fold': f,
        'best_epoch_idx': best_idx,
        'best_epoch_number': best_idx + 1,
        'best_val_loss': float(vloss[best_idx]),
        'best_val_acc': float(vacc[best_idx]) if vacc else float('nan'),
        'train_loss_at_best': float(loss[best_idx]) if loss else float('nan'),
        'train_acc_at_best': float(acc[best_idx]) if acc else float('nan'),
        'num_epochs': len(vloss)
    })

summary = sorted(summary, key=lambda d: d['fold'])

# Print table
print("\n📋 Best-epoch summary (per fold):")
print(f"{'Fold':>4} | {'Epoch':>5} | {'Val Loss':>9} | {'Val Acc':>8} | {'Train Loss':>10} | {'Train Acc':>9} | {'#Ep':>4}")
print("-" * 64)
for r in summary:
    print(f"{r['fold']:>4} | {r['best_epoch_number']:>5} | {r['best_val_loss']:>9.4f} | "
          f"{(r['best_val_acc'] if not math.isnan(r['best_val_acc']) else float('nan')):>8.4f} | "
          f"{(r['train_loss_at_best'] if not math.isnan(r['train_loss_at_best']) else float('nan')):>10.4f} | "
          f"{(r['train_acc_at_best'] if not math.isnan(r['train_acc_at_best']) else float('nan')):>9.4f} | "
          f"{r['num_epochs']:>4}")

valid_accs = [r['best_val_acc'] for r in summary if not math.isnan(r['best_val_acc'])]
avg_best_val_acc = float(np.mean(valid_accs)) if valid_accs else float('nan')
avg_best_val_loss = float(np.mean([r['best_val_loss'] for r in summary]))
print(f"\n🔢 Cross-fold averages — Best Val Acc: {avg_best_val_acc:.4f} | Best Val Loss: {avg_best_val_loss:.4f}\n")

# ---------- Plot 1: Full curves with best-epoch markers ----------
fig, axes = plt.subplots(1, 2, figsize=(18, 7))

# Loss
ax = axes[0]
for f, hist in fold_histories.items():
    loss = get_metric(hist, 'loss')
    vloss = get_metric(hist, 'val_loss')
    if not vloss:
        continue
    epochs = np.arange(1, len(vloss) + 1)
    if show_ema:
        loss_sm = ema(loss, ema_alpha) if loss else None
        vloss_sm = ema(vloss, ema_alpha)
        if loss_sm:
            ax.plot(epochs, loss_sm, alpha=0.4, label=f'Fold {f} Train (EMA)')
        ax.plot(epochs, vloss_sm, '--', alpha=0.9, label=f'Fold {f} Val (EMA)')
    else:
        if loss:
            ax.plot(epochs, loss, alpha=0.4, label=f'Fold {f} Train')
        ax.plot(epochs, vloss, '--', alpha=0.9, label=f'Fold {f} Val')
    best_idx = int(np.argmin(vloss))
    ax.axvline(best_idx + 1, color='gray', linestyle=':', alpha=0.6)

ax.set_title(f'Loss Curves with Best-Epoch Markers\n{model_label}')
ax.set_xlabel('Epoch'); ax.set_ylabel('Loss'); ax.grid(alpha=0.3)
ax.legend(ncol=2, frameon=True, fontsize=12)

# Accuracy
ax = axes[1]
for f, hist in fold_histories.items():
    acc  = get_metric(hist, 'accuracy', fallback='acc')
    vacc = get_metric(hist, 'val_accuracy', fallback='val_acc')
    if not vacc:
        continue
    epochs = np.arange(1, len(vacc) + 1)
    if show_ema:
        acc_sm  = ema(acc, ema_alpha) if acc else None
        vacc_sm = ema(vacc, ema_alpha)
        if acc_sm:
            ax.plot(epochs, acc_sm, alpha=0.4, label=f'Fold {f} Train (EMA)')
        ax.plot(epochs, vacc_sm, '--', alpha=0.9, label=f'Fold {f} Val (EMA)')
    else:
        if acc:
            ax.plot(epochs, acc, alpha=0.4, label=f'Fold {f} Train')
        ax.plot(epochs, vacc, '--', alpha=0.9, label=f'Fold {f} Val')

    vloss = get_metric(hist, 'val_loss')
    if vloss:
        best_idx = int(np.argmin(vloss))
        ax.axvline(best_idx + 1, color='gray', linestyle=':', alpha=0.6)

ax.set_title(f'Accuracy Curves with Best-Epoch Markers\n{model_label}')
ax.set_xlabel('Epoch'); ax.set_ylabel('Accuracy'); ax.grid(alpha=0.3)
ax.legend(ncol=2, frameon=True, fontsize=12)

plt.tight_layout()
plt.show()

# ---------- Plot 2: Zoom on last N epochs (loss & accuracy) ----------
# Raw (unsmoothed) curves restricted to the final zoom_last_n epochs of each
# fold, with the x-axis carrying the real 1-based epoch numbers.
fig, axes = plt.subplots(1, 2, figsize=(18, 7))

# Loss zoom
ax = axes[0]
for f, hist in fold_histories.items():
    loss = get_metric(hist, 'loss')
    vloss = get_metric(hist, 'val_loss')
    if not vloss:
        continue
    # last N (the slice is the whole run when it has fewer than N epochs)
    loss_last  = loss[-zoom_last_n:] if loss else None
    vloss_last = vloss[-zoom_last_n:]
    # Derive the first epoch number from the slice actually taken. Using
    # len(vloss) - zoom_last_n + 1 would give zero or negative epoch numbers
    # for a run shorter than zoom_last_n.
    start = len(vloss) - len(vloss_last) + 1
    epochs = np.arange(start, start + len(vloss_last))
    if loss_last:
        ax.plot(epochs, loss_last, alpha=0.4, label=f'Fold {f} Train')
    ax.plot(epochs, vloss_last, '--', alpha=0.9, label=f'Fold {f} Val')
ax.set_title(f'Loss — Last {zoom_last_n} Epochs\n{model_label}')
ax.set_xlabel('Epoch'); ax.set_ylabel('Loss'); ax.grid(alpha=0.3)
ax.legend(ncol=2, frameon=True, fontsize=12)

# Accuracy zoom
ax = axes[1]
for f, hist in fold_histories.items():
    acc  = get_metric(hist, 'accuracy', fallback='acc')
    vacc = get_metric(hist, 'val_accuracy', fallback='val_acc')
    if not vacc:
        continue
    acc_last  = acc[-zoom_last_n:] if acc else None
    vacc_last = vacc[-zoom_last_n:]
    # Same epoch alignment as the loss panel: based on the real slice length.
    start = len(vacc) - len(vacc_last) + 1
    epochs = np.arange(start, start + len(vacc_last))
    if acc_last:
        ax.plot(epochs, acc_last, alpha=0.4, label=f'Fold {f} Train')
    ax.plot(epochs, vacc_last, '--', alpha=0.9, label=f'Fold {f} Val')
ax.set_title(f'Accuracy — Last {zoom_last_n} Epochs\n{model_label}')
ax.set_xlabel('Epoch'); ax.set_ylabel('Accuracy'); ax.grid(alpha=0.3)
ax.legend(ncol=2, frameon=True, fontsize=12)

plt.tight_layout()
plt.show()

# ---------- Plot 3: Best val_accuracy per fold + average ----------
fold_ids = [r['fold'] for r in summary]
best_accs = [r['best_val_acc'] for r in summary]

plt.figure(figsize=(12, 6))
bars = plt.bar([str(f) for f in fold_ids], best_accs)
for b, v in zip(bars, best_accs):
    plt.text(b.get_x() + b.get_width()/2, v + 0.002, f"{v:.3f}", ha='center', va='bottom', fontsize=14)

if not math.isnan(avg_best_val_acc):
    plt.axhline(avg_best_val_acc, linestyle='--', alpha=0.8)
    plt.text(len(fold_ids)-0.4, avg_best_val_acc + 0.002, f"Avg {avg_best_val_acc:.3f}", fontsize=14)

plt.title(f'Best Validation Accuracy per Fold\n{model_label}')
plt.xlabel('Fold'); plt.ylabel('Best Validation Accuracy'); plt.ylim(0, 1)
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()

Minimum loss

In [None]:
# === Print metrics at minimum val_loss per fold
# This cell has no imports or loading of its own: it uses `fold_histories` and
# `np` as left in the notebook namespace by a previously executed cell.
print("\n📉 Metrics at Minimum Validation Loss Per Fold")
print(f"{'Fold':<6} {'Epoch':<6} {'Val Loss':<10} {'Val Acc':<10} {'Train Loss':<11} {'Train Acc':<10}")
print("-" * 60)

for fold, history in fold_histories.items():
    val_losses = history['val_loss']
    min_idx = int(np.argmin(val_losses))
    # History lists are 0-indexed; report the 1-based epoch number so this
    # table agrees with the best-epoch summaries elsewhere in the notebook,
    # which print best_idx + 1.
    best_epoch = min_idx + 1

    val_loss = val_losses[min_idx]
    val_acc = history['val_accuracy'][min_idx]
    train_loss = history['loss'][min_idx]
    train_acc = history['accuracy'][min_idx]

    # Correct if metrics were saved as percentage
    if val_acc > 1.5: val_acc /= 100.0
    if train_acc > 1.5: train_acc /= 100.0

    print(f"{fold:<6} {best_epoch:<6} {val_loss:<10.4f} {val_acc:<10.4f} {train_loss:<11.4f} {train_acc:<10.4f}")

Test set

In [None]:
import os
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, precision_recall_curve, auc

# === Paths ===
base_model_dir = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments_8e3Final5Folds/Models'
test_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed/Test_Copy1'

# === Load test images ===
def load_images_and_labels(image_dir):
    """Load the test images found in ``image_dir/Good`` and ``image_dir/Bad``.

    Files whose lower-cased name ends in '.jpg' are loaded with ``load_img``
    at ``target_size=(224, 224)``, converted with ``img_to_array`` and divided
    by 255.0. 'Good' images get label 1, 'Bad' images label 0; a missing class
    folder is skipped.

    Returns:
        ``(images, labels)`` as numpy arrays, Good entries first.
    """
    collected = []  # (pixel array, label) pairs in discovery order
    for class_name in ('Good', 'Bad'):
        class_dir = os.path.join(image_dir, class_name)
        if not os.path.exists(class_dir):
            continue
        class_label = 1 if class_name == 'Good' else 0
        for entry in os.listdir(class_dir):
            if not entry.lower().endswith('.jpg'):
                continue
            picture = load_img(os.path.join(class_dir, entry), target_size=(224, 224))
            collected.append((img_to_array(picture) / 255.0, class_label))
    images = np.array([pixels for pixels, _ in collected])
    labels = np.array([label for _, label in collected])
    return images, labels

X_test, y_test = load_images_and_labels(test_dir)

# === Evaluate each fold's model ===
for fold in range(1, 6):
    print(f"\n🧪 Evaluating Fold {fold} model on test set")

    model_path = os.path.join(base_model_dir, f'fold{fold}', 'bull_transfer_model.keras')
    if not os.path.exists(model_path):
        print(f"⚠️ Model not found for Fold {fold}: {model_path}")
        continue

    model = load_model(model_path)
    y_pred_probs = model.predict(X_test, verbose=0)
    y_pred = (y_pred_probs > 0.5).astype(int).flatten()

    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    prec_curve, recall_curve, _ = precision_recall_curve(y_test, y_pred_probs)
    auc_pr = auc(recall_curve, prec_curve)
    conf_mat = confusion_matrix(y_test, y_pred)

    print(f"📊 Fold {fold} Test Metrics:")
    print(f"Accuracy     : {acc:.4f}")
    print(f"F1 Score     : {f1:.4f}")
    print(f"Precision    : {precision:.4f}")
    print(f"Recall       : {recall:.4f}")
    print(f"AUC-PR       : {auc_pr:.4f}")
    print(f"Confusion Matrix:\n{conf_mat}")


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# === Font settings
plt.rcParams.update({
    'font.size': 18,
    'axes.titlesize': 18,
    'axes.labelsize': 18,
    'xtick.labelsize': 18,
    'ytick.labelsize': 18,
    'legend.fontsize': 18
})

# === Updated Fold metrics from test evaluation
metrics = [
    {'Fold': 1, 'Accuracy': 0.5532, 'F1 Score': 0.5227, 'Precision': 0.5610, 'Recall': 0.4894, 'AUC-PR': 0.5593},
    {'Fold': 2, 'Accuracy': 0.5000, 'F1 Score': 0.3380, 'Precision': 0.5000, 'Recall': 0.2553, 'AUC-PR': 0.5777},
    {'Fold': 3, 'Accuracy': 0.5426, 'F1 Score': 0.4691, 'Precision': 0.5588, 'Recall': 0.4043, 'AUC-PR': 0.6038},
    {'Fold': 4, 'Accuracy': 0.5426, 'F1 Score': 0.4691, 'Precision': 0.5588, 'Recall': 0.4043, 'AUC-PR': 0.5537},
    {'Fold': 5, 'Accuracy': 0.5532, 'F1 Score': 0.5333, 'Precision': 0.5581, 'Recall': 0.5106, 'AUC-PR': 0.4832}
]

# === Convert to long-form DataFrame
plot_data = []
for m in metrics:
    for metric_name in ['Accuracy', 'F1 Score', 'Precision', 'Recall', 'AUC-PR']:
        plot_data.append({
            'Fold': f"Fold {m['Fold']}",
            'Metric': metric_name,
            'Value': m[metric_name]
        })
df_plot = pd.DataFrame(plot_data)

# === Violin Plot
plt.figure(figsize=(10, 5))
sns.violinplot(x='Metric', y='Value', data=df_plot, inner='point', palette='muted')
plt.title('Distribution of Evaluation Metrics Across Folds')
plt.ylim(0.08, 1.0)
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()


Majority

In [None]:
import os
import numpy as np
from collections import defaultdict, Counter
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, precision_recall_curve, auc

# === Paths ===
base_model_dir = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments_8e3Final5Folds/Models'
test_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed/Test_Copy1'

# === Load test images with filenames and IDs ===
def load_images_labels_and_ids(image_dir):
    """Load test images together with their file names and individual IDs.

    Scans ``image_dir/Good`` (label 1) and then ``image_dir/Bad`` (label 0),
    skipping a class folder that does not exist. Every file whose lower-cased
    name ends in '.jpg' is loaded with ``load_img`` at
    ``target_size=(224, 224)``, converted with ``img_to_array`` and divided by
    255.0. The ID of an image is the part of its file name before the first
    underscore (e.g. abc123_1.jpg -> abc123).

    Returns:
        ``(images, labels, filenames, ids)`` as four numpy arrays aligned by
        position.
    """
    samples = []  # one (pixels, label, file name, id) tuple per image
    for class_name, class_label in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if not os.path.exists(class_dir):
            continue
        for entry in os.listdir(class_dir):
            if not entry.lower().endswith('.jpg'):
                continue
            picture = load_img(os.path.join(class_dir, entry), target_size=(224, 224))
            pixels = img_to_array(picture) / 255.0
            # Text before the first '_' (the whole name if there is none).
            individual_id = entry.partition('_')[0]
            samples.append((pixels, class_label, entry, individual_id))

    images = np.array([sample[0] for sample in samples])
    labels = np.array([sample[1] for sample in samples])
    filenames = np.array([sample[2] for sample in samples])
    ids = np.array([sample[3] for sample in samples])
    return images, labels, filenames, ids

X_test, y_test, filenames, ids = load_images_labels_and_ids(test_dir)

# === Evaluate each fold's model with majority voting per individual ===
# For every fold's saved model: predict each test image, group the per-image
# predictions by individual ID, reduce each group to one label by majority
# vote, and score those per-individual labels.
for fold in range(1, 6):
    print(f"\n🧪 Evaluating Fold {fold} with majority vote per individual")

    model_path = os.path.join(base_model_dir, f'fold{fold}', 'bull_transfer_model.keras')
    if not os.path.exists(model_path):
        print(f"⚠️ Model not found for Fold {fold}: {model_path}")
        continue

    model = load_model(model_path)
    y_pred_probs = model.predict(X_test, verbose=0).flatten()
    y_pred = (y_pred_probs > 0.5).astype(int)

    # === Group predictions and labels by ID ===
    id_to_preds = defaultdict(list)
    id_to_probs = defaultdict(list)
    id_to_true = {}

    for pred, prob, true, cow_id in zip(y_pred, y_pred_probs, y_test, ids):
        id_to_preds[cow_id].append(int(pred))
        id_to_probs[cow_id].append(float(prob))
        # All images of one individual are expected to share a label; make a
        # violation visible instead of silently keeping the last label seen.
        if cow_id in id_to_true and id_to_true[cow_id] != true:
            print(f"⚠️ ID {cow_id} has conflicting labels; keeping the last one seen")
        id_to_true[cow_id] = true

    # === Apply majority vote per individual ===
    y_true_individuals = []
    y_pred_individuals = []

    for cow_id in sorted(id_to_preds.keys()):
        preds = id_to_preds[cow_id]
        good_votes = sum(preds)
        bad_votes = len(preds) - good_votes
        if good_votes != bad_votes:
            vote = 1 if good_votes > bad_votes else 0
        else:
            # Tie: decide by the mean predicted probability so the result does
            # not depend on the order in which the images happened to be listed.
            vote = int(np.mean(id_to_probs[cow_id]) > 0.5)
        y_pred_individuals.append(vote)
        y_true_individuals.append(id_to_true[cow_id])

    # === Metrics ===
    acc = accuracy_score(y_true_individuals, y_pred_individuals)
    f1 = f1_score(y_true_individuals, y_pred_individuals)
    precision = precision_score(y_true_individuals, y_pred_individuals)
    recall = recall_score(y_true_individuals, y_pred_individuals)
    conf_mat = confusion_matrix(y_true_individuals, y_pred_individuals)

    print(f"📊 Fold {fold} Test Metrics (Majority Vote per Individual):")
    print(f"Individuals Evaluated: {len(y_true_individuals)}")
    print(f"Accuracy     : {acc:.4f}")
    print(f"F1 Score     : {f1:.4f}")
    print(f"Precision    : {precision:.4f}")
    print(f"Recall       : {recall:.4f}")
    print(f"Confusion Matrix:\n{conf_mat}")

In [None]:
import os
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, precision_recall_curve, auc

# === Paths ===
base_model_dir = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments_8e3Final5Folds/Models'
test_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed/Test_Copy1'

# === Load test images (with filenames) ===
def load_images_labels_and_filenames(image_dir):
    """Read the .jpg images under image_dir/Good and image_dir/Bad.

    Each image is loaded with target_size=(224, 224) and its pixel array is
    divided by 255.0. Class folders that do not exist are skipped. Within a
    folder, files are visited in sorted filename order; 'Good' is processed
    before 'Bad'.

    Returns:
        A tuple (images, labels, filenames): two NumPy arrays plus a list of
        "<class>/<filename>" strings, all in the same order. Labels are 1 for
        'Good' and 0 for 'Bad'.
    """
    pixel_arrays = []
    class_ids = []
    rel_names = []
    for class_name, class_id in (('Good', 1), ('Bad', 0)):
        class_dir = os.path.join(image_dir, class_name)
        if os.path.exists(class_dir):
            jpg_names = [
                entry for entry in sorted(os.listdir(class_dir))
                if entry.lower().endswith('.jpg')
            ]
            for jpg_name in jpg_names:
                picture = load_img(os.path.join(class_dir, jpg_name), target_size=(224, 224))
                pixel_arrays.append(img_to_array(picture) / 255.0)
                class_ids.append(class_id)
                rel_names.append(f"{class_name}/{jpg_name}")
    return np.array(pixel_arrays), np.array(class_ids), rel_names

# Load the test set once; the same arrays are reused for every fold's model.
X_test, y_test, filenames = load_images_labels_and_filenames(test_dir)

# === Score each fold's saved model on the shared test set ===
for fold in range(1, 6):
    print(f"\n🧪 Evaluating Fold {fold} model on test set")

    model_path = os.path.join(base_model_dir, f'fold{fold}', 'bull_transfer_model.keras')
    if os.path.exists(model_path):
        model = load_model(model_path)
    else:
        # No saved model for this fold: report it and move on to the next one.
        print(f"⚠️ Model not found for Fold {fold}: {model_path}")
        continue

    # === Predict all ===
    # One score per image after flattening; scores above 0.5 count as class 1 ("Good").
    y_pred_probs = model.predict(X_test, verbose=0).flatten()
    y_pred = (y_pred_probs > 0.5).astype(int)

    # === Per-image report ===
    print("\n📋 Per-image predictions:")
    for fname, prob, true_label in zip(filenames, y_pred_probs, y_test):
        if prob > 0.5:
            pred_label = "Good"
        else:
            pred_label = "Bad"
        if true_label == 1:
            actual_label = "Good"
        else:
            actual_label = "Bad"
        print(f"{fname:<50} | Predicted: {pred_label:>4} ({prob:.4f}) | Actual: {actual_label}")

    # === Metrics ===
    # Thresholded predictions feed the classification metrics; the raw scores
    # feed the precision-recall curve, whose area is taken with auc().
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    prec_curve, recall_curve, _ = precision_recall_curve(y_test, y_pred_probs)
    auc_pr = auc(recall_curve, prec_curve)
    conf_mat = confusion_matrix(y_test, y_pred)

    # Emit the summary as a single newline-joined block.
    print("\n".join([
        f"\n📊 Fold {fold} Test Metrics:",
        f"Accuracy     : {acc:.4f}",
        f"F1 Score     : {f1:.4f}",
        f"Precision    : {precision:.4f}",
        f"Recall       : {recall:.4f}",
        f"AUC-PR       : {auc_pr:.4f}",
        f"Confusion Matrix:\n{conf_mat}",
    ]))

In [None]:
import os
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# === Paths ===
base_model_dir = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments_8e3Final5Folds/Models'
image_path = "/Users/suzetteschulenburg/Desktop/BullsProcessed/Test_Copy1/Bad/R18228_6_IMG_3154_Rating5_processed.jpg"
# image_path = "/Users/suzetteschulenburg/Desktop/Bulls/Split copy 2/Test copy/Bad/R18228_6_IMG_3154_Rating5.jpg"

# === Load model (Fold 5) ===
# Fail fast with a clear error if the saved model file is missing.
model_path = os.path.join(base_model_dir, 'fold5', 'bull_transfer_model.keras')
if os.path.exists(model_path):
    model = load_model(model_path)
    print(f"✅ Loaded model: {model_path}")
else:
    raise FileNotFoundError(f"Model not found: {model_path}")

# === Preprocess image ===
# Same preparation as the test-set loader: 224x224 target size, pixels / 255,
# then a leading batch axis so predict() receives a batch of one.
if os.path.exists(image_path):
    img = load_img(image_path, target_size=(224, 224))
else:
    raise FileNotFoundError(f"Image not found: {image_path}")

x = np.expand_dims(img_to_array(img) / 255.0, axis=0)

# === Predict ===
# First (only) value of the flattened output, thresholded at 0.5.
prob = float(model.predict(x, verbose=0).flatten()[0])  # sigmoid prob of "Good"
if prob > 0.5:
    pred_label = "Good"
else:
    pred_label = "Bad"

# === Infer actual label from folder name ===
# The image's parent folder is the ground truth only when it is Good or Bad.
parent = os.path.basename(os.path.dirname(image_path))
actual_label = "Unknown"
if parent in {"Good", "Bad"}:
    actual_label = parent

# === Output ===
print("\n🖼️ Image:", image_path)
print(f"🔮 Predicted: {pred_label} (p(Good) = {prob:.4f})")
print(f"🏷️  Actual   : {actual_label}")

In [None]:
import os
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# The warm-up call below uses tf.zeros, but this cell only imported names from
# tensorflow.keras; import TensorFlow here so the cell also runs in a fresh kernel.
import tensorflow as tf

# === Paths ===
# Each fold's model is looked up at '<base_model_dir>/fold{N}/bull_transfer_model.keras'.
base_model_dir = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments_8e3Final5Folds/Models'
image_path = "/Users/suzetteschulenburg/Desktop/Bulls/Split copy 2/Test/Good/MAD21216_IMG_4_Rating8.jpg"

# === Preprocess the single image ===
# Load at 224x224, convert to float32, divide by 255, and add a batch axis.
img = load_img(image_path, target_size=(224, 224))
img_array = img_to_array(img).astype(np.float32) / 255.0
input_array = np.expand_dims(img_array, axis=0)  # Shape: (1, 224, 224, 3)

print(f"✅ Image loaded: {os.path.basename(image_path)}")
print(f"📐 Shape: {input_array.shape}, dtype: {input_array.dtype}")

# === Predict with each fold ===
# The same preprocessed image is scored by every fold's model so the five
# predictions can be compared side by side.
for fold in range(1, 6):
    print(f"\n🧪 Predicting with Fold {fold} model")

    model_path = os.path.join(base_model_dir, f'fold{fold}', 'bull_transfer_model.keras')
    if not os.path.exists(model_path):
        # Missing models are reported and skipped rather than aborting the loop.
        print(f"⚠️ Model not found: {model_path}")
        continue

    model = load_model(model_path)
    # Run one all-zeros batch through the model before the real prediction.
    model(tf.zeros((1, 224, 224, 3)))  # Warm-up

    # Single scalar output for the single image; above 0.5 is reported as "Good".
    prob = float(model.predict(input_array, verbose=0)[0][0])
    label = "Good" if prob > 0.5 else "Bad"

    print(f"🧪 Fold {fold} Prediction: {label} ({prob:.4f})")

In [None]:
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Activation

# === Load base model and wrap with sigmoid (if not done already) ===
# NOTE(review): nothing in this cell checks whether the loaded model already ends
# in a sigmoid. Other cells in this notebook threshold this model's predict()
# output at 0.5 as if it were already a probability; if that is the case, the
# wrapper below applies sigmoid a second time — confirm before using the re-saved
# model.
base_model = load_model("/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments_8e3Final5Folds/Models/fold5/bull_transfer_model.keras")
# Append a sigmoid activation to the loaded model's output and build a new
# Model that keeps the original input.
output = Activation("sigmoid")(base_model.output)
model = Model(inputs=base_model.input, outputs=output)

# === Save new model WITH sigmoid ===
# Saved under a different filename in the same fold5 folder, so the original
# .keras file is left untouched.
model.save("/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments_8e3Final5Folds/Models/fold5/bull_transfer_model_with_sigmoid.keras")

In [None]:
from scipy.special import expit
# NOTE(review): `output` is not defined in this cell. The printed message
# suggests it is a raw TFLite output array produced by another cell, but the
# preceding cell also binds `output` to a Keras tensor — confirm which object
# is in scope before running this.
# expit is the logistic sigmoid; it maps the first output value to (0, 1).
prob = expit(float(output[0][0]))
# Same 0.5 decision threshold as the Keras prediction cells.
label = "Good" if prob > 0.5 else "Bad"
print(f"🧪 Sigmoid-corrected TFLite Prediction: {label} ({prob:.4f})")


In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# ================== Paths ==================
# Data root: each fold's images are read from '<bull_base_dir>/Fold{K}'.
bull_base_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed'  # contains Fold1..Fold5/{Good,Bad}
# Experiment root: models are read from its 'Models' sub-folder.
output_base   = '/Users/suzetteschulenburg/Desktop/BullsTransfer/LR_Experiments_8e3Final5Folds'
# Models expected at: {output_base}/Models/fold{K}/bull_transfer_model.keras

# ================== Config ==================
# Fold numbers to process; each K selects both the Fold{K} image folder and the fold{K} model.
FOLDS  = [1, 2, 3, 4, 5]
TOP_N  = 2               # how many misclassified originals to show per fold
# batch_size passed to model.predict in the main loop.
BATCH  = 32

# Substrings that mark a file as an AUGMENTED copy. Matching is done against
# the lower-cased filename, and any file containing one of these is skipped.
AUGMENTATION_KEYWORDS = [
    "_aug",
    "_flip",
    "_rot",
    "_rotated",
    "_shift",
    "_zoom",
    "_noise",
    "_noisy",
    "_blur",
    "_shear",
    "_contrast",
    "_bright",
    "_gamma",
    "_cutout",
    "_mixup",
]

def is_original_filename(fname: str) -> bool:
    """Return True when fname contains none of the augmentation keywords.

    The comparison is case-insensitive: the filename is lower-cased before
    each keyword is searched for as a plain substring.
    """
    lowered = fname.lower()
    for keyword in AUGMENTATION_KEYWORDS:
        if keyword in lowered:
            return False
    return True

def load_val_images_labels_paths(val_dir):
    """
    Loads ALL .jpg images from val_dir/{Good,Bad}. Returns (X, y, paths, filenames).

    Each image is loaded with target_size=(224, 224) and divided by 255.0.
    'Good' is processed before 'Bad', and files inside each folder are visited
    in sorted filename order, so the returned arrays are ordered the same way
    on every run (os.listdir alone returns entries in arbitrary order).

    A class folder that does not exist is skipped. An image that fails to load
    is reported with a printed warning and left out of the result.

    Returns:
        X: stacked image array; shape (0, 224, 224, 3) when nothing was loaded.
        y: labels aligned with X, 1 for 'Good' and 0 for 'Bad'; an empty
           integer array when nothing was loaded.
        paths: list of full file paths aligned with X.
        filenames: list of bare filenames aligned with X.
    """
    X, y, paths, names = [], [], [], []
    for subdir in ['Good', 'Bad']:
        full = os.path.join(val_dir, subdir)
        if not os.path.isdir(full):
            continue
        # Sorted for a reproducible order across runs and platforms.
        for fname in sorted(os.listdir(full)):
            if not fname.lower().endswith('.jpg'):
                continue
            fpath = os.path.join(full, fname)
            # Only the load/convert step is guarded; this is deliberate
            # best-effort so one unreadable file does not abort the whole fold.
            try:
                img = load_img(fpath, target_size=(224, 224))
                arr = img_to_array(img) / 255.0
            except Exception as e:
                print(f"⚠️ Skipping {fpath}: {e}")
                continue
            X.append(arr)
            y.append(1 if subdir == 'Good' else 0)
            paths.append(fpath)
            names.append(fname)
    if not X:
        # Keep the empty result shaped and typed like a real one: 4-D float
        # images and integer labels.
        return np.empty((0, 224, 224, 3), dtype=np.float32), np.empty((0,), dtype=int), [], []
    return np.stack(X, axis=0), np.array(y), paths, names

def pick_top_misclassified_originals(y_true, probs, paths, names, top_n):
    """
    Rank misclassified ORIGINAL images by confidence, return indices (into arrays) of top_n.
    - misclassified if predicted != y_true, where predicted = (prob >= 0.5)
    - confidence = max(p, 1-p)
    - only keep entries where filename passes is_original_filename()

    Args:
        y_true: true labels (0/1), aligned with probs and names.
        probs: model scores; flattened to 1-D before use.
        paths: accepted for signature compatibility; not read here.
        names: filenames aligned with probs, used for the original-only filter.
        top_n: maximum number of indices to return.

    Returns:
        An empty list when there are no candidates (including empty input);
        otherwise a NumPy array of at most top_n indices, most confident
        mistake first. Equal confidences keep their input order.
    """
    probs = np.asarray(probs).reshape(-1)
    preds = (probs >= 0.5).astype(int)
    conf  = np.maximum(probs, 1 - probs)

    # misclassified mask
    wrong = preds != y_true

    # original-only mask (no augmentation keywords).
    # dtype=bool is required: with an empty `names`, np.array([]) defaults to
    # float64 and `wrong & originals` would raise TypeError.
    originals = np.array([is_original_filename(n) for n in names], dtype=bool)

    # combine masks
    cand_idx = np.where(wrong & originals)[0]
    if cand_idx.size == 0:
        return []

    # sort by descending confidence; a stable sort makes ties deterministic
    order = cand_idx[np.argsort(-conf[cand_idx], kind="stable")]
    return order[:top_n]

# ================== Main ==================
# For every fold: load that fold's model, score the images under Fold{fold},
# and display the TOP_N most confidently misclassified non-augmented images.
# NOTE(review): Fold{fold} is treated as the validation set for model fold{fold};
# presumably it is the fold held out during that model's training — confirm.
for fold in FOLDS:
    print(f"\n🔎 Fold {fold}:")
    model_path = os.path.join(output_base, 'Models', f'fold{fold}', 'bull_transfer_model.keras')
    if not os.path.isfile(model_path):
        print(f"❌ Model not found: {model_path}")
        continue

    val_dir = os.path.join(bull_base_dir, f'Fold{fold}')
    X_val, y_val, paths, names = load_val_images_labels_paths(val_dir)
    # Skip the fold before loading the model if there is nothing to score.
    if X_val.shape[0] == 0:
        print("⚠️ No validation images found.")
        continue

    model = load_model(model_path)
    # One score per image; preds/conf are recomputed here (same formulas as in
    # pick_top_misclassified_originals) because they are needed for the plot titles.
    probs = model.predict(X_val, batch_size=BATCH, verbose=0).reshape(-1)
    preds = (probs >= 0.5).astype(int)
    conf  = np.maximum(probs, 1 - probs)

    # choose top-N misclassified originals
    top_idx = pick_top_misclassified_originals(y_val, probs, paths, names, TOP_N)
    if len(top_idx) == 0:
        print("✅ No misclassified ORIGINAL images found in this fold.")
        continue

    # Print filenames in one line
    print("📂 Top misclassified ORIGINAL filenames:")
    print(" | ".join([names[i] for i in top_idx]))

    # Show images
    # One subplot per selected image, laid out in a single row.
    fig, axes = plt.subplots(1, len(top_idx), figsize=(6 * len(top_idx), 6))
    # With a single subplot, plt.subplots returns a bare Axes; wrap it so the
    # zip below can iterate over it.
    if len(top_idx) == 1:
        axes = [axes]

    for ax, i in zip(axes, top_idx):
        # Re-read the image from disk for display rather than reusing X_val.
        img = load_img(paths[i], target_size=(224, 224))
        ax.imshow(img)
        ax.axis("off")
        ax.set_title(
            f"{names[i]}\nTrue:{'Good' if y_val[i] else 'Bad'} | Pred:{'Good' if preds[i] else 'Bad'} | Conf:{conf[i]:.2f}",
            fontsize=11
        )
    plt.suptitle(f"Fold {fold} — Top {len(top_idx)} misclassified ORIGINALS", fontsize=15)
    plt.show()

In [None]:
import os
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import load_img

# ===== Paths =====
bull_base_dir = '/Users/suzetteschulenburg/Desktop/BullsProcessed'

# ===== One hand-picked misclassified filename per fold =====
# Each value must match the file's name inside that fold's Good/ or Bad/ folder.
# Folds left as an empty string are skipped by the loop below.
misclassified_images = {
    1: "JH2128_16_Rating5_processed.jpg",
    2: "",
    3: "",
    4: "",
    5: "",
}

# ===== Display loop =====
for fold, fname in misclassified_images.items():
    # Nothing entered for this fold.
    if fname.strip() == "":
        continue

    # Look for the file under Good first, then Bad; the first hit wins and
    # found_path ends up as None if neither folder contains it.
    for subdir in ["Good", "Bad"]:
        candidate = os.path.join(bull_base_dir, f"Fold{fold}", subdir, fname)
        if os.path.isfile(candidate):
            found_path = candidate
            break
    else:
        found_path = None

    if not found_path:
        print(f"❌ Image not found for Fold {fold}: {fname}")
        continue

    # Render the image in its own 5x5 figure with the axes hidden.
    img = load_img(found_path, target_size=(224, 224))
    plt.figure(figsize=(5, 5))
    plt.imshow(img)
    plt.axis("off")
    plt.title(f"Fold {fold} - {fname}", fontsize=12)
    plt.show()