<a href="https://colab.research.google.com/github/Romal27/DSGP-24-Retina94/blob/Validating-the-input/Neural_Network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import shutil
import uuid
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report

def detect_duplicates(folder):
    """Find images whose decoded RGB pixels repeat an earlier image.

    ``folder`` is expected to contain one sub-directory per class; entries
    that are not directories are ignored. Every file in a class directory is
    opened with PIL, converted to RGB and fingerprinted. The first file seen
    with a given fingerprint is kept as the original; any later file with the
    same fingerprint (in the same or another class) is reported as a
    duplicate. Files that cannot be processed are reported with ``print`` and
    skipped.

    Args:
        folder: Path of the dataset root directory.

    Returns:
        dict mapping each class directory name to the list of paths of the
        duplicate files found inside that class (possibly empty).
    """
    import hashlib  # local import so the block does not depend on other edits

    seen = {}
    duplicates = {}
    # os.listdir() order is arbitrary; sorting makes the choice of which copy
    # is the "original" reproducible between runs and machines.
    for cls in sorted(os.listdir(folder)):
        class_path = os.path.join(folder, cls)
        if not os.path.isdir(class_path):
            continue
        duplicates[cls] = []
        for img_name in sorted(os.listdir(class_path)):
            img_path = os.path.join(class_path, img_name)
            try:
                # The context manager releases the file handle; the original
                # code left one handle open per image.
                with Image.open(img_path) as img:
                    rgb = img.convert('RGB')
                    width, height = rgb.size
                    # A SHA-256 digest replaces the 64-bit builtin hash(), and
                    # the dimensions are mixed in so that two images with the
                    # same byte stream but different shapes do not match.
                    digest = hashlib.sha256()
                    digest.update(f"{width}x{height}:".encode())
                    digest.update(rgb.tobytes())
                    img_hash = digest.hexdigest()
            except Exception as e:
                # Best effort: one unreadable file must not abort the scan.
                print(f"Error processing {img_path}: {e}")
                continue
            if img_hash in seen:
                duplicates[cls].append(img_path)
            else:
                seen[img_hash] = img_path
    return duplicates


def augment_image(image):
    """Return ``image`` mirrored left-to-right.

    Args:
        image: Object exposing a PIL-style ``transpose`` method.

    Returns:
        The result of ``image.transpose(Image.FLIP_LEFT_RIGHT)``.
    """
    mirrored = image.transpose(Image.FLIP_LEFT_RIGHT)
    return mirrored

def augment_duplicates(folder, duplicates_per_class, output_folder):
    """Save an augmented copy of every listed duplicate image.

    A sub-directory of ``output_folder`` is created for each class key in
    ``duplicates_per_class``. Each listed file is opened, passed through
    ``augment_image`` and written there as ``<stem>_<6 hex chars>.png``.
    A failure on a single file is printed and the loop moves on.

    Args:
        folder: Accepted for interface compatibility; not used here.
        duplicates_per_class: Mapping of class name to list of file paths.
        output_folder: Root directory receiving the augmented images.
    """
    os.makedirs(output_folder, exist_ok=True)
    for cls in duplicates_per_class:
        target_dir = os.path.join(output_folder, cls)
        os.makedirs(target_dir, exist_ok=True)
        for filepath in duplicates_per_class[cls]:
            try:
                with Image.open(filepath) as img:
                    augmented_img = augment_image(img)
                    stem, _ = os.path.splitext(os.path.basename(filepath))
                    # Random suffix keeps same-named files from overwriting each other.
                    tag = uuid.uuid4().hex[:6]
                    destination = os.path.join(target_dir, f"{stem}_{tag}.png")
                    augmented_img.save(destination)
            except Exception as e:
                print(f"Error augmenting {filepath}: {e}")
                
def split_data(source_dir, output_dir, train_ratio=0.7, val_ratio=0.2):
    """Copy a class-per-folder dataset into train/val/test sub-folders.

    For every class directory in ``source_dir`` the files are split into
    three groups and copied to ``output_dir/<subset>/<class>/``. The test
    share is whatever remains after ``train_ratio`` and ``val_ratio``.

    Args:
        source_dir: Dataset root containing one sub-directory per class.
        output_dir: Root under which ``train``, ``val`` and ``test`` are created.
        train_ratio: Fraction of each class used for training, in (0, 1).
        val_ratio: Fraction of each class used for validation, >= 0.

    Raises:
        ValueError: If the ratios are inconsistent, or if a class cannot be
            split (for example because it holds too few images).
    """
    tolerance = 1e-9  # absorbs float noise such as 0.7 + 0.3 != 1.0 exactly
    if not 0 < train_ratio < 1:
        raise ValueError(f"train_ratio must be in (0, 1), got {train_ratio}")
    if val_ratio < 0 or train_ratio + val_ratio > 1 + tolerance:
        raise ValueError(
            f"val_ratio must be >= 0 and train_ratio + val_ratio <= 1, "
            f"got train_ratio={train_ratio}, val_ratio={val_ratio}"
        )
    test_ratio = max(0.0, 1 - (train_ratio + val_ratio))

    for cls in sorted(os.listdir(source_dir)):
        class_path = os.path.join(source_dir, cls)
        if not os.path.isdir(class_path):
            continue
        # Sorted, files only: random_state=42 is only reproducible when the
        # input order is, and os.listdir() order is arbitrary.
        images = sorted(
            name for name in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, name))
        )
        try:
            # No `stratify`: all samples in this call share one label, so
            # stratification cannot change the class balance.
            train, temp = train_test_split(images, test_size=(1 - train_ratio), random_state=42)
            if val_ratio == 0:
                val, test = [], temp
            elif test_ratio <= tolerance:
                val, test = temp, []
            else:
                val, test = train_test_split(
                    temp,
                    test_size=(test_ratio / (val_ratio + test_ratio)),
                    random_state=42,
                )
        except ValueError as err:
            # Keep the exception type but say which class failed.
            raise ValueError(
                f"Cannot split class {cls!r} ({len(images)} images): {err}"
            ) from err

        for subset, subset_images in zip(["train", "val", "test"], [train, val, test]):
            subset_path = os.path.join(output_dir, subset, cls)
            os.makedirs(subset_path, exist_ok=True)
            for img_name in subset_images:
                shutil.copy(os.path.join(class_path, img_name), os.path.join(subset_path, img_name))

# Define Paths
dataset_path = "D:/Datasets_Retina"
augmented_dataset_path = "D:/Datasets_Retina_Augmented"
combined_dataset_path = "D:/Datasets_Combined"
final_dataset_path = "D:/Datasets_Final"

# Handle duplicates: write a flipped copy of every detected duplicate
duplicates_per_class = detect_duplicates(dataset_path)
augment_duplicates(dataset_path, duplicates_per_class, augmented_dataset_path)

# Combine original and augmented datasets
shutil.copytree(dataset_path, combined_dataset_path, dirs_exist_ok=True)
shutil.copytree(augmented_dataset_path, combined_dataset_path, dirs_exist_ok=True)

# Split the combined dataset into train/val/test folders.
# NOTE(review): the combined folder still contains every detected duplicate
# as well as its flipped copy, and the split is random per file, so
# near-identical images can end up in different subsets. Splitting before
# augmenting would be needed to rule out leakage.
split_data(combined_dataset_path, final_dataset_path)

# Image Processing Parameters
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# Data Augmentation for training set
data_gen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=25,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    zoom_range=0.3,
    shear_range=0.2,
    brightness_range=[0.7, 1.3]
)

# Validation/test images are only rescaled. Feeding them through the random
# training augmentation makes val_loss, early stopping and the reported
# metrics noisy and non-repeatable.
eval_gen = ImageDataGenerator(rescale=1./255)

# Load Data
train_generator = data_gen.flow_from_directory(os.path.join(final_dataset_path, "train"), target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode="binary")
val_generator = eval_gen.flow_from_directory(os.path.join(final_dataset_path, "val"), target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode="binary", shuffle=False)
test_generator = eval_gen.flow_from_directory(os.path.join(final_dataset_path, "test"), target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode="binary", shuffle=False)

# Improved CNN Model with Regularization
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', kernel_regularizer=l2(0.001), input_shape=IMG_SIZE + (3,)),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    Dropout(0.3),

    Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    Dropout(0.4),

    Conv2D(128, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    Dropout(0.5),

    Flatten(),
    Dense(256, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.5),

    Dense(1, activation='sigmoid')
])

# Compile Model with a lower learning rate
model.compile(optimizer=Adam(learning_rate=0.0003), loss='binary_crossentropy', metrics=['accuracy'])

# Early Stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True)

# Train Model
model.fit(train_generator, validation_data=val_generator, epochs=25, callbacks=[early_stopping])

# Evaluate Model
val_loss, val_accuracy = model.evaluate(val_generator)
print(f"\nValidation Accuracy: {val_accuracy * 100:.2f}%")

test_loss, test_accuracy = model.evaluate(test_generator)
print(f"\nTest Accuracy: {test_accuracy * 100:.2f}%")

# Predictions (flattened from shape (N, 1) to (N,) to match y_true)
y_true = test_generator.classes
y_pred = (model.predict(test_generator) > 0.5).astype(int).ravel()

# Label names ordered by the class index Keras assigned to each directory.
# Keras indexes class folders alphanumerically, so a hard-coded list can
# silently attach the wrong name to each row of the report.
target_names = sorted(test_generator.class_indices, key=test_generator.class_indices.get)

# Classification Report
print("\nClassification Report:")
print(classification_report(y_true, y_pred, target_names=target_names))


Found 5222 images belonging to 2 classes.
Found 1492 images belonging to 2 classes.
Found 748 images belonging to 2 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/25
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m550s[0m 3s/step - accuracy: 0.9392 - loss: 1.7771 - val_accuracy: 0.5442 - val_loss: 19.9984
Epoch 2/25
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m509s[0m 3s/step - accuracy: 0.9706 - loss: 0.9057 - val_accuracy: 0.5509 - val_loss: 18.9203
Epoch 3/25
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m354s[0m 2s/step - accuracy: 0.9772 - loss: 0.7324 - val_accuracy: 0.7882 - val_loss: 5.6246
Epoch 4/25
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m353s[0m 2s/step - accuracy: 0.9808 - loss: 0.6411 - val_accuracy: 0.9155 - val_loss: 2.9250
Epoch 5/25
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m354s[0m 2s/step - accuracy: 0.9848 - loss: 0.5825 - val_accuracy: 0.9350 - val_loss: 2.2543
Epoch 6/25
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m354s[0m 2s/step - accuracy: 0.9776 - loss: 0.6140 - val_accuracy: 0.9015 - val_loss: 3.1671
Epoch 7/25
[1m164/1

In [None]:
import os
import shutil
import uuid
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report


def detect_duplicates(folder):
    """Detect duplicate images in a class-per-folder dataset.

    ``folder`` is expected to contain one sub-directory per class; entries
    that are not directories are ignored. Every file in a class directory is
    opened with PIL, converted to RGB and fingerprinted. The first file seen
    with a given fingerprint is kept as the original; any later file with the
    same fingerprint (in the same or another class) is reported as a
    duplicate. Files that cannot be processed are reported with ``print`` and
    skipped.

    Args:
        folder: Path of the dataset root directory.

    Returns:
        dict mapping each class directory name to the list of paths of the
        duplicate files found inside that class (possibly empty).
    """
    import hashlib  # local import so the block does not depend on other edits

    seen = {}
    duplicates = {}
    # os.listdir() order is arbitrary; sorting makes the choice of which copy
    # is the "original" reproducible between runs and machines.
    for cls in sorted(os.listdir(folder)):
        class_path = os.path.join(folder, cls)
        if not os.path.isdir(class_path):
            continue
        duplicates[cls] = []
        for img_name in sorted(os.listdir(class_path)):
            img_path = os.path.join(class_path, img_name)
            try:
                # The context manager releases the file handle; the original
                # code left one handle open per image.
                with Image.open(img_path) as img:
                    rgb = img.convert('RGB')
                    width, height = rgb.size
                    # A SHA-256 digest replaces the 64-bit builtin hash(), and
                    # the dimensions are mixed in so that two images with the
                    # same byte stream but different shapes do not match.
                    digest = hashlib.sha256()
                    digest.update(f"{width}x{height}:".encode())
                    digest.update(rgb.tobytes())
                    img_hash = digest.hexdigest()
            except Exception as e:
                # Best effort: one unreadable file must not abort the scan.
                print(f"Error processing {img_path}: {e}")
                continue
            if img_hash in seen:
                duplicates[cls].append(img_path)
            else:
                seen[img_hash] = img_path
    return duplicates

def augment_image(image):
    """Return ``image`` flipped horizontally.

    Args:
        image: Object exposing a PIL-style ``transpose`` method.

    Returns:
        The result of ``image.transpose(Image.FLIP_LEFT_RIGHT)``.
    """
    flipped = image.transpose(Image.FLIP_LEFT_RIGHT)
    return flipped

def augment_duplicates(folder, duplicates_per_class, output_folder):
    """Write an augmented version of each duplicate image, grouped by class.

    A sub-directory of ``output_folder`` is created for each class key in
    ``duplicates_per_class``. Each listed file is opened, passed through
    ``augment_image`` and saved there as ``<stem>_<6 hex chars>.png``.
    Errors on individual files are printed and do not stop the run.

    Args:
        folder: Accepted for interface compatibility; not used here.
        duplicates_per_class: Mapping of class name to list of file paths.
        output_folder: Root directory receiving the augmented images.
    """
    os.makedirs(output_folder, exist_ok=True)
    for cls in duplicates_per_class:
        class_dir = os.path.join(output_folder, cls)
        os.makedirs(class_dir, exist_ok=True)
        for filepath in duplicates_per_class[cls]:
            try:
                with Image.open(filepath) as img:
                    augmented_img = augment_image(img)
                    base_name = os.path.basename(filepath)
                    stem = os.path.splitext(base_name)[0]
                    # Random suffix keeps same-named files from overwriting each other.
                    suffix = uuid.uuid4().hex[:6]
                    augmented_img.save(os.path.join(class_dir, f"{stem}_{suffix}.png"))
            except Exception as e:
                print(f"Error augmenting {filepath}: {e}")

dataset_path = "D:/Datasets_Retina"
final_dataset_path = "D:/Datasets_Final"

# Image Processing
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
K_FOLDS = 3 # Number of cross-validation folds

# Data Augmentation (training folds only)
data_gen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=25,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    zoom_range=0.3,
    shear_range=0.2,
    brightness_range=[0.7, 1.3]
)

# Validation/test images are only rescaled. Evaluating on randomly augmented
# images makes val_loss, early stopping and the fold accuracies noisy.
eval_gen = ImageDataGenerator(rescale=1./255)


def build_model():
    """Build and compile a fresh CNN so each fold starts from new weights."""
    net = Sequential([
        Conv2D(32, (3, 3), activation='relu', kernel_regularizer=l2(0.001), input_shape=IMG_SIZE + (3,)),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        Dropout(0.3),

        Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        Dropout(0.4),

        Conv2D(128, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        Dropout(0.5),

        Flatten(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.001)),
        Dropout(0.5),

        Dense(1, activation='sigmoid')
    ])
    net.compile(optimizer=Adam(learning_rate=0.0003), loss='binary_crossentropy', metrics=['accuracy'])
    return net


# Class names come from the directory names and are passed explicitly to every
# generator, so the training folds, validation folds and the test set all use
# the same name -> index mapping.
train_root = os.path.join(final_dataset_path, "train")
class_names = sorted(
    entry for entry in os.listdir(train_root)
    if os.path.isdir(os.path.join(train_root, entry))
)

all_images = []
all_labels = []
all_class_names = []

for cls in class_names:
    class_path = os.path.join(train_root, cls)
    label = 1 if cls == "Fundus" else 0  # Assuming binary classification
    for img_name in sorted(os.listdir(class_path)):
        all_images.append(os.path.join(class_path, img_name))
        all_labels.append(label)
        all_class_names.append(cls)

all_images = np.array(all_images)
all_labels = np.array(all_labels)
all_class_names = np.array(all_class_names)

# Stratified K-Fold Cross-Validation
skf = StratifiedKFold(n_splits=K_FOLDS, shuffle=True, random_state=42)
fold_accuracies = []

for fold, (train_idx, val_idx) in enumerate(skf.split(all_images, all_labels)):
    print(f"\nTraining Fold {fold + 1}/{K_FOLDS}...\n")

    train_images, val_images = all_images[train_idx], all_images[val_idx]
    train_labels, val_labels = all_labels[train_idx], all_labels[val_idx]

    # Create Generators; the "class" column holds the directory names
    train_df = pd.DataFrame({"filename": train_images, "class": all_class_names[train_idx]})
    val_df = pd.DataFrame({"filename": val_images, "class": all_class_names[val_idx]})

    train_generator = data_gen.flow_from_dataframe(
        train_df, x_col="filename", y_col="class",
        target_size=IMG_SIZE, batch_size=BATCH_SIZE,
        class_mode="binary", classes=class_names, shuffle=True
    )

    val_generator = eval_gen.flow_from_dataframe(
        val_df, x_col="filename", y_col="class",
        target_size=IMG_SIZE, batch_size=BATCH_SIZE,
        class_mode="binary", classes=class_names, shuffle=False
    )

    # Model Architecture
    model = build_model()

    # Train Model with Early Stopping
    early_stopping = EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True)

    model.fit(train_generator, validation_data=val_generator, epochs=20, callbacks=[early_stopping])

    # Evaluate
    val_loss, val_accuracy = model.evaluate(val_generator)
    fold_accuracies.append(val_accuracy * 100)
    print(f"Fold {fold+1} Validation Accuracy: {val_accuracy * 100:.2f}%")

# Compute Final Cross-Validation Accuracy
final_accuracy = np.mean(fold_accuracies)
print(f"\nFinal Cross-Validated Accuracy: {final_accuracy:.2f}%")


# NOTE: `model` below is the model trained in the last fold only; the report
# is not an ensemble of the folds.
test_generator = eval_gen.flow_from_directory(
    os.path.join(final_dataset_path, "test"),
    target_size=IMG_SIZE, batch_size=BATCH_SIZE,
    class_mode="binary", classes=class_names, shuffle=False
)

# Flatten predictions from shape (N, 1) to (N,) to match y_true
y_true = test_generator.classes
y_pred = (model.predict(test_generator) > 0.5).astype(int).ravel()

# target_names follow the class index order used by the generators; a
# hard-coded list can attach the wrong name to each row of the report.
print("\nFinal Classification Report:")
print(classification_report(y_true, y_pred, target_names=class_names))



Training Fold 1/5...

Found 4177 validated image filenames belonging to 2 classes.
Found 1045 validated image filenames belonging to 2 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/25
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m591s[0m 5s/step - accuracy: 0.9324 - loss: 1.6254 - val_accuracy: 0.5445 - val_loss: 26.7452
Epoch 2/25
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m665s[0m 5s/step - accuracy: 0.9676 - loss: 0.9263 - val_accuracy: 0.5445 - val_loss: 22.1755
Epoch 3/25
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m279s[0m 2s/step - accuracy: 0.9780 - loss: 0.7868 - val_accuracy: 0.5914 - val_loss: 12.8744
Epoch 4/25
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m280s[0m 2s/step - accuracy: 0.9775 - loss: 0.7407 - val_accuracy: 0.7713 - val_loss: 6.6773
Epoch 5/25
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m284s[0m 2s/step - accuracy: 0.9776 - loss: 0.6766 - val_accuracy: 0.8325 - val_loss: 3.8048
Epoch 6/25
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m651s[0m 5s/step - accuracy: 0.9770 - loss: 0.6196 - val_accuracy: 0.9139 - val_loss: 1.9598
Epoch 7/25
[1m131/

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/25
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m280s[0m 2s/step - accuracy: 0.9090 - loss: 2.6395 - val_accuracy: 0.6919 - val_loss: 2.4172
Epoch 2/25
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m276s[0m 2s/step - accuracy: 0.9679 - loss: 0.8916 - val_accuracy: 0.7426 - val_loss: 3.5553
Epoch 3/25
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m283s[0m 2s/step - accuracy: 0.9725 - loss: 0.7150 - val_accuracy: 0.7656 - val_loss: 5.1017
Epoch 4/25
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m938s[0m 7s/step - accuracy: 0.9726 - loss: 0.6940 - val_accuracy: 0.8402 - val_loss: 4.2115
Epoch 5/25
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m274s[0m 2s/step - accuracy: 0.9760 - loss: 0.6415 - val_accuracy: 0.8469 - val_loss: 3.5209
Epoch 6/25
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m492s[0m 4s/step - accuracy: 0.9811 - loss: 0.6133 - val_accuracy: 0.8880 - val_loss: 2.4216
Epoch 7/25
[1m131/131