<a href="https://colab.research.google.com/github/Anum-Ilyas9/Assignment2/blob/main/Assignment1_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Discrepancies
1. **Backbone Network**:
   - Paper: HRNet (multi-resolution fusion).
   - Implementation: ResNet101V2 (may lack fine-grained multi-resolution feature preservation).

2. **Classifier**:
   - Paper: DRBM + Softmax.
   - Implementation: Dense layer with sigmoid (simpler but less powerful for complex multi-label tasks).

3. **Preprocessing**:
   - Paper: Circular cropping, CLAHE, noise removal.
   - Implementation: Basic resizing and normalization.

4. **Augmentation**:
   - Paper: Comprehensive, addressing imbalances.
   - Implementation: None — the data generator only resizes and rescales images; no augmentation is applied.


In [None]:
# Import Libraries
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import Sequence
import pandas as pd
import os
import numpy as np
import cv2
from google.colab import drive
import zipfile

# Make Google Drive visible to the Colab runtime so the archive can be read
drive.mount('/content/drive')

# Location of the dataset archive inside Google Drive
zip_file_path = '/content/drive/My Drive/Dataset.zip'  # Update this path

# Unpack the archive into a local working directory
dataset_extract_path = './Dataset'
with zipfile.ZipFile(zip_file_path, mode='r') as archive:
    archive.extractall(path=dataset_extract_path)
    print("Dataset extracted!")

# Locations of the annotation CSV and the image folder after extraction.
# NOTE(review): the CSV is read from './Dataset/Dataset/' while images are
# read from './Dataset/Images' -- confirm both match the archive layout.
annotation_file_path = './Dataset/Dataset/annotation.csv'
images_folder_path = './Dataset/Images'

# Read the annotation table into a DataFrame
annotations = pd.read_csv(annotation_file_path)

# Filter annotations to include only rows with existing image files
def filter_valid_annotations(annotations, images_folder):
    """Keep only the annotation rows whose left and right images both exist.

    Args:
        annotations: DataFrame with 'Left-Fundus' and 'Right-Fundus' columns
            holding image file names relative to ``images_folder``.
        images_folder: Directory in which the image files are looked up.

    Returns:
        A new DataFrame containing the rows for which both files exist. The
        original index, column set and dtypes are preserved, including when
        no row survives the filter.
    """
    keep = []
    for left_name, right_name in zip(annotations['Left-Fundus'], annotations['Right-Fundus']):
        left_img_path = os.path.join(images_folder, left_name)
        right_img_path = os.path.join(images_folder, right_name)
        both_present = os.path.exists(left_img_path) and os.path.exists(right_img_path)
        if not both_present:
            print(f"Missing files: {left_img_path} or {right_img_path}")
        keep.append(both_present)

    # Select with a boolean mask rather than rebuilding the frame from row
    # objects: an empty result then still carries the original columns, so
    # later column lookups do not fail with KeyError.
    return annotations[np.array(keep, dtype=bool)].copy()

# Clean the annotations DataFrame
# Drop rows whose image files are not on disk
annotations = filter_valid_annotations(annotations, images_folder_path)
print(f"Filtered annotations: {len(annotations)} rows remain.")

# Collect every distinct diagnostic keyword that appears for either eye.
# Cells hold comma-separated keywords; surrounding whitespace is stripped.
all_classes = set()
for keyword_column in ('Left-Diagnostic Keywords', 'Right-Diagnostic Keywords'):
    for cell in annotations[keyword_column]:
        all_classes |= {keyword.strip() for keyword in cell.split(",")}

# Assign each keyword a stable column index (alphabetical order)
class_to_index = {cls: idx for idx, cls in enumerate(sorted(all_classes))}
num_labels = len(class_to_index)
print("Class to Index Mapping:", class_to_index)

# Custom Data Generator for Paired Inputs
class PairedDataGenerator(Sequence):
    """Keras ``Sequence`` that yields batches of paired left/right fundus images.

    Each item is ``({"input_left": left, "input_right": right}, labels)``,
    where ``labels`` is a multi-hot matrix built from the union of the left
    and right diagnostic keywords of each row.

    Args:
        annotations: DataFrame with 'Left-Fundus', 'Right-Fundus',
            'Left-Diagnostic Keywords' and 'Right-Diagnostic Keywords' columns.
        images_folder: Directory containing the image files.
        batch_size: Maximum number of rows per batch.
        target_size: Size tuple passed to ``cv2.resize``.
        validation_split: Fraction of the shuffled rows held out for the
            'validation' subset.
        subset: 'training' or 'validation'; any other value keeps all rows.
        class_to_index: Mapping from keyword to its column in the label vector.
    """

    def __init__(self, annotations, images_folder, batch_size, target_size, validation_split, subset, class_to_index):
        # Let the Keras base class initialise its own state.
        super().__init__()
        self.annotations = annotations
        self.images_folder = images_folder
        self.batch_size = batch_size
        self.target_size = target_size
        self.validation_split = validation_split
        self.subset = subset
        self.class_to_index = class_to_index

        # Shuffle with a fixed seed so the 'training' and 'validation'
        # instances see the same order and therefore disjoint slices.
        self.data = self.annotations.sample(frac=1, random_state=42)
        val_split_idx = int(len(self.data) * (1 - self.validation_split))
        if self.subset == 'training':
            self.data = self.data.iloc[:val_split_idx]
        elif self.subset == 'validation':
            self.data = self.data.iloc[val_split_idx:]

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        return int(np.ceil(len(self.data) / self.batch_size))

    def __getitem__(self, idx):
        """Load batch ``idx`` and return ``(inputs_dict, multi_hot_labels)``."""
        batch_data = self.data.iloc[idx * self.batch_size:(idx + 1) * self.batch_size]
        left_images, right_images, labels = [], [], []

        for _, row in batch_data.iterrows():
            left_img_path = os.path.join(self.images_folder, row['Left-Fundus'])
            right_img_path = os.path.join(self.images_folder, row['Right-Fundus'])

            # Rows whose files are not on disk are skipped.
            if not (os.path.exists(left_img_path) and os.path.exists(right_img_path)):
                continue

            left_images.append(self.load_image(left_img_path))
            right_images.append(self.load_image(right_img_path))

            # Combine left and right diagnostic keywords into a single multi-hot vector
            label_strs = row['Left-Diagnostic Keywords'] + ',' + row['Right-Diagnostic Keywords']
            multi_hot = np.zeros(len(self.class_to_index), dtype=np.float32)
            for label in label_strs.split(","):
                multi_hot[self.class_to_index[label.strip()]] = 1.0
            labels.append(multi_hot)

        left_images = np.array(left_images, dtype=np.float32)
        right_images = np.array(right_images, dtype=np.float32)
        labels = np.array(labels, dtype=np.float32)

        return ({"input_left": left_images, "input_right": right_images}, labels)

    def load_image(self, path):
        """Read an image, convert it to RGB, resize it and scale it to [0, 1].

        Raises:
            ValueError: If OpenCV cannot decode the file at ``path``.
        """
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError(f"Could not read image: {path}")
        # OpenCV decodes to BGR channel order; the ImageNet-pretrained
        # backbone used in this file expects RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, self.target_size)
        # float32 avoids building float64 batches that are cast down later.
        # NOTE(review): scaling is plain /255; confirm whether the backbone's
        # own preprocess_input scaling should be used instead.
        return img.astype(np.float32) / 255.0

# Parameters
batch_size = 32
target_size = (224, 224)

# Settings shared by the training and validation generators; only the
# `subset` argument differs between the two.
generator_kwargs = dict(
    annotations=annotations,
    images_folder=images_folder_path,
    batch_size=batch_size,
    target_size=target_size,
    validation_split=0.2,
    class_to_index=class_to_index,
)

# Load Training and Validation Data
train_data = PairedDataGenerator(subset='training', **generator_kwargs)
val_data = PairedDataGenerator(subset='validation', **generator_kwargs)

# Debugging: pull the first training batch and report its structure
batch = train_data[0]
batch_inputs, batch_labels = batch[0], batch[1]
print("Batch inputs:", batch_inputs.keys())
print("Left input shape:", batch_inputs['input_left'].shape)
print("Right input shape:", batch_inputs['input_right'].shape)
print("Labels shape:", batch_labels.shape)
print("First label (multi-hot):", batch_labels[0])

# Build Backbone Network
def build_backbone():
    """Return a frozen ImageNet-initialised ResNet101V2 without its top layers.

    The network is built for 224x224x3 inputs and is marked non-trainable,
    so only the layers added on top of it are updated during training.
    """
    resnet = tf.keras.applications.ResNet101V2(
        include_top=False,
        weights='imagenet',
        input_shape=(224, 224, 3),
    )
    resnet.trainable = False
    return resnet

# SENet Block
def senet_block(input_tensor, reduction_ratio=16):
    """Reweight the channels of a feature map with a squeeze-and-excitation gate.

    Args:
        input_tensor: Feature map whose last axis is the channel axis.
        reduction_ratio: Divisor applied to the channel count to size the
            bottleneck Dense layer.

    Returns:
        ``input_tensor`` multiplied by a per-channel sigmoid gate.
    """
    filters = input_tensor.shape[-1]
    # Keep at least one bottleneck unit: integer division yields 0 when the
    # channel count is smaller than the reduction ratio.
    bottleneck_units = max(1, filters // reduction_ratio)

    se = layers.GlobalAveragePooling2D()(input_tensor)          # squeeze
    se = layers.Dense(bottleneck_units, activation='relu')(se)  # bottleneck
    se = layers.Dense(filters, activation='sigmoid')(se)        # per-channel gate
    se = layers.multiply([input_tensor, se])
    return se

# Attention Block
def attention_block(input_tensor):
    """Add a Conv -> ReLU -> BatchNorm branch back onto the input.

    The 3x3 convolution keeps the channel count and spatial size of
    ``input_tensor`` ('same' padding), so the branch output can be summed
    with the input element-wise.
    """
    channels = input_tensor.shape[-1]
    conv = layers.Conv2D(filters=channels, kernel_size=(3, 3), padding='same')
    relu = layers.ReLU()
    norm = layers.BatchNormalization()

    branch = norm(relu(conv(input_tensor)))
    return layers.add([input_tensor, branch])

# Classification Network
def build_classifier(output_units):
    """Build the classification head applied to the fused feature map.

    The head pools the feature map globally, passes it through a 128-unit
    ReLU layer with 50% dropout, and ends in ``output_units`` independent
    sigmoid outputs.
    """
    head = models.Sequential()
    head.add(layers.GlobalAveragePooling2D())
    head.add(layers.Dense(128, activation='relu'))
    head.add(layers.Dropout(0.5))
    # Sigmoid rather than softmax: labels are multi-hot (multi-label task)
    head.add(layers.Dense(output_units, activation='sigmoid'))
    return head

# Full Fundus-DeepNet Model
def build_fundus_deepnet(output_units):
    """Assemble and compile the two-input (left/right eye) classification model.

    Both inputs pass through one shared backbone. Each side then gets its own
    attention block and SE block, the two feature maps are fused by
    element-wise multiplication, and a classifier head produces
    ``output_units`` sigmoid outputs.

    Returns:
        A compiled Keras model whose inputs are keyed "input_left" and
        "input_right".
    """
    input_left = layers.Input(shape=(224, 224, 3), name="input_left")
    input_right = layers.Input(shape=(224, 224, 3), name="input_right")

    # A single backbone instance is applied to both eyes, so its weights are shared
    shared_backbone = build_backbone()
    per_eye = [shared_backbone(eye) for eye in (input_left, input_right)]

    # Per-eye refinement: attention for both sides first, then SE for both sides
    per_eye = [attention_block(features) for features in per_eye]
    per_eye = [senet_block(features) for features in per_eye]

    # Fuse the two eyes by element-wise product, then classify
    fused = layers.multiply(per_eye)
    head = build_classifier(output_units)
    predictions = head(fused)

    network = models.Model(
        inputs={"input_left": input_left, "input_right": input_right},
        outputs=predictions,
    )
    network.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Training Function
def train_model(model, train_data, val_data, epochs=5, patience=10):
    """Fit ``model`` with early stopping on validation loss.

    Args:
        model: Compiled Keras model.
        train_data: Training data passed to ``model.fit``.
        val_data: Validation data passed to ``model.fit``.
        epochs: Maximum number of epochs to train.
        patience: Number of epochs without ``val_loss`` improvement before
            training stops. With the defaults (5 epochs, patience 10) the
            run ends before early stopping can trigger; pass a value smaller
            than ``epochs`` to make it effective.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=patience,
        restore_best_weights=True,
    )
    history = model.fit(train_data, validation_data=val_data, epochs=epochs, callbacks=[early_stopping])
    return history

# Main Script to Train the Model
# The output layer gets one unit per diagnostic keyword (num_labels).
model = build_fundus_deepnet(num_labels)
history = train_model(model, train_data, val_data, epochs=5)

# Save the Trained Model
model.save('fundus_deepnet_model.h5')

# Evaluate the Model
# NOTE: evaluation runs on val_data (the validation subset), so the value
# printed as "Test Accuracy" is a validation metric, not a held-out test score.
test_loss, test_accuracy = model.evaluate(val_data)
print(f"Test Accuracy: {test_accuracy}")


In [None]:
# Calculate Additional Metrics
from sklearn.metrics import precision_score, recall_score, f1_score

# Get predictions on the validation generator (named `val_data` in this
# notebook) and threshold the sigmoid outputs at 0.5 to get 0/1 labels
predictions = model.predict(val_data)
predictions = (predictions > 0.5).astype(int)

# Extract true labels batch by batch. Indexing explicitly reads exactly
# len(val_data) batches, in the same order model.predict consumed them.
true_labels = np.concatenate(
    [val_data[i][1] for i in range(len(val_data))], axis=0
).astype(int)

# Calculate metrics (macro average: unweighted mean over the label columns)
precision = precision_score(true_labels, predictions, average='macro')
recall = recall_score(true_labels, predictions, average='macro')
f1 = f1_score(true_labels, predictions, average='macro')

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

