#### Load Images and Masks

1. First, we load the images and masks from their respective directories, and the metadata from `metadata.json`.
2. Images: each image is an angiogram in JPEG format.
3. Masks: each mask is a binary image in which white pixels (value 255) mark an anomaly (stenosis or aneurysm).
4. Metadata: the `metadata.json` file lists the coordinates of the anomalies (stenosis and aneurysm) for each image.

In [2]:
import os
import cv2
import json
import numpy as np

# Dataset locations, given as paths relative to the current working directory.
image_dir = 'images'
mask_dir = 'masks'
metadata_file = 'metadata.json'

# Load metadata
# JSON text is UTF-8 by specification; naming the encoding explicitly keeps the
# read from depending on the platform's default locale encoding.
with open(metadata_file, 'r', encoding='utf-8') as f:
    metadata = json.load(f)

In [3]:
def load_and_preprocess_data(image_dir, mask_dir, metadata, target_size=(256, 256)):
    """Load the image/mask pairs listed in ``metadata`` and derive aneurysm labels.

    For every metadata entry, ``<image_dir>/<image_id>.jpg`` and
    ``<mask_dir>/<image_id>.png`` are read as single-channel grayscale,
    resized to ``target_size``, and collected. Image pixels are scaled by
    1/255; masks are resized with nearest-neighbour interpolation and keep
    their original pixel values.

    Args:
        image_dir: Directory holding the ``.jpg`` images.
        mask_dir: Directory holding the ``.png`` masks.
        metadata: Iterable of dicts, each providing ``"image_id"`` and
            ``["anomalies"]["aneurysm"]``.
        target_size: Output size handed to ``cv2.resize``, which interprets it
            as ``(width, height)``.

    Returns:
        Tuple ``(images, masks, labels)`` of NumPy arrays with one element per
        successfully loaded entry. ``labels`` is 1 where the entry's aneurysm
        list is non-empty and 0 otherwise.

    Warns:
        UserWarning: If one or more entries were skipped because their image
            or mask could not be read.

    Raises:
        KeyError: If a loaded entry lacks ``"image_id"`` or
            ``["anomalies"]["aneurysm"]``.
    """
    import warnings  # local import so the function does not rely on another cell

    images = []
    masks = []
    labels = []
    skipped_ids = []

    for entry in metadata:
        image_id = entry["image_id"]
        image_path = os.path.join(image_dir, f"{image_id}.jpg")
        mask_path = os.path.join(mask_dir, f"{image_id}.png")

        # cv2.imread signals a missing or unreadable file by returning None
        # rather than raising, so failures must be checked explicitly.
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

        if image is None or mask is None:
            # Record the entry instead of dropping it silently.
            skipped_ids.append(image_id)
            continue

        # INTER_AREA for the image; INTER_NEAREST for the mask so that no new
        # intermediate pixel values are introduced by the resize.
        image_resized = cv2.resize(image, target_size, interpolation=cv2.INTER_AREA)
        mask_resized = cv2.resize(mask, target_size, interpolation=cv2.INTER_NEAREST)

        images.append(image_resized / 255.0)
        masks.append(mask_resized)

        # Label based on aneurysm presence only (1 = present, 0 = absent).
        labels.append(1 if entry["anomalies"]["aneurysm"] else 0)

    if skipped_ids:
        preview = skipped_ids[:10]
        suffix = ", ..." if len(skipped_ids) > len(preview) else ""
        warnings.warn(
            f"Skipped {len(skipped_ids)} metadata entries whose image or mask "
            f"could not be read (loaded {len(images)}): {preview}{suffix}",
            stacklevel=2,
        )

    return np.array(images), np.array(masks), np.array(labels)


In [4]:
from sklearn.model_selection import train_test_split
# Tunable settings for this step, named so they are easy to find and change.
TARGET_SIZE = (256, 256)   # size every image and mask is resized to
VAL_FRACTION = 0.2         # share of samples held out for validation
SPLIT_SEED = 42            # fixed seed so the split is reproducible

# Read every image/mask pair listed in the metadata and build the labels.
images, masks, labels = load_and_preprocess_data(
    image_dir,
    mask_dir,
    metadata,
    target_size=TARGET_SIZE,
)

# Hold out part of the data for validation. Only the images and labels are
# split here; `masks` stays as the full, unsplit array.
X_train, X_val, y_train, y_val = train_test_split(
    images,
    labels,
    test_size=VAL_FRACTION,
    random_state=SPLIT_SEED,
)

In [5]:
# Print the shapes of the datasets
# Each image array is (n_samples, rows, cols); the last two dimensions come
# from the target_size passed to load_and_preprocess_data.
print(f"Training images shape: {X_train.shape}")
print(f"Validation images shape: {X_val.shape}")

Training images shape: (334, 256, 256)
Validation images shape: (84, 256, 256)


In [6]:
# There is one label per image, so each label array's length should match the
# first dimension of the corresponding image array.
print(f"Training labels shape: {y_train.shape}")
print(f"Validation labels shape: {y_val.shape}")

Training labels shape: (334,)
Validation labels shape: (84,)


In [7]:
import numpy as np

# Write each split to its own .npy file in the current working directory.
split_arrays = {
    'X_train.npy': X_train,
    'y_train.npy': y_train,
    'X_val.npy': X_val,
    'y_val.npy': y_val,
}
for npy_path, split_array in split_arrays.items():
    np.save(npy_path, split_array)