In [24]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
import pandas as pd

# ============ 1️⃣ LOAD AND PREPROCESS DATA ============ #
def load_data(image_folder, label_folder):
    """
    Load preprocessed images and labels from .npy files.
    """
    images = []
    labels = []
    image_files = sorted([f for f in os.listdir(image_folder) if f.startswith("image")])
    label_files = sorted([f for f in os.listdir(label_folder) if f.startswith("label")])

    for img_file, lbl_file in zip(image_files, label_files):
        img = np.load(os.path.join(image_folder, img_file))
        lbl = np.load(os.path.join(label_folder, lbl_file))
        images.append(img)
        labels.append(lbl)
    
    return np.array(images), np.array(labels)

# Paths to preprocessed data
image_folder = "preprocessed_data"
label_folder = "preprocessed_data"

X_train, Y_train = load_data(image_folder, label_folder)

# Normalize images (0-1) and convert labels to binary (0 or 1)
X_train = X_train / 255.0
Y_train = (Y_train > 0).astype(np.uint8)  # Convert to binary mask

# Expand dimensions for model compatibility
X_train = np.expand_dims(X_train, axis=-1)
Y_train = np.expand_dims(Y_train, axis=-1)

print(f"✅ Data Loaded: {X_train.shape[0]} images, Shape: {X_train.shape[1:]}")

# ============ 2️⃣ LIGHTWEIGHT U-NET MODEL ============ #
def build_unet(input_shape=(256, 256, 1)):
    inputs = layers.Input(shape=input_shape)

    # Encoder (downsampling)
    c1 = layers.Conv2D(16, (3, 3), activation='relu', padding='same')(inputs)
    p1 = layers.MaxPooling2D((2, 2))(c1)

    c2 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(p1)
    p2 = layers.MaxPooling2D((2, 2))(c2)

    c3 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(p2)
    p3 = layers.MaxPooling2D((2, 2))(c3)

    # Bottleneck
    b = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(p3)

    # Decoder (upsampling)
    u1 = layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same')(b)
    u1 = layers.Concatenate()([u1, c3])
    c4 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(u1)

    u2 = layers.Conv2DTranspose(32, (3, 3), strides=(2, 2), padding='same')(c4)
    u2 = layers.Concatenate()([u2, c2])
    c5 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(u2)

    u3 = layers.Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='same')(c5)
    u3 = layers.Concatenate()([u3, c1])
    c6 = layers.Conv2D(16, (3, 3), activation='relu', padding='same')(u3)

    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(c6)

    return models.Model(inputs, outputs)

model = build_unet()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# ============ 3️⃣ TRAIN THE MODEL ============ #
EPOCHS = 10  # Adjust if needed
BATCH_SIZE = 4  # Reduce batch size for memory optimization

model.fit(X_train, Y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=0.1)

# ============ 4️⃣ PREDICT ON TEST DATA ============ #
def predict_and_save(model, test_folder, output_folder):
    test_images = sorted([f for f in os.listdir(test_folder) if f.startswith("image")])
    
    for test_file in test_images:
        img = np.load(os.path.join(test_folder, test_file)) / 255.0
        img = np.expand_dims(img, axis=(0, -1))  # Add batch and channel dimensions
        
        pred_mask = model.predict(img)[0]  # Get prediction
        pred_mask = (pred_mask > 0.5).astype(np.uint8)  # Threshold to binary
        
        np.save(os.path.join(output_folder, test_file.replace("image", "pred_mask")), pred_mask)
        print(f"✅ Saved: {test_file}")

test_folder = "preprocessed_test_data"  # Update this if test data is in a different folder
output_folder = "output_predictions"
os.makedirs(output_folder, exist_ok=True)
predict_and_save(model, test_folder, output_folder)

# ============ 5️⃣ GENERATE SUBMISSION FILE ============ #
def rle_encode(mask):
    """
    Convert a binary mask to Run-Length Encoding (RLE).
    """
    pixels = mask.flatten(order='F')
    pixels = np.concatenate([[0], pixels, [0]])  # Add sentinel values
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return " ".join(str(x) for x in runs) if runs.size else "1 0"

submission = []
for file in sorted(os.listdir(output_folder)):
    if file.startswith("pred_mask"):
        slice_id = file.replace("pred_mask_", "").replace(".npy", "").replace("-", "_")
        mask = np.load(os.path.join(output_folder, file))
        rle_mask = rle_encode(mask)
        submission.append([slice_id, rle_mask])

df_submission = pd.DataFrame(submission, columns=['id', 'rle'])
df_submission.to_csv("submission.csv", index=False)

print("✅ Submission file created: submission.csv")


MemoryError: Unable to allocate 270. MiB for an array with shape (35389440,) and data type float64