In [1]:
!pip install zarr




In [2]:
# Standard libraries
import os
import json

# Third-party libraries
import zarr
import numpy as np
import pandas as pd
import tensorflow as tf

# TensorFlow/Keras imports
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input,
    Conv3D,
    MaxPooling3D,
    UpSampling3D,
    Concatenate,
    ZeroPadding3D,
    Cropping3D
)



In [3]:
# --- Paths ---
# Root of the competition dataset; every other input path is derived from it.
ROOT_DIR = "/kaggle/input/czii-cryo-et-object-identification"
# One sub-directory per training experiment; load_data reads
# <experiment>/VoxelSpacing10.000/denoised.zarr below this directory.
TRAIN_DIR = os.path.join(ROOT_DIR, "train/static/ExperimentRuns")
# One sub-directory per training experiment; load_data reads the *.json files
# in <experiment>/Picks below this directory.
TRAIN_LABELS_DIR = os.path.join(ROOT_DIR, "train/overlay/ExperimentRuns")
# Test volumes; make_submission reads them with the same layout as TRAIN_DIR.
TEST_DIR = os.path.join(ROOT_DIR, "test/static/ExperimentRuns")
# Output CSV, written relative to the current working directory.
SUBMISSION_PATH = "submission.csv"


In [4]:
def load_zarr(filepath):
    """Open a Zarr store read-only and return its "0" entry.

    Prints a message and returns None when ``filepath`` does not exist.
    """
    if os.path.exists(filepath):
        store = zarr.open(filepath, mode='r')
        # Entry "0" is the highest-resolution level of the store.
        return store["0"]

    print(f"Zarr file not found: {filepath}")
    return None


In [5]:
def load_ground_truth(filepath):
    """Load particle coordinates from a nested picks JSON file.

    The file is expected to hold a top-level ``"points"`` list whose entries
    carry a ``"location"`` mapping with ``"x"``, ``"y"`` and ``"z"`` keys.
    Entries without a ``"location"`` key are skipped.

    Args:
        filepath: Path to the JSON file.

    Returns:
        A float ``np.ndarray`` of shape ``(N, 3)`` with one ``[x, y, z]`` row
        per point. A missing, unreadable or malformed file, and a file without
        any points, all yield an empty ``(0, 3)`` array, so callers get the
        same type and rank on every path.
    """
    no_points = np.empty((0, 3), dtype=float)

    if not os.path.exists(filepath):
        print(f"Ground truth file not found: {filepath}")
        return no_points

    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
        points = data.get("points", [])
        coordinates = [
            [p['location']['x'], p['location']['y'], p['location']['z']]
            for p in points if 'location' in p
        ]
        # reshape(-1, 3) keeps the (N, 3) layout even when N == 0.
        return np.asarray(coordinates, dtype=float).reshape(-1, 3)
    except (OSError, ValueError, KeyError, TypeError, AttributeError) as e:
        # Best effort: an unreadable file, invalid JSON (a ValueError subclass)
        # or an unexpected structure is reported and treated as "no points".
        print(f"Error loading JSON file {filepath}: {e}")
        return no_points


In [6]:
def preprocess_volume(volume):
    """Scale a 3D volume by its maximum value and append a channel axis.

    Args:
        volume: Array-like volume (anything ``np.asarray`` accepts), or None.

    Returns:
        None when ``volume`` is None. Otherwise an ndarray equal to
        ``volume / max(volume)`` with a trailing axis of length 1. When the
        maximum is exactly 0 the values are left unscaled, because dividing
        by it would fill the result with NaN/inf.
    """
    if volume is None:
        return None

    # Convert once up front so an input that is not already an ndarray is not
    # converted separately for the max and for the division.
    volume = np.asarray(volume)
    peak = np.max(volume)
    # Dividing by 1 keeps the same dtype promotion as the normal path.
    divisor = peak if peak != 0 else 1
    return np.expand_dims(volume / divisor, axis=-1)


In [7]:
def align_shapes(tensor1, tensor2):
    """Make tensor1's spatial shape match tensor2's so both can be concatenated.

    Axes 1-3 of the two static shapes are compared. On every axis tensor1 is
    centre-cropped with Cropping3D where it is larger than tensor2 and
    zero-padded with ZeroPadding3D where it is smaller; an odd difference puts
    the extra voxel on the far end. tensor2 is never modified: padding the
    tensor that is already the larger one would widen the mismatch instead of
    closing it.

    Args:
        tensor1: Tensor whose spatial axes are adjusted.
        tensor2: Reference tensor that defines the target spatial shape.

    Returns:
        ``(tensor1, tensor2)`` with matching sizes on axes 1-3. When the
        shapes already agree both inputs are returned as they were passed in.

    Note:
        The sizes on axes 1-3 must be known integers; an unknown (None) size
        cannot be subtracted and raises TypeError.
    """
    cropping = []
    padding = []

    for s1, s2 in zip(tensor1.shape[1:4], tensor2.shape[1:4]):
        diff = s1 - s2
        excess = max(diff, 0)    # voxels tensor1 has too many on this axis
        deficit = max(-diff, 0)  # voxels tensor1 is missing on this axis
        cropping.append((excess // 2, excess - excess // 2))
        padding.append((deficit // 2, deficit - deficit // 2))

    # Only insert a layer when it actually changes something.
    if any(front or back for front, back in cropping):
        tensor1 = Cropping3D(cropping=cropping)(tensor1)
    if any(front or back for front, back in padding):
        tensor1 = ZeroPadding3D(padding=padding)(tensor1)

    return tensor1, tensor2

In [8]:
def crop_tensor(tensor, target_shape):
    """Centre-crop the three spatial axes of ``tensor`` down to ``target_shape``.

    Axes 1-3 of ``tensor.shape`` are compared with entries 1-3 of
    ``target_shape``. An axis that is larger than its target loses the surplus,
    split between both ends (the extra voxel of an odd surplus comes off the
    far end); an axis that is not larger is left untouched. A Cropping3D layer
    is applied in every case, even when nothing needs cropping.
    """
    def margins(current, wanted):
        surplus = current - wanted
        if surplus <= 0:
            return (0, 0)
        front = surplus // 2
        return (front, surplus - front)

    per_axis = [
        margins(current, wanted)
        for current, wanted in zip(tensor.shape[1:4], target_shape[1:4])
    ]
    return Cropping3D(cropping=per_axis)(tensor)

In [9]:
def unet_3d(input_shape):
    """Build a two-level 3D U-Net ending in a single sigmoid channel.

    The encoder applies two convolution + 2x max-pooling stages (16 and 32
    filters), followed by a 64-filter bottleneck. Each decoder stage upsamples
    by 2, crops the matching encoder feature map to the upsampled shape with
    crop_tensor, concatenates the two and convolves. Because the skip tensors
    are cropped to the upsampled size, the output has the spatial size of the
    last upsampled tensor, which can be smaller than the input.
    """
    def conv3(filters):
        # All feature convolutions share kernel size, activation and padding.
        return Conv3D(filters, (3, 3, 3), activation='relu', padding='same')

    def upsample_and_merge(deep, skip):
        # Upsample the deeper features, then join them with the cropped skip.
        upsampled = UpSampling3D((2, 2, 2))(deep)
        skip_cropped = crop_tensor(skip, upsampled.shape)
        return Concatenate()([skip_cropped, upsampled])

    inputs = Input(shape=input_shape)

    # Encoder
    enc1 = conv3(16)(inputs)
    down1 = MaxPooling3D((2, 2, 2))(enc1)
    enc2 = conv3(32)(down1)
    down2 = MaxPooling3D((2, 2, 2))(enc2)

    # Bottleneck
    bottleneck = conv3(64)(down2)

    # Decoder
    merged2 = upsample_and_merge(bottleneck, enc2)
    dec2 = conv3(32)(merged2)
    merged1 = upsample_and_merge(dec2, enc1)
    dec1 = conv3(16)(merged1)

    # Per-voxel probability
    outputs = Conv3D(1, (1, 1, 1), activation='sigmoid')(dec1)

    return Model(inputs, outputs)



In [10]:
def load_data(train_dir, labels_dir):
    """Load training volumes and their particle labels, one entry per experiment.

    For every sub-directory ``<exp>`` of ``train_dir`` the volume is read from
    ``<exp>/VoxelSpacing10.000/denoised.zarr`` and the labels from every
    ``*.json`` file in ``<labels_dir>/<exp>/Picks``. Experiments are skipped
    when either path is missing, when no labels are found, or when the volume
    cannot be loaded.

    Experiments and label files are visited in sorted order, because
    ``os.listdir`` gives no ordering guarantee and the sample order should be
    reproducible. Labels are collected before the volume is read so that an
    experiment without labels never pays for loading its volume.

    Args:
        train_dir: Directory holding one sub-directory per experiment.
        labels_dir: Directory holding the matching label sub-directories.

    Returns:
        ``(X, Y)`` where ``X`` is ``np.array`` of the preprocessed volumes and
        ``Y`` is a list with, per volume, the list of label rows returned by
        load_ground_truth. ``X[i]`` and ``Y[i]`` belong to the same experiment.
        NOTE(review): ``np.array(X)`` presumes all volumes share one shape —
        confirm for the dataset in use.
    """
    X, Y = [], []
    experiments = sorted(
        exp for exp in os.listdir(train_dir)
        if os.path.isdir(os.path.join(train_dir, exp))
    )

    for exp in experiments:
        volume_path = os.path.join(train_dir, exp, "VoxelSpacing10.000", "denoised.zarr")
        labels_dir_path = os.path.join(labels_dir, exp, "Picks")

        if not os.path.exists(volume_path) or not os.path.exists(labels_dir_path):
            continue

        # Cheap step first: gather every label file of this experiment.
        experiment_labels = []
        for file in sorted(os.listdir(labels_dir_path)):
            if file.endswith(".json"):
                particle_path = os.path.join(labels_dir_path, file)
                experiment_labels.extend(load_ground_truth(particle_path))

        if len(experiment_labels) == 0:
            continue

        print(f"Loading volume from: {volume_path}")
        volume = preprocess_volume(load_zarr(volume_path))
        if volume is None:
            continue

        X.append(volume)
        Y.append(experiment_labels)

    print(f"Total Loaded: {len(X)} volumes and {len(Y)} label sets.")
    return np.array(X), Y


In [11]:
def crop_to_match(y_true, y_pred):
    """Centre-crop the spatial axes (1-3) of y_true to the size of y_pred's.

    The crop is computed from the runtime shapes and applied with ``tf.slice``.
    ``tf.image.resize_with_crop_or_pad`` cannot be used here: it accepts only a
    target height and width and does not handle 5-D volume tensors.

    Args:
        y_true: Target tensor; assumed 5-D (batch, d0, d1, d2, channels) —
            TODO confirm against the caller.
        y_pred: Prediction tensor with the same layout.

    Returns:
        ``y_true`` with axes 1-3 cropped around the centre to the
        corresponding sizes of ``y_pred``. An axis on which ``y_true`` is
        already the smaller one is returned whole (nothing is padded). Batch
        and channel axes are never cropped.
    """
    true_dims = tf.shape(y_true)[1:4]
    pred_dims = tf.shape(y_pred)[1:4]

    # Half of the surplus comes off the front of each axis; clamp at 0 so an
    # axis where y_true is smaller does not produce a negative offset.
    offsets = tf.maximum(true_dims - pred_dims, 0) // 2
    extents = tf.minimum(true_dims, pred_dims)

    # Batch (axis 0) and channel (axis 4): start at 0 and keep everything (-1).
    keep_from_start = tf.constant([0], dtype=tf.int32)
    keep_all = tf.constant([-1], dtype=tf.int32)
    begin = tf.concat([keep_from_start, offsets, keep_from_start], axis=0)
    size = tf.concat([keep_all, extents, keep_all], axis=0)
    return tf.slice(y_true, begin, size)

In [12]:
def train_model():
    """Build the 3D U-Net and fit it on the training volumes.

    WARNING: the targets are random binary placeholder masks, not masks
    derived from the loaded particle labels, so the fitted weights carry no
    real signal. The label sets returned by load_data are not used yet.

    Returns:
        The compiled and fitted Keras model.

    Raises:
        ValueError: If load_data returns no volumes.
    """
    # The label sets are not consumed yet (see warning above), so they are not
    # kept alive during training.
    X, _ = load_data(TRAIN_DIR, TRAIN_LABELS_DIR)
    if len(X) == 0:
        raise ValueError("No training data loaded. Check file paths.")
    input_shape = X[0].shape

    model = unet_3d(input_shape)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Placeholder binary masks, shaped like the model output (which may be
    # spatially smaller than the input). uint8 keeps them at one byte per
    # voxel instead of the eight bytes of randint's default integer dtype.
    predictions_shape = tuple(model.output_shape[1:])
    Y_masks = np.random.randint(
        0, 2, size=(len(X),) + predictions_shape, dtype=np.uint8
    )

    # Fit model
    model.fit(X, Y_masks, batch_size=1, epochs=5)
    return model


In [13]:
def make_submission(model, test_dir, submission_path):
    """Predict on every test volume and write the above-threshold voxels to CSV.

    For each sub-directory ``<exp>`` of ``test_dir`` (visited in sorted order)
    the volume at ``<exp>/VoxelSpacing10.000/denoised.zarr`` is loaded,
    preprocessed and passed to ``model.predict`` as a batch of one. Every
    voxel whose score is strictly greater than 0.5 becomes one CSV row.

    Args:
        model: Object with a Keras-style ``predict`` method. The prediction
            is assumed to be (1, d0, d1, d2, 1), as produced by unet_3d.
        test_dir: Directory holding one sub-directory per experiment.
        submission_path: Destination CSV path.

    The CSV has the columns ``id, experiment, particle_type, x, y, z``;
    ``id`` counts rows from 0 across all experiments and ``particle_type`` is
    always ``"unknown"``. With no detections only the header is written.

    NOTE(review): x, y, z are the integer voxel indices along prediction axes
    0, 1, 2 in that order. Confirm the axis order of the volumes, any
    voxel-to-physical scaling, and any offset introduced when the model crops
    its output, against what the submission format expects.
    """
    columns = ["id", "experiment", "particle_type", "x", "y", "z"]
    frames = []
    experiments = sorted(
        exp for exp in os.listdir(test_dir)
        if os.path.isdir(os.path.join(test_dir, exp))
    )

    for exp in experiments:
        volume_path = os.path.join(test_dir, exp, "VoxelSpacing10.000", "denoised.zarr")
        if not os.path.exists(volume_path):
            continue

        volume = preprocess_volume(load_zarr(volume_path))
        if volume is None:
            continue

        predictions = np.asarray(model.predict(np.expand_dims(volume, axis=0)))
        # Drop the batch and channel axes so argwhere yields exactly the three
        # spatial indices; on the 5-D array each row would start with the
        # batch index and end with the channel index.
        scores = predictions[0, ..., 0]
        coords = np.argwhere(scores > 0.5)
        if len(coords) == 0:
            continue

        # One vectorised frame per experiment instead of a Python-level append
        # per voxel.
        frames.append(pd.DataFrame({
            "experiment": exp,
            "particle_type": "unknown",
            "x": coords[:, 0],
            "y": coords[:, 1],
            "z": coords[:, 2],
        }))

    if frames:
        df = pd.concat(frames, ignore_index=True)
        df.insert(0, "id", np.arange(len(df)))
    else:
        df = pd.DataFrame(columns=columns)

    df[columns].to_csv(submission_path, index=False)


In [None]:
# Entry point: train the model, then write predictions for the test set.
if __name__ == "__main__":
    print("Training Model...")
    # `model` is bound at notebook scope here, so later cells can use it.
    model = train_model()

    print("Generating Submission...")
    # Runs inference on every test volume and writes the CSV to SUBMISSION_PATH.
    make_submission(model, TEST_DIR, SUBMISSION_PATH)
    print("Submission saved to:", SUBMISSION_PATH)


Training Model...
Loading volume from: /kaggle/input/czii-cryo-et-object-identification/train/static/ExperimentRuns/TS_86_3/VoxelSpacing10.000/denoised.zarr
Loading volume from: /kaggle/input/czii-cryo-et-object-identification/train/static/ExperimentRuns/TS_6_6/VoxelSpacing10.000/denoised.zarr
Loading volume from: /kaggle/input/czii-cryo-et-object-identification/train/static/ExperimentRuns/TS_6_4/VoxelSpacing10.000/denoised.zarr
Loading volume from: /kaggle/input/czii-cryo-et-object-identification/train/static/ExperimentRuns/TS_5_4/VoxelSpacing10.000/denoised.zarr
Loading volume from: /kaggle/input/czii-cryo-et-object-identification/train/static/ExperimentRuns/TS_73_6/VoxelSpacing10.000/denoised.zarr
Loading volume from: /kaggle/input/czii-cryo-et-object-identification/train/static/ExperimentRuns/TS_99_9/VoxelSpacing10.000/denoised.zarr
Loading volume from: /kaggle/input/czii-cryo-et-object-identification/train/static/ExperimentRuns/TS_69_2/VoxelSpacing10.000/denoised.zarr
Total Loaded

In [None]:
# Show the architecture summary of `model`; the training cell above must have
# run first, because that is where `model` is assigned.
model.summary()
