In [42]:
import pandas as pd
import numpy as np
import os

def normalize_path(path):
    """Return *path* with every backslash replaced by a forward slash.

    Used so that Windows-style paths stored in the metadata table can be
    handed to file-loading calls with a single, uniform separator.
    """
    segments = path.split("\\")
    return "/".join(segments)

def world_to_voxel(world_coord, origin, spacing):
    """Convert a world-space coordinate to the nearest integer voxel index.

    The origin is subtracted, the offset is divided element-wise by the
    voxel spacing, and the result is rounded with ``np.round`` (ties go to
    the nearest even value) before being cast to ``int``.

    All three arguments must use the same axis order; the result follows it.
    """
    offset = world_coord - origin
    continuous_index = offset / spacing
    return np.round(continuous_index).astype(int)

def extract_negative_patches_from_candidates(
    candidates_csv,
    annotations_csv,
    metadata_csv,
    output_folder,
    patch_size=32,
    min_distance_mm=5.0
):
    """Save a cubic patch around every negative candidate that is far from all nodules.

    A candidate is used when its ``class`` is 0, its ``seriesuid`` has a row
    in the metadata table, and its world-space distance to every annotation
    of the same series is at least ``min_distance_mm``.  Each patch is written
    to ``output_folder`` as ``{seriesuid}_{z}_{y}_{x}_neg.npy``.

    Args:
        candidates_csv: CSV with columns ``seriesuid``, ``class``,
            ``coordX``, ``coordY``, ``coordZ``.
        annotations_csv: CSV with columns ``seriesuid``, ``coordX``,
            ``coordY``, ``coordZ`` (ground-truth nodule centres).
        metadata_csv: CSV with columns ``case_id``, ``origin_{z,y,x}``,
            ``spacing_{z,y,x}`` and ``path`` (a ``.npy`` file loadable with
            ``np.load``).  If a ``case_id`` appears more than once, the first
            row is used.
        output_folder: Directory for the patches; created if missing.
        patch_size: Edge length of the cubic patch, in voxels.
        min_distance_mm: Minimum allowed Euclidean distance between a
            candidate and any annotation, in the units of the coordinate
            columns (presumably millimetres -- confirm against the dataset).

    Notes:
        * The scan array is indexed as (z, y, x) and is expected to be 3-D.
        * The patch is always centred on the candidate voxel; parts of the
          window that fall outside the scan are zero-filled.
        * Candidates whose window does not overlap the scan at all are
          skipped with a message instead of producing an all-zero patch.
    """
    os.makedirs(output_folder, exist_ok=True)

    candidates = pd.read_csv(candidates_csv)
    annotations = pd.read_csv(annotations_csv)
    # Keep the first row per case so that .loc always yields a single record.
    metadata = (
        pd.read_csv(metadata_csv)
        .drop_duplicates("case_id")
        .set_index("case_id")
    )

    # Nodule centres per series as (n, 3) float arrays in (z, y, x) order,
    # so the proximity test below is a single vectorised norm.
    coord_cols = ["coordZ", "coordY", "coordX"]
    nodules_by_series = {
        uid: group[coord_cols].to_numpy(dtype=float)
        for uid, group in annotations.groupby("seriesuid")
    }

    half = patch_size // 2
    # Consecutive candidates usually belong to the same scan; remember the
    # last loaded volume so it is not re-read from disk for every row.
    loaded_uid, image = None, None

    negatives = candidates[candidates["class"] == 0]  # We only want negatives
    for _, row in negatives.iterrows():
        seriesuid = row["seriesuid"]
        if seriesuid not in metadata.index:
            continue

        candidate_world = np.array(
            [row["coordZ"], row["coordY"], row["coordX"]], dtype=float
        )

        # Reject candidates that lie too close to any ground-truth nodule.
        # Done before touching the disk so rejected rows cost no I/O.
        nodules = nodules_by_series.get(seriesuid)
        if nodules is not None:
            distances = np.linalg.norm(nodules - candidate_world, axis=1)
            if (distances < min_distance_mm).any():
                continue

        # Get scan info
        meta = metadata.loc[seriesuid]
        origin = np.array(
            [meta["origin_z"], meta["origin_y"], meta["origin_x"]], dtype=float
        )
        spacing = np.array(
            [meta["spacing_z"], meta["spacing_y"], meta["spacing_x"]], dtype=float
        )

        if seriesuid != loaded_uid:
            image = np.load(normalize_path(meta["path"]))
            loaded_uid = seriesuid

        # Voxel location of the candidate and the window centred on it.
        center = world_to_voxel(candidate_world, origin, spacing)
        z, y, x = center
        start = center - half
        stop = start + patch_size

        # Part of the window that actually lies inside the scan.
        src_lo = np.maximum(start, 0)
        src_hi = np.minimum(stop, image.shape)
        if np.any(src_hi <= src_lo):
            print(
                f"Skipped candidate outside scan volume: {seriesuid} "
                f"at voxel ({z}, {y}, {x})"
            )
            continue

        # Copy the overlap into a zero-filled cube at the matching offset so
        # the candidate stays centred even when the window is clipped.
        dst_lo = src_lo - start
        dst_hi = dst_lo + (src_hi - src_lo)
        src = tuple(slice(lo, hi) for lo, hi in zip(src_lo, src_hi))
        dst = tuple(slice(lo, hi) for lo, hi in zip(dst_lo, dst_hi))
        patch = np.zeros((patch_size,) * 3, dtype=image.dtype)
        patch[dst] = image[src]

        save_path = os.path.join(output_folder, f"{seriesuid}_{z}_{y}_{x}_neg.npy")
        np.save(save_path, patch)
        print(f"Saved negative patch: {save_path}")

def extract_patches_from_annotations(
    annotation_csv,
    metadata_csv,
    output_folder,
    patch_size=32
):
    """Save a cubic patch centred on every annotated nodule.

    For each annotation whose ``seriesuid`` has a row in the metadata table,
    the annotated world coordinate is converted to a voxel index and a
    ``patch_size``-edged cube around it is written to ``output_folder`` as
    ``{seriesuid}_{z}_{y}_{x}_pos.npy``.

    Args:
        annotation_csv: CSV with columns ``seriesuid``, ``coordX``,
            ``coordY``, ``coordZ``.
        metadata_csv: CSV with columns ``case_id``, ``origin_{z,y,x}``,
            ``spacing_{z,y,x}`` and ``path`` (a ``.npy`` file loadable with
            ``np.load``).  If a ``case_id`` appears more than once, the first
            row is used.
        output_folder: Directory for the patches; created if missing.
        patch_size: Edge length of the cubic patch, in voxels.

    Notes:
        * The scan array is indexed as (z, y, x) and is expected to be 3-D.
        * The patch is always centred on the annotated voxel; parts of the
          window that fall outside the scan are zero-filled.
        * Annotations whose window does not overlap the scan at all are
          skipped with a message instead of producing an all-zero patch.
    """
    os.makedirs(output_folder, exist_ok=True)

    annotations = pd.read_csv(annotation_csv)
    # Index by case id (first row wins) so each lookup is a hash access
    # rather than a full-column comparison per annotation.
    metadata = (
        pd.read_csv(metadata_csv)
        .drop_duplicates("case_id")
        .set_index("case_id")
    )

    half = patch_size // 2
    # Annotations of one scan are typically adjacent; remember the last
    # loaded volume so it is not re-read from disk for every row.
    loaded_id, image = None, None

    for _, row in annotations.iterrows():
        case_id = row["seriesuid"]
        if case_id not in metadata.index:
            continue

        meta = metadata.loc[case_id]
        origin = np.array(
            [meta["origin_z"], meta["origin_y"], meta["origin_x"]], dtype=float
        )
        spacing = np.array(
            [meta["spacing_z"], meta["spacing_y"], meta["spacing_x"]], dtype=float
        )

        if case_id != loaded_id:
            image = np.load(normalize_path(meta["path"]))
            loaded_id = case_id

        world_coord = np.array(
            [row["coordZ"], row["coordY"], row["coordX"]], dtype=float
        )
        center = world_to_voxel(world_coord, origin, spacing)
        z, y, x = center
        start = center - half
        stop = start + patch_size

        # Part of the window that actually lies inside the scan.
        src_lo = np.maximum(start, 0)
        src_hi = np.minimum(stop, image.shape)
        if np.any(src_hi <= src_lo):
            print(
                f"Skipped annotation outside scan volume: {case_id} "
                f"at voxel ({z}, {y}, {x})"
            )
            continue

        # Copy the overlap into a zero-filled cube at the matching offset so
        # the nodule stays centred even when the window is clipped.
        dst_lo = src_lo - start
        dst_hi = dst_lo + (src_hi - src_lo)
        src = tuple(slice(lo, hi) for lo, hi in zip(src_lo, src_hi))
        dst = tuple(slice(lo, hi) for lo, hi in zip(dst_lo, dst_hi))
        patch = np.zeros((patch_size,) * 3, dtype=image.dtype)
        patch[dst] = image[src]

        save_path = os.path.join(output_folder, f"{case_id}_{z}_{y}_{x}_pos.npy")
        np.save(save_path, patch)
        print(f"Saved patch: {save_path}")

In [None]:
# Machine-specific locations of the raw dataset and the preprocessing output.
LUNA_PATH = r"C:\Users\azizd\python\pytorch\data\bitirme_projesi\archive"
PREPROCESSED_OUTPUT = r"C:\Users\azizd\python\pytorch\final_project\output\preprocessed_luna16"

# Input tables, located relative to the dataset root.
ANNOTATION_FILE = f"{LUNA_PATH}/annotations.csv"
CANDIDATES_FILE = f"{LUNA_PATH}/candidates_V2/candidates_V2.csv"

# Destination for extracted patches, relative to the working directory.
PATCH_OUTPUT = "../output/patches"

# Cut one positive patch per annotated nodule.
extract_patches_from_annotations(
    annotation_csv=ANNOTATION_FILE,
    metadata_csv=f"{PREPROCESSED_OUTPUT}/preprocessed_metadata.csv",
    output_folder=PATCH_OUTPUT,
    patch_size=32,
)