# Install pkgs

**Note:** This is training notebook only. Inference ain't included in . 
Anybody who wants to use this notebook for inference purposes is most welcome.

In [1]:
# Make a copick project
import os
import shutil

config_blob = """{
    "name": "czii_cryoet_mlchallenge_2024",
    "description": "2024 CZII CryoET ML Challenge training data.",
    "version": "1.0.0",

    "pickable_objects": [
        {
            "name": "apo-ferritin",
            "is_particle": true,
            "pdb_id": "4V1W",
            "label": 1,
            "color": [  0, 117, 220, 128],
            "radius": 60,
            "map_threshold": 0.0418
        },
        {
            "name": "beta-amylase",
            "is_particle": true,
            "pdb_id": "1FA2",
            "label": 2,
            "color": [153,  63,   0, 128],
            "radius": 65,
            "map_threshold": 0.035
        },
        {
            "name": "beta-galactosidase",
            "is_particle": true,
            "pdb_id": "6X1Q",
            "label": 3,
            "color": [ 76,   0,  92, 128],
            "radius": 90,
            "map_threshold": 0.0578
        },
        {
            "name": "ribosome",
            "is_particle": true,
            "pdb_id": "6EK0",
            "label": 4,
            "color": [  0,  92,  49, 128],
            "radius": 150,
            "map_threshold": 0.0374
        },
        {
            "name": "thyroglobulin",
            "is_particle": true,
            "pdb_id": "6SCJ",
            "label": 5,
            "color": [ 43, 206,  72, 128],
            "radius": 130,
            "map_threshold": 0.0278
        },
        {
            "name": "virus-like-particle",
            "is_particle": true,
            "pdb_id": "6N4V",            
            "label": 6,
            "color": [255, 204, 153, 128],
            "radius": 135,
            "map_threshold": 0.201
        }
    ],

    "overlay_root": "/home/naoya/kaggle/czii/input/extra_data/overlay",

    "overlay_fs_args": {
        "auto_mkdir": true
    },

    "static_root": "/home/naoya/kaggle/czii/input/extra_data/czii"
}"""

copick_config_path = "/home/naoya/kaggle/czii/input/czii-cryo-et-object-identification/copick.config"
output_overlay = "/home/naoya/kaggle/czii/input/extra_data/overlay"

with open(copick_config_path, "w") as f:
    f.write(config_blob)
    
# Update the overlay
# Define source and destination directories
source_dir = '/home/naoya/kaggle/czii/input/extra_data/czii_static'
destination_dir = '/home/naoya/kaggle/czii/input/extra_data/overlay'

# Walk through the source directory
for root, dirs, files in os.walk(source_dir):
    # Create corresponding subdirectories in the destination
    relative_path = os.path.relpath(root, source_dir)
    target_dir = os.path.join(destination_dir, relative_path)
    os.makedirs(target_dir, exist_ok=True)
    
    # Copy and rename each file
    for file in files:
        if file.startswith("curation_0_"):
            new_filename = file
        else:
            new_filename = f"curation_0_{file}"
            
        
        # Define full paths for the source and destination files
        source_file = os.path.join(root, file)
        destination_file = os.path.join(target_dir, new_filename)
        
        # Copy the file with the new name
        shutil.copy2(source_file, destination_file)
        print(f"Copied {source_file} to {destination_file}")

Copied /home/naoya/kaggle/czii/input/extra_data/czii_static/ExperimentRuns/TS_1/Picks/apo-ferritin.json to /home/naoya/kaggle/czii/input/extra_data/overlay/ExperimentRuns/TS_1/Picks/curation_0_apo-ferritin.json
Copied /home/naoya/kaggle/czii/input/extra_data/czii_static/ExperimentRuns/TS_1/Picks/beta-amylase.json to /home/naoya/kaggle/czii/input/extra_data/overlay/ExperimentRuns/TS_1/Picks/curation_0_beta-amylase.json
Copied /home/naoya/kaggle/czii/input/extra_data/czii_static/ExperimentRuns/TS_1/Picks/thyroglobulin.json to /home/naoya/kaggle/czii/input/extra_data/overlay/ExperimentRuns/TS_1/Picks/curation_0_thyroglobulin.json
Copied /home/naoya/kaggle/czii/input/extra_data/czii_static/ExperimentRuns/TS_1/Picks/virus-like-particle.json to /home/naoya/kaggle/czii/input/extra_data/overlay/ExperimentRuns/TS_1/Picks/curation_0_virus-like-particle.json
Copied /home/naoya/kaggle/czii/input/extra_data/czii_static/ExperimentRuns/TS_1/Picks/beta-galactosidase.json to /home/naoya/kaggle/czii/inp

In [2]:
import numpy as np

import copick
from tqdm import tqdm

# Prepare the dataset
## 1. Get copick root

In [3]:
root = copick.from_file(copick_config_path)

copick_user_name = "copickUtils"
copick_segmentation_name = "paintedPicks"
voxel_size = 10
tomo_type = "denoised"

## 2. Generate multi-class segmentation masks from picks, and saved them to the copick overlay directory (one-time)

In [4]:
from copick_utils.segmentation import segmentation_from_picks
import copick_utils.writers.write as write
from collections import defaultdict

# Just do this once
generate_masks = True

if generate_masks:
    target_objects = defaultdict(dict)
    for object in root.pickable_objects:
        if object.is_particle:
            target_objects[object.name]['label'] = object.label
            target_objects[object.name]['radius'] = object.radius


    for run in tqdm(root.runs):
        tomo = run.get_voxel_spacing(10)
        tomo = tomo.get_tomogram(tomo_type).numpy()
        target = np.zeros(tomo.shape, dtype=np.uint8)
        for pickable_object in root.pickable_objects:
            pick = run.get_picks(object_name=pickable_object.name, user_id="curation")
            if len(pick):  
                target = segmentation_from_picks.from_picks(pick[0], 
                                                            target, 
                                                            target_objects[pickable_object.name]['radius'] * 0.8,
                                                            target_objects[pickable_object.name]['label']
                                                            )
        write.segmentation(run, target, copick_user_name, name=copick_segmentation_name)

  tomo = tomo.get_tomogram(tomo_type).numpy()
100%|██████████| 27/27 [00:09<00:00,  2.97it/s]


## 3. Get tomograms and their segmentaion masks (from picks) arrays

In [5]:
data_dicts = []
for run in tqdm(root.runs):
    tomogram = run.get_voxel_spacing(voxel_size).get_tomogram(tomo_type).numpy()
    segmentation = run.get_segmentations(name=copick_segmentation_name, user_id=copick_user_name, voxel_size=voxel_size, is_multilabel=True)[0].numpy()
    data_dicts.append({"name": run.name, "image": tomogram, "label": segmentation})
    
print(np.unique(data_dicts[0]['label']))

  tomogram = run.get_voxel_spacing(voxel_size).get_tomogram(tomo_type).numpy()
100%|██████████| 27/27 [00:06<00:00,  4.28it/s]


[0 1 2 3 4 5 6]


In [6]:
data_dicts[0]['label'].shape

(200, 630, 630)

In [7]:
data_dicts[0]['image'].shape

(200, 630, 630)

In [10]:
output_dir = f"/home/naoya/kaggle/czii/input/extra_data/numpy/{tomo_type}"

for i in range(len(data_dicts)):
    image_path = os.path.join(output_dir, f"train_image_{data_dicts[i]['name']}.npy")
    label_path = os.path.join(output_dir, f"train_label_{data_dicts[i]['name']}.npy")
    
    with open(image_path, 'wb') as f:
        np.save(f, data_dicts[i]['image'])
        
    with open(label_path, 'wb') as f:
        np.save(f, data_dicts[i]['label'])