# Dataset Creation Tool
Once your datasets have been curated in the training_set_management_tool, use the tools in this notebook to convert them to COCO format and to merge the converted datasets.

In [7]:
from coco_tools import silt_to_coco, satsim_to_coco, merge_coco
from preprocess_functions import channel_mixture_A, channel_mixture_B, channel_mixture_C, adaptiveIQR, zscale, iqr_clipped, iqr_log, raw_file
from preprocess_functions import _median_column_subtraction, _median_row_subtraction, _background_subtract
from utilities import get_folders_in_directory, summarize_local_files, clear_local_caches, clear_local_cache, apply_bbox_corrections
import os
from utilities import clear_local_caches

In [8]:
# Every raw dataset directory listed below sits under this root, so relocating
# the raw data is a one-line change.
raw_root = "/data/Sentinel_Datasets/LMNT01_Raw"

# Training origins: directories that are converted with silt_to_coco and then
# merged into the single training dataset.
T1 = f"{raw_root}/LMNT01Sat-2024-07-08"
T2 = f"{raw_root}/LMNT01Sat-2024-07-29"
T3 = f"{raw_root}/LMNT01Sat-2024-07-09"
T4 = f"{raw_root}/LMNT01Sat-2024-07-10"
T5 = f"{raw_root}/LMNT01Sat-2024-07-11"
T6 = f"{raw_root}/LMNT01Sat-2024-07-13"
T7 = f"{raw_root}/LMNT01Sat-2024-07-12"
T8 = f"{raw_root}/LMNT01Sat-2024-08-20"
T9 = f"{raw_root}/LMNT01Sat-2024-08-04"

# Evaluation origins: each one becomes its own evaluation dataset.
E1 = f"{raw_root}/LMNT01Sat-2024-11-15"
E2 = f"{raw_root}/LMNT01Sat-2024-11-07"
E3 = f"{raw_root}/LMNT01Sat-2024-10-15"
E4 = f"{raw_root}/LMNT01Sat-2024-10-23"
E5 = f"{raw_root}/LMNT01Sat-2024-12-17"
E6 = f"{raw_root}/LMNT01Sat-2024-10-30"
E7 = f"{raw_root}/LMNT01Sat-2024-09-13"
E8 = f"{raw_root}/LMNT01Sat-2024-11-26"
E9 = f"{raw_root}/LMNT01Sat-2024-12-20"
E10 = f"{raw_root}/LMNT01Sat-2024-09-25"
E11 = f"{raw_root}/LMNT01Sat-2024-10-06"
E12 = f"{raw_root}/LMNT01Sat-2024-12-30"
E13 = f"{raw_root}/LMNT01Sat-2024-12-06"
E14 = f"{raw_root}/LMNT01Sat-2025-05-25"
E15 = f"{raw_root}/LMNT01Sat-2025-05-10"
E16 = f"{raw_root}/LMNT01Sat-2025-05-03"
E17 = f"{raw_root}/LMNT01Sat-2025-01-10"
E18 = f"{raw_root}/LMNT01Sat-2025-05-16"
E19 = f"{raw_root}/LMNT01Sat-2025-01-07"
E20 = f"{raw_root}/LMNT01Sat-2025-01-23"

# Destination folder for merged datasets (the trailing slash is kept as-is).
final_data_path = "/data/Sentinel_Datasets/Finalized_datasets/"

training_set_origins = [T1, T2, T3, T4, T5, T6, T7, T8, T9]
training_set_output_path = os.path.join(final_data_path, "LMNT01Sat_Training_Channel_Mixture_C")

# Full catalogue of evaluation origins, kept under its own name so that
# narrowing the current run no longer overwrites it.
all_eval_origins = [E1, E2, E3, E4, E5, E6, E7, E8, E9, E10, E11, E12, E13, E14, E15, E16, E17, E18, E19, E20]

# Subset processed by the evaluation cell in this run; set this to
# `all_eval_origins` to rebuild every evaluation set.
eval_origins = [E19, E20]

# One output directory per selected origin: "<origin folder name>_Channel_Mixture_C_Eval".
eval_finals = [
    os.path.join(final_data_path, f"{os.path.basename(eval_origin)}_Channel_Mixture_C_Eval")
    for eval_origin in eval_origins
]

# Preprocessing callable handed to silt_to_coco as `process_func` in the conversion cells.
# NOTE(review): the output folder names built earlier in this cell hard-code
# "Channel_Mixture_C" — keep them in sync if a different function is selected here.
preprocess_func = channel_mixture_C


## Convert SILT to COCO

### Convert Single Path

In [None]:
# Convert a single SILT dataset directory to COCO with include_sats on and
# include_stars off, writing PNGs produced through `preprocess_func`.
# NOTE(review): `Process_pathB` is not assigned in any cell visible in this
# notebook — define it before running this cell on a fresh kernel.
# Stars-only variant, kept for reference:
# silt_to_coco(Process_pathB, include_sats=False, include_stars=True, zip=False, notes="RME01 dataset with stars only")
silt_to_coco(
    Process_pathB,
    include_sats=True,
    include_stars=False,
    convert_png=True,
    process_func=preprocess_func,
    notes="Channel Mixture of Adaptive IQR, raw, and zscaled. ",
)

Converting Silt to COCO: 100%|██████████| 712/712 [00:15<00:00, 47.46it/s]
Copying images: 100%|██████████| 712/712 [19:28<00:00,  1.64s/it]  


### Convert Multiple Paths

In [None]:
# Convert every training origin to COCO, echoing each directory before its
# conversion starts so the progress bars below can be attributed to it.
# NOTE(review): the notes text here differs from the one used in the
# single-path cell for the same `preprocess_func` — confirm which is accurate.
for training_origin in training_set_origins:
    print(training_origin)
    silt_to_coco(
        training_origin,
        include_sats=True,
        include_stars=False,
        convert_png=True,
        process_func=preprocess_func,
        notes="Mixture of ZScale, raw, and log-IQR",
    )

/data/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-07-08


Converting Silt to COCO: 100%|██████████| 281/281 [00:10<00:00, 26.74it/s]
Copying images:  48%|████▊     | 134/281 [03:36<03:46,  1.54s/it]

## Convert SATSim to COCO

In [None]:

# Convert a SatSim dataset to COCO with include_stars on and include_sats off;
# no preprocessing function is applied and the notes field is left empty.
# NOTE(review): `satsim_path` is not assigned in any cell visible in this
# notebook — define it before running this cell on a fresh kernel.
satsim_to_coco(
    satsim_path,
    include_sats=False,
    include_stars=True,
    convert_png=True,
    process_func=None,
    notes="",
)

Converting Satsim to COCO: 100%|██████████| 169/169 [00:10<00:00, 15.74it/s]
Copying images: 100%|██████████| 1012/1012 [01:23<00:00, 12.10it/s]


## Merge Datasets

### Merge Training Sets

In [5]:
# Merge the converted training origins into one COCO dataset, requesting a
# train/val/test split with ratios 0.9 / 0.1 / 0.
# `training_set_origins` and `training_set_output_path` come from the
# configuration cell; the list is intentionally not redefined here, so the
# configuration cell remains the single source of truth.
# NOTE(review): the notes text has a typo ("trainig") and says "about 6400"
# samples, while the recorded run copied 6077 + 675 + 1 images. It is left
# untouched because it is passed to merge_coco verbatim — confirm and correct.
merge_coco(
    training_set_origins,
    training_set_output_path,
    train_test_split=True,
    train_ratio=.9,
    val_ratio=.1,
    test_ratio=0,
    notes="LMNT01_trainig with multi channel C satellites.Contains about 6400 Samples ",
)

# Clean up under the parent directory shared by the origins; in the recorded
# run this removed the `annotations` and `images` folders of the origins.
clear_local_caches(os.path.dirname(training_set_origins[0]))

Processing COCO Datasets: 100%|██████████| 9/9 [00:00<00:00, 60.68it/s]
Copying images: 100%|██████████| 6077/6077 [00:25<00:00, 242.55it/s]
Copying images: 100%|██████████| 675/675 [00:03<00:00, 221.87it/s]
Copying images: 100%|██████████| 1/1 [00:00<00:00, 407.89it/s]


Removed: /data/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-07-12/annotations
Removed: /data/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-07-12/images
Removed: /data/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-08-20/annotations
Removed: /data/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-08-20/images
Removed: /data/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-07-08/annotations
Removed: /data/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-07-08/images
Removed: /data/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-07-29/annotations
Removed: /data/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-07-29/images
Removed: /data/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-07-09/annotations
Removed: /data/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-07-09/images
Removed: /data/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-07-10/annotations
Removed: /data/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-07-10/images
Removed: /data/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-07-11/annotations
Removed: /data/Sentinel_Datas

### Merge Evaluation Sets

In [9]:
# Build one evaluation dataset per (origin, destination) pair: convert the
# origin to COCO, merge it alone into its destination with everything assigned
# to the test split (test_ratio=1, no train/test splitting), then clear caches.
# NOTE(review): clear_local_caches receives the *parent* of the origin, i.e.
# the folder shared by all raw datasets — confirm that touching sibling
# datasets' caches on every iteration is intended.
for eval_origin, eval_destination in zip(eval_origins, eval_finals):
    silt_to_coco(
        eval_origin,
        include_sats=True,
        include_stars=False,
        convert_png=True,
        process_func=preprocess_func,
        notes="Mixture of ZScale, raw, and log-IQR",
    )
    merge_coco(
        [eval_origin],
        eval_destination,
        train_test_split=False,
        train_ratio=0,
        val_ratio=0,
        test_ratio=1,
        notes="LMNT01 dataset with satellites and MultiChannel C",
    )
    raw_parent_dir = os.path.dirname(eval_origin)
    clear_local_caches(raw_parent_dir)

Converting Silt to COCO: 100%|██████████| 601/601 [00:23<00:00, 25.73it/s]
Copying images: 100%|██████████| 601/601 [20:10<00:00,  2.01s/it]
Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00, 77.57it/s]
Copying images: 100%|██████████| 601/601 [00:02<00:00, 256.57it/s]


Removed: /data/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2025-01-07/annotations
Removed: /data/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2025-01-07/images


Converting Silt to COCO: 100%|██████████| 2452/2452 [01:48<00:00, 22.51it/s]
Copying images: 100%|██████████| 2452/2452 [1:23:14<00:00,  2.04s/it]
Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00, 19.21it/s]
Copying images: 100%|██████████| 2452/2452 [00:09<00:00, 262.29it/s]


Removed: /data/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2025-01-23/annotations
Removed: /data/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2025-01-23/images
