# Dataset Creation Tool
Once your datasets have been curated in the training_set_management_tool, use the tools in this notebook to convert them to COCO format and to merge the converted datasets.

In [1]:
from coco_tools import silt_to_coco, satsim_to_coco, merge_coco, silt_to_coco_panoptic, partition_dataset
from preprocess_functions import channel_mixture_A, channel_mixture_B, channel_mixture_C, adaptiveIQR, zscale, iqr_clipped, iqr_log, raw_file
from preprocess_functions import _median_column_subtraction, _median_row_subtraction, _background_subtract
from utilities import get_folders_in_directory, summarize_local_files, clear_local_caches, clear_local_cache, apply_bbox_corrections
import os
from utilities import clear_local_caches

### Dataset Paths and Preprocessing (RME01 / RME04, 2025 data)

In [2]:
# Root directory that the output paths in this notebook are joined onto.
final_data_path = "/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets"

# Source directories for the RME01 / RME04 2025 data.
dirctoryA = "/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/RME01-2025Data"
dirctoryB = "/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/RME04-2025Data"

# Output locations for the two sources, both placed under `final_data_path`.
final_outputA = os.path.join(final_data_path, "RME01_2025")
final_outputB = os.path.join(final_data_path, "RME04_2025")

# Every source directory the SILT -> COCO conversion loop iterates over.
all_origins = [dirctoryA, dirctoryB]

# Callable passed to the converters as `process_func`.
preprocess_func = iqr_log

## Convert SILT to COCO

### Convert Single Path

### Convert Multiple Paths

In [3]:

# Run the SILT -> COCO conversion once per source directory, echoing the
# directory first so the progress bars in the output can be attributed to it.
for path in all_origins:
    print(path)
    silt_to_coco(
        path,
        include_sats=True,
        include_stars=False,
        convert_png=True,
        process_func=preprocess_func,
        notes=f"Log_IQR_preprocessing for stability on new 2025 data from {path}",
    )

/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/RME01-2025Data


Converting Silt to COCO: 100%|██████████| 10257/10257 [06:50<00:00, 24.98it/s]
Copying images:   2%|▏         | 249/10257 [05:04<3:24:10,  1.22s/it]


KeyboardInterrupt: 

## Convert SATSim to COCO

## Convert SILT to Panoptic COCO

## Merge Datasets

### Merge Training Sets

In [4]:
# Merge every directory in `all_origins` into one dataset with an
# 80 / 10 / 10 train / val / test split.
# NOTE(review): `training_set_output_path_LMNT01` is only assigned in later
# cells of this notebook, so this call depends on out-of-order execution and
# raises NameError on a fresh kernel -- confirm the intended output path.
# NOTE(review): the note text says "LMNT01" while `all_origins` holds the
# RME01/RME04 directories -- confirm which is intended.
merge_coco(
    all_origins,
    training_set_output_path_LMNT01,
    train_test_split=True,
    train_ratio=.8,
    val_ratio=.1,
    test_ratio=.1,
    notes="Large LMNT01 Dataset ",
)


Processing COCO Datasets: 100%|██████████| 29/29 [00:00<00:00, 76.44it/s]
Copying images: 100%|██████████| 17054/17054 [00:31<00:00, 535.60it/s]
Copying images: 100%|██████████| 2133/2133 [00:04<00:00, 520.92it/s]
Copying images: 100%|██████████| 2128/2128 [00:04<00:00, 430.68it/s]


In [5]:
# Location of the SatSim dataset that is added to every origin list below.
satsim_data = "/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/satsim_sats_dataset"

# Add the SatSim dataset to each origin list.
# The membership check makes this cell idempotent: re-running it no longer
# appends the same path a second time, which would otherwise hand the SatSim
# dataset to later merges more than once.
# NOTE(review): the three origin lists must already exist in the kernel; they
# are not defined above this cell in the notebook.
for origin_list in (
    training_set_origins_RME04,
    training_set_origins_LMNT01,
    training_set_origins_LMNT02,
):
    if satsim_data not in origin_list:
        origin_list.append(satsim_data)

### Merging Panoptic COCO

In [None]:
# For creating training sets.
# Earlier configurations, kept commented out for reference:
# training_set_origins = [T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11]
# satsim_data = "/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/SatsimMixtureC"
# satsim_output_data = "/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/SatsimMCTrain"
# origins = [training_set_origins_LMNT02, training_set_origins_LMNT01, training_set_origins_RME04]

# Output directories for the merged panoptic training sets.
training_set_output_path_LMNT01 = os.path.join(final_data_path, "Panoptic_MC_LMNT01_train_30_Overlap")
training_set_output_path_LMNT02 = os.path.join(final_data_path, "Panoptic_MC_LMNT02_train_random_overlap")
training_set_output_path_RME04 = os.path.join(final_data_path, "Panoptic_MC_RME04_train_random_overlap")

# Build the list of source datasets for this merge. Only the LMNT01 origins are
# active; the other sources are left as commented-out toggles.
# NOTE(review): `training_set_origins_LMNT01` is assigned in a later cell, so
# this cell relies on that cell having been executed first.
origins = list(training_set_origins_LMNT01)
# origins.extend(training_set_origins_LMNT02)
# origins.extend(training_set_origins_RME04)
# origins.append(satsim_data)
print(origins)
# total_data_path = os.path.join(final_data_path, f"Panoptic_MC_AllData_train") #REDO THIS ONE

# Split fractions. The previous values (.75 / .15 / .15) summed to 1.05; the
# test fraction is reduced to .10 so the three fractions cover exactly 100%.
# NOTE(review): confirm that .75 / .15 / .10 is the intended split.
split_ratios = dict(train_ratio=.75, val_ratio=.15, test_ratio=.10)
assert abs(sum(split_ratios.values()) - 1.0) < 1e-9, "split ratios must sum to 1"

# NOTE(review): the note text says "10percent overlap" while the output folder
# is named "..._30_Overlap" -- confirm which is correct.
merge_coco(
    origins,
    training_set_output_path_LMNT01,
    train_test_split=True,
    notes="LMNT01 10percent overlap with no empty frames for SNR Curriculum Learning study ",
    **split_ratios,
)
# clear_local_caches(os.path.dirname(training_set_origins[0]))

['/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-08-04', '/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-08-20', '/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-09-13', '/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-09-25', '/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-10-06', '/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-10-15', '/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-10-23', '/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/LMNT01_Raw/LMNT01Sat-2024-10-30']


Processing COCO Datasets: 100%|██████████| 8/8 [00:00<00:00, 15.33it/s]
Copying images: 100%|██████████| 26863/26863 [00:07<00:00, 3468.09it/s]
Copying images: 100%|██████████| 6699/6699 [00:01<00:00, 3534.65it/s]
Copying images: 0it [00:00, ?it/s]


### Merge Evaluation Sets

In [None]:
# training_set_origins_LMNT01 = [LA1, LA2, LA3, LA4, LA5, LA6, LA7] # ORIGINAL TRAINING SET DO NOT DELETE
training_set_origins_LMNT01 = [LA8, LA9, LA10, LA11, LA12, LA13, LA14, LA15]
# training_set_origins_LMNT01 = [LA1, LA2, LA3, LA4, LA5, LA6, LA7, LA10, LA11, LA12, LA13, LA14, LA15]
training_set_output_path_LMNT01 = os.path.join(final_data_path, "Panoptic_LMNT01_No_Chipping_train")

# For creating evaluation sets: each origin is converted, written to its paired
# destination without a train/val/test split (everything assigned to "test"),
# and then the caches under the origin's parent directory are cleared.
# NOTE(review): `LA8`..`LA15`, `eval_origins` and `eval_finals` are not defined
# in any cell shown in this notebook; they must already exist in the kernel.
# NOTE(review): the recorded run of this cell stopped with
# "OSError: [Errno 28] No space left on device" -- check free space first.
for origin, destination in zip(eval_origins, eval_finals):
    silt_to_coco(
        origin,
        include_sats=True,
        include_stars=False,
        convert_png=True,
        process_func=preprocess_func,
        notes="Mixture of ZScale, raw, and log-IQR",
    )
    merge_coco(
        [origin],
        destination,
        train_test_split=False,
        train_ratio=0,
        val_ratio=0,
        test_ratio=1,
        notes="LMNT02 dataset with satellites and MultiChannel C",
    )
    # Called on the parent directory of `origin`, not on `origin` itself.
    clear_local_caches(os.path.dirname(origin))

Converting Silt to COCO: 100%|██████████| 609/609 [00:25<00:00, 23.96it/s]
Copying images: 100%|██████████| 609/609 [18:49<00:00,  1.85s/it]
Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00, 63.17it/s]
Copying images: 100%|██████████| 609/609 [00:02<00:00, 278.34it/s]


Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-09-06/annotations
Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-09-06/images
Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-11-14/annotations
Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-11-14/images


Converting Silt to COCO: 100%|██████████| 572/572 [00:22<00:00, 25.37it/s]
Copying images: 100%|██████████| 572/572 [17:13<00:00,  1.81s/it]
Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00, 88.10it/s]
Copying images: 100%|██████████| 572/572 [00:02<00:00, 274.83it/s]


Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-11-14/annotations
Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-11-14/images


Converting Silt to COCO: 100%|██████████| 522/522 [00:21<00:00, 23.98it/s]
Copying images: 100%|██████████| 522/522 [16:31<00:00,  1.90s/it]
Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00, 75.25it/s]
Copying images: 100%|██████████| 522/522 [00:01<00:00, 285.12it/s]


Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-12-07/annotations
Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-12-07/images


Converting Silt to COCO: 100%|██████████| 1859/1859 [01:15<00:00, 24.72it/s]
Copying images: 100%|██████████| 1859/1859 [56:25<00:00,  1.82s/it]
Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00, 32.79it/s]
Copying images: 100%|██████████| 1859/1859 [00:06<00:00, 270.88it/s]


Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-09-14/annotations
Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-09-14/images


Converting Silt to COCO: 100%|██████████| 1122/1122 [00:47<00:00, 23.60it/s]
Copying images: 100%|██████████| 1122/1122 [34:32<00:00,  1.85s/it]
Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00, 56.50it/s]
Copying images: 100%|██████████| 1122/1122 [00:04<00:00, 264.46it/s]


Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-11-07/annotations
Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-11-07/images


Converting Silt to COCO: 100%|██████████| 463/463 [00:19<00:00, 23.80it/s]
Copying images: 100%|██████████| 463/463 [14:28<00:00,  1.88s/it]
Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00, 90.92it/s]
Copying images: 100%|██████████| 463/463 [00:01<00:00, 242.80it/s]


Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-10-08/annotations
Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-10-08/images


Converting Silt to COCO: 100%|██████████| 570/570 [00:24<00:00, 23.34it/s]
Copying images: 100%|██████████| 570/570 [18:29<00:00,  1.95s/it]
Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00, 93.40it/s]
Copying images: 100%|██████████| 570/570 [00:02<00:00, 266.13it/s]


Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-10-30/annotations
Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-10-30/images


Converting Silt to COCO: 100%|██████████| 756/756 [00:32<00:00, 23.61it/s]
Copying images: 100%|██████████| 756/756 [23:49<00:00,  1.89s/it]
Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00, 45.54it/s]
Copying images: 100%|██████████| 756/756 [00:03<00:00, 246.50it/s]


Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-10-10/annotations
Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-10-10/images


Converting Silt to COCO: 100%|██████████| 2795/2795 [01:58<00:00, 23.63it/s]
Copying images: 100%|██████████| 2795/2795 [1:29:16<00:00,  1.92s/it]
Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00, 20.05it/s]
Copying images: 100%|██████████| 2795/2795 [00:10<00:00, 270.55it/s]


Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-12-24/annotations
Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-12-24/images


Converting Silt to COCO: 100%|██████████| 1607/1607 [01:07<00:00, 23.98it/s]
Copying images: 100%|██████████| 1607/1607 [51:06<00:00,  1.91s/it]
Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00, 34.20it/s]
Copying images: 100%|██████████| 1607/1607 [00:05<00:00, 270.77it/s]


Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-12-31/annotations
Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-12-31/images


Converting Silt to COCO: 100%|██████████| 519/519 [00:19<00:00, 26.34it/s]
Copying images: 100%|██████████| 519/519 [15:20<00:00,  1.77s/it]
Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00, 103.29it/s]
Copying images: 100%|██████████| 519/519 [00:01<00:00, 311.66it/s]


Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-11-19/annotations
Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-11-19/images


Converting Silt to COCO: 100%|██████████| 1236/1236 [00:51<00:00, 24.10it/s]
Copying images: 100%|██████████| 1236/1236 [39:57<00:00,  1.94s/it]
Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00, 52.64it/s]
Copying images: 100%|██████████| 1236/1236 [00:04<00:00, 263.31it/s]


Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-10-31/annotations
Removed: /data/Sentinel_Datasets/LMNT02_Raw/LMNT02Sat-2024-10-31/images


Converting Silt to COCO: 100%|██████████| 1596/1596 [01:07<00:00, 23.59it/s]
Copying images:  76%|███████▌  | 1210/1596 [38:49<12:23,  1.93s/it]


OSError: [Errno 28] No space left on device

### Curriculum Learning Datasets

In [16]:
# Shared parent folder of every curriculum-learning (CL) dataset in this cell.
cl_root = "/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets"

# Real-data targets: the full set, a High/Low pair, and five numbered steps.
Real1Step = f"{cl_root}/CL_L1"
Real2Step = [f"{cl_root}/CL_L1_{level}" for level in ("High", "Low")]
Real5Step = [f"{cl_root}/CL_L1_{step}" for step in range(1, 6)]

# Simulated-data targets, laid out the same way as the real-data ones.
SIM1Step = f"{cl_root}/CL_Sim"
SIM2Step = [f"{cl_root}/CL_Sim_{level}" for level in ("High", "Low")]
SIM5STep = [f"{cl_root}/CL_Sim_{step}" for step in range(1, 6)]

print("Creating Original L1 Dataset")
# 80 / 0 / 20 train / val / test split of the LMNT01 origins.
merge_coco(
    training_set_origins_LMNT01,
    Real1Step,
    train_test_split=True,
    train_ratio=.80,
    val_ratio=0,
    test_ratio=0.20,
    notes="LMNT01 10percent overlap with no empty frames for SNR Curriculum Learning study ",
)

print("Creating Original SIM Dataset")
satsim_path = f"{cl_root}/SingleSatSNRDataset"
new_satsim_path = f"{cl_root}/CL_Sim_temp"
# presumably "snr" is the partition key and 12000 the dataset size -- TODO confirm
# against partition_dataset's signature.
partition_dataset(satsim_path, [new_satsim_path], "snr", 12000)
merge_coco(
    [new_satsim_path],
    SIM1Step,
    train_test_split=True,
    train_ratio=.80,
    val_ratio=0,
    test_ratio=0.20,
    notes="Satsim train test split ",
)

print("Creating CL Partitions")
real_train = os.path.join(Real1Step, "train")
sim_train = os.path.join(SIM1Step, "train")
# (source train split, destination folders, third positional argument)
partition_jobs = (
    (real_train, Real2Step, "local_snr"),
    (real_train, Real5Step, "local_snr"),
    (sim_train, SIM2Step, "snr"),
    (sim_train, SIM5STep, "snr"),
)
for source_dir, target_dirs, partition_key in partition_jobs:
    partition_dataset(source_dir, target_dirs, partition_key)

print("Creating Train Test Splits")
# Every partition plus the two full train splits gets a sibling "<path>_TTS"
# dataset with an 87.5 / 12.5 / 0 train / val / test split.
all_paths = [*Real2Step, *Real5Step, *SIM2Step, *SIM5STep, sim_train, real_train]
for pt in all_paths:
    new_path = f"{pt}_TTS"
    print(pt)
    merge_coco(
        [pt],
        new_path,
        train_test_split=True,
        train_ratio=.875,
        val_ratio=.125,
        test_ratio=0,
    )

Creating Original L1 Dataset


Processing COCO Datasets: 100%|██████████| 8/8 [00:00<00:00, 15.56it/s]
Copying images: 100%|██████████| 6526/6526 [00:07<00:00, 910.58it/s] 
Copying images: 0it [00:00, ?it/s]
Copying images: 100%|██████████| 1630/1630 [00:02<00:00, 769.36it/s] 


Creating Original SIM Dataset


Copying images: 100%|██████████| 12000/12000 [00:14<00:00, 805.73it/s]
Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00,  1.63it/s]
Copying images: 100%|██████████| 9600/9600 [00:10<00:00, 912.94it/s] 
Copying images: 0it [00:00, ?it/s]
Copying images: 100%|██████████| 2400/2400 [00:04<00:00, 552.31it/s]


Creating CL Partitions


Copying images: 100%|██████████| 3265/3265 [00:05<00:00, 596.49it/s] 
Copying images: 100%|██████████| 3261/3261 [00:06<00:00, 511.24it/s]
Copying images: 100%|██████████| 1308/1308 [00:01<00:00, 1064.60it/s]
Copying images: 100%|██████████| 1309/1309 [00:02<00:00, 558.88it/s]
Copying images: 100%|██████████| 1311/1311 [00:02<00:00, 596.71it/s]
Copying images: 100%|██████████| 1308/1308 [00:02<00:00, 534.67it/s]
Copying images: 100%|██████████| 1290/1290 [00:01<00:00, 698.50it/s]
Copying images: 100%|██████████| 4800/4800 [00:08<00:00, 551.62it/s] 
Copying images: 100%|██████████| 4800/4800 [00:06<00:00, 701.20it/s] 
Copying images: 100%|██████████| 1920/1920 [00:03<00:00, 605.80it/s]
Copying images: 100%|██████████| 1920/1920 [00:03<00:00, 604.24it/s]
Copying images: 100%|██████████| 1920/1920 [00:02<00:00, 714.30it/s]
Copying images: 100%|██████████| 1920/1920 [00:04<00:00, 423.33it/s]
Copying images: 100%|██████████| 1920/1920 [00:02<00:00, 862.56it/s] 


Creating Train Test Splits
/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/CL_L1_High


Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00, 12.10it/s]
Copying images: 100%|██████████| 2860/2860 [00:05<00:00, 485.77it/s]
Copying images: 100%|██████████| 405/405 [00:00<00:00, 628.94it/s] 
Copying images: 0it [00:00, ?it/s]


/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/CL_L1_Low


Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00,  5.33it/s]
Copying images: 100%|██████████| 2854/2854 [00:04<00:00, 587.32it/s]
Copying images: 100%|██████████| 407/407 [00:00<00:00, 1167.24it/s]
Copying images: 0it [00:00, ?it/s]


/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/CL_L1_1


Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00, 22.81it/s]
Copying images: 100%|██████████| 1146/1146 [00:02<00:00, 454.30it/s]
Copying images: 100%|██████████| 162/162 [00:00<00:00, 326.24it/s]
Copying images: 0it [00:00, ?it/s]


/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/CL_L1_2


Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00, 10.05it/s]
Copying images: 100%|██████████| 1147/1147 [00:01<00:00, 609.86it/s]
Copying images: 100%|██████████| 162/162 [00:00<00:00, 332.80it/s]
Copying images: 0it [00:00, ?it/s]


/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/CL_L1_3


Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00, 18.61it/s]
Copying images: 100%|██████████| 1152/1152 [00:01<00:00, 968.52it/s] 
Copying images: 100%|██████████| 159/159 [00:00<00:00, 1672.41it/s]
Copying images: 0it [00:00, ?it/s]


/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/CL_L1_4


Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00, 17.31it/s]
Copying images: 100%|██████████| 1146/1146 [00:01<00:00, 706.33it/s]
Copying images: 100%|██████████| 162/162 [00:00<00:00, 820.46it/s]
Copying images: 0it [00:00, ?it/s]


/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/CL_L1_5


Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00, 16.19it/s]
Copying images: 100%|██████████| 1128/1128 [00:03<00:00, 333.07it/s]
Copying images: 100%|██████████| 162/162 [00:00<00:00, 438.00it/s]
Copying images: 0it [00:00, ?it/s]


/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/CL_Sim_High


Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00,  8.65it/s]
Copying images: 100%|██████████| 4200/4200 [00:08<00:00, 471.61it/s] 
Copying images: 100%|██████████| 600/600 [00:01<00:00, 319.68it/s]
Copying images: 0it [00:00, ?it/s]


/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/CL_Sim_Low


Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00,  3.08it/s]
Copying images: 100%|██████████| 4200/4200 [00:08<00:00, 492.68it/s] 
Copying images: 100%|██████████| 600/600 [00:01<00:00, 493.83it/s]
Copying images: 0it [00:00, ?it/s]


/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/CL_Sim_1


Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00,  6.09it/s]
Copying images: 100%|██████████| 1680/1680 [00:03<00:00, 432.05it/s]
Copying images: 100%|██████████| 240/240 [00:00<00:00, 314.12it/s]
Copying images: 0it [00:00, ?it/s]


/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/CL_Sim_2


Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00,  8.51it/s]
Copying images: 100%|██████████| 1680/1680 [00:03<00:00, 508.49it/s]
Copying images: 100%|██████████| 240/240 [00:00<00:00, 449.37it/s]
Copying images: 0it [00:00, ?it/s]


/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/CL_Sim_3


Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00,  5.17it/s]
Copying images: 100%|██████████| 1680/1680 [00:02<00:00, 607.72it/s] 
Copying images: 100%|██████████| 240/240 [00:00<00:00, 592.26it/s]
Copying images: 0it [00:00, ?it/s]


/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/CL_Sim_4


Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00,  8.93it/s]
Copying images: 100%|██████████| 1680/1680 [00:04<00:00, 397.51it/s] 
Copying images: 100%|██████████| 240/240 [00:00<00:00, 595.60it/s]
Copying images: 0it [00:00, ?it/s]


/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/CL_Sim_5


Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00, 15.37it/s]
Copying images: 100%|██████████| 1680/1680 [00:03<00:00, 458.35it/s]
Copying images: 100%|██████████| 240/240 [00:00<00:00, 656.42it/s]
Copying images: 0it [00:00, ?it/s]


/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/CL_Sim/train


Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00,  2.37it/s]
Copying images: 100%|██████████| 8400/8400 [00:12<00:00, 687.91it/s] 
Copying images: 100%|██████████| 1200/1200 [00:01<00:00, 912.04it/s] 
Copying images: 0it [00:00, ?it/s]


/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/CL_L1/train


Processing COCO Datasets: 100%|██████████| 1/1 [00:00<00:00,  1.90it/s]
Copying images: 100%|██████████| 5719/5719 [00:08<00:00, 684.44it/s] 
Copying images: 100%|██████████| 807/807 [00:00<00:00, 1250.81it/s]
Copying images: 0it [00:00, ?it/s]


In [2]:
# Image counts recorded for the 30-overlap dataset:
# /data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/Panoptic_MC_LMNT01_train_30_Overlap/train/images: 26863
# /data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/Panoptic_MC_LMNT01_train_30_Overlap/test/images: 0
# /data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/Panoptic_MC_LMNT01_train_30_Overlap/val/images: 6699
overlap30_root = "/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/Panoptic_MC_LMNT01_train_30_Overlap"
Large_val = f"{overlap30_root}/val"
small_val = f"{overlap30_root}/small_val"
Large_dataset = f"{overlap30_root}/train"
Small_Dataset = f"{overlap30_root}/small_train"

# Image counts recorded for the random-overlap dataset:
# /data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/Panoptic_MC_LMNT01_train_random_Overlap/train/images: 15369
# /data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/Panoptic_MC_LMNT01_train_random_Overlap/test/images: 0
# /data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/Panoptic_MC_LMNT01_train_random_Overlap/val/images: 3837
random_overlap_root = "/data/Dataset_Compilation_and_Statistics/Sentinel_Datasets/Finalized_datasets/Panoptic_MC_LMNT01_train_random_Overlap"
large_train_random = f"{random_overlap_root}/train"
small_train_random = f"{random_overlap_root}/small_train"
large_val_random = f"{random_overlap_root}/val"
small_val_random = f"{random_overlap_root}/small_val"

# Produce a fixed-size subset of each train/val split: 10000 images for the
# train splits and 2000 for the val splits.
# (source split, destination folder, dataset_size)
subset_jobs = (
    (Large_dataset, Small_Dataset, 10000),
    (Large_val, small_val, 2000),
    (large_train_random, small_train_random, 10000),
    (large_val_random, small_val_random, 2000),
)
for source_split, subset_dir, subset_size in subset_jobs:
    partition_dataset(source_split, [subset_dir], dataset_size=subset_size)

Copying images: 100%|██████████| 10000/10000 [00:14<00:00, 681.41it/s]
Copying images: 100%|██████████| 2000/2000 [00:01<00:00, 1089.42it/s]
Copying images: 100%|██████████| 10000/10000 [00:10<00:00, 952.02it/s]
Copying images: 100%|██████████| 2000/2000 [00:03<00:00, 545.67it/s]
