### Generate the 5-fold dataset

In [None]:
import os
import shutil
from sklearn.model_selection import KFold
from glob import glob

# Define paths
image_folder = r"C:\Users\rb01243\OneDrive - University of Surrey\Desktop\Images_for_Tooth_Seg\images"
label_folder = r"C:\Users\rb01243\OneDrive - University of Surrey\Desktop\Images_for_Tooth_Seg\labels"
save_location = r"C:\Users\rb01243\OneDrive - University of Surrey\Desktop\Images_for_Tooth_Seg\cross_val"

# A label is matched to its image by file name: "<image stem><label_suffix>.png".
# Leave empty when image and label share the same stem; set e.g. "_mask" otherwise.
label_suffix = ""

# Create save location if it doesn't exist
os.makedirs(save_location, exist_ok=True)

# Get all image and label file paths
image_paths = sorted(glob(os.path.join(image_folder, "*.jpg")))  # Adjust extension if needed
label_paths = sorted(glob(os.path.join(label_folder, "*.png")))  # Adjust extension if needed

# Ensure the number of images matches the number of labels
print(len(image_paths))
print(len(label_paths))
assert len(image_paths) == len(label_paths), "Mismatch between images and labels count."
assert image_paths, f"No images found in {image_folder}"


def _stem(path):
    """Return the file name of `path` without its directory and extension."""
    return os.path.splitext(os.path.basename(path))[0]


# Pair every image with its label by name instead of by list position: equal
# counts alone do not guarantee that the two sorted lists line up.
labels_by_stem = {_stem(path): path for path in label_paths}
unpaired_images = [
    path for path in image_paths
    if _stem(path) + label_suffix not in labels_by_stem
]
assert not unpaired_images, (
    f"{len(unpaired_images)} image(s) have no label named '<stem>{label_suffix}.png' "
    f"(first: {unpaired_images[0] if unpaired_images else None}); "
    "check the label folder or adjust label_suffix."
)
# Re-order the labels so that label_paths[i] always belongs to image_paths[i].
label_paths = [labels_by_stem[_stem(path) + label_suffix] for path in image_paths]

# Initialize KFold (fixed seed so the split is reproducible across runs)
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Split into folds.
# NOTE: files already present in the fold directories are not removed. If the
# dataset or the seed changes, delete `save_location` first; otherwise stale
# files from an earlier run can end up in both train and val.
for fold, (train_idx, val_idx) in enumerate(kf.split(image_paths)):
    fold_dir = os.path.join(save_location, f"fold_{fold+1}")

    for split_name, split_idx in (("train", train_idx), ("val", val_idx)):
        images_dir = os.path.join(fold_dir, split_name, "images")
        labels_dir = os.path.join(fold_dir, split_name, "labels")

        # Create subdirectories
        os.makedirs(images_dir, exist_ok=True)
        os.makedirs(labels_dir, exist_ok=True)

        # Copy each image together with its matching label
        for idx in split_idx:
            shutil.copy(image_paths[idx], images_dir)
            shutil.copy(label_paths[idx], labels_dir)

print("5-fold dataset with organized subfolders created successfully.")


197
197
5-fold dataset with organized subfolders created successfully.


In [12]:
# For every fold, show its number followed by the training and validation
# index arrays, each on its own line.
for fold, (train_idx, val_idx) in enumerate(kf.split(image_paths)):
    print(fold, train_idx, val_idx, sep="\n")

0
[  0   1   2   3   4   5   6   7   8  10  11  12  13  14  17  20  21  22
  23  25  26  27  28  29  31  32  33  34  35  36  37  38  39  40  41  42
  43  44  46  47  48  49  50  51  52  53  54  57  58  59  61  62  63  64
  65  66  70  71  72  74  77  78  79  80  81  83  84  86  87  88  89  90
  91  92  94  96  97  98  99 100 101 102 103 104 105 106 107 108 109 110
 111 116 117 118 120 121 123 124 125 126 127 128 129 130 131 133 134 135
 136 137 138 141 142 143 144 145 148 149 150 151 152 153 154 156 157 158
 159 160 161 163 164 165 166 167 168 169 171 172 173 176 177 178 179 180
 181 182 183 185 187 188 189 191 192 193 194 195 196]
[  9  15  16  18  19  24  30  45  55  56  60  67  68  69  73  75  76  82
  85  93  95 112 113 114 115 119 122 132 139 140 146 147 155 162 170 174
 175 184 186 190]
1
[  0   1   3   4   6   7   8   9  10  11  13  14  15  16  17  18  19  20
  21  23  24  25  27  28  30  32  33  34  37  39  40  43  44  45  47  48
  49  50  52  53  54  55  56  57  58  59  60  61

### Training loop for 5-fold cross-validation

In [1]:
import os
import subprocess
import sys


folds = 5

# Root folders shared by all arguments below. Both contain spaces, so every
# path is passed to train.py as its own argv entry rather than being pasted
# into a shell string (where the spaces would split it into several arguments).
code_root = "C:/Users/rb01243/OneDrive - University of Surrey/Documents/GitHub/OC CODE/Bone_Loss_Final/Restrictive-Hierarchical-Code"
cross_val_root = "C:/Users/rb01243/OneDrive - University of Surrey/Desktop/Images_for_Tooth_Seg/cross_val"

# Base parameters for train.py (argument list; fold-specific options are appended per fold).
# sys.executable launches train.py with the same interpreter that runs this notebook.
# train.py is resolved relative to the current working directory.
# NOTE(review): the True/False values are passed as plain strings — confirm
# train.py converts them to booleans (argparse `type=bool` treats any
# non-empty string, including "False", as True).
base_command = [
    sys.executable, "train.py",
    "--tree-root", f"{code_root}/class_tree_tl.json",
    "--class-map", f"{code_root}/class_map.csv",
    "--model-weights", f"{code_root}/unet_carvana_scale1.0_epoch2.pth",
    "--batch-size", "4",
    "--val-batch", "4",
    "--img-size", "620",
    "--test-remove", "True",
    "--epochs", "50",
    "--num-workers", "0",
    "--save-images-batch", "True",
    "--save-images-batch-num", "5",
    "--no-ph-weights", "True",
    "--hierarchical-loss", "False",
    "--learning-rate", "0.01",
    "--learning-rate-scheduler-minimum", "0.001",
]

# Loop through the 5-fold subfolders, training one model per fold.
failed_folds = []
for fold in range(folds):
    save_path = f"{code_root}/runs/exp{fold+1}"
    data_root = f"{cross_val_root}/fold_{fold+1}/train"
    val_dataset = f"{cross_val_root}/fold_{fold+1}/val"

    command = base_command + [
        "--save-path", save_path,
        "--data-root", data_root,
        "--val-dataset", val_dataset,
    ]

    # Keep going with the remaining folds if one fails, but make the failure visible.
    result = subprocess.run(command)
    if result.returncode != 0:
        failed_folds.append(fold + 1)
        print(f"Fold {fold+1}: train.py exited with code {result.returncode}")

if failed_folds:
    print(f"Training failed for fold(s): {failed_folds}")