# YOLO: K-fold split preparation

In [5]:
import os
import shutil
from sklearn.model_selection import KFold
from ultralytics import YOLO
import torch
import yaml
from tqdm import tqdm

# Dataset locations; everything below is derived from ROOT_DIR.
ROOT_DIR = 'Methodology.2.v5i.yolo'
TRAIN_DIR = os.path.join(ROOT_DIR, 'train')
# Images and their label files sit in sibling sub-directories of TRAIN_DIR.
IMAGES_DIR, LABELS_DIR = (
    os.path.join(TRAIN_DIR, sub_dir) for sub_dir in ('images', 'labels')
)


def create_k_fold_splits(k=3, seed=42):
    """Partition the image files found in IMAGES_DIR into k train/validation folds.

    Args:
        k: Number of folds; passed to KFold as ``n_splits``.
        seed: Passed to KFold as ``random_state`` (shuffling is enabled).

    Returns:
        A list with one ``(train_files, val_files)`` tuple per fold. Both
        elements are lists of bare file names (no directory component).
    """
    image_extensions = {'.jpg', '.jpeg', '.png', '.bmp'}

    # os.listdir() yields entries in arbitrary, filesystem-dependent order and
    # KFold splits by position, so the listing is sorted to make the folds
    # depend only on the directory contents and the seed.
    all_images = sorted(
        name for name in os.listdir(IMAGES_DIR)
        if os.path.splitext(name)[1].lower() in image_extensions
    )

    kf = KFold(n_splits=k, shuffle=True, random_state=seed)

    # kf.split() yields index arrays; map them back to file names.
    return [
        ([all_images[i] for i in train_idx], [all_images[i] for i in val_idx])
        for train_idx, val_idx in kf.split(all_images)
    ]


def _copy_images_with_labels(file_list, img_target_dir, lbl_target_dir):
    """Copy each image from IMAGES_DIR and its same-stem .txt label from LABELS_DIR."""
    for img_file in file_list:
        label_name = os.path.splitext(img_file)[0] + '.txt'
        src_label = os.path.join(LABELS_DIR, label_name)

        shutil.copy(
            os.path.join(IMAGES_DIR, img_file),
            os.path.join(img_target_dir, img_file),
        )

        # An image without a label file is still copied; only a warning is printed.
        if os.path.exists(src_label):
            shutil.copy(src_label, os.path.join(lbl_target_dir, label_name))
        else:
            print(f"Warning: Label file for {img_file} does not exist.")


def prepare_folds(folds, base_output_dir='folds'):
    """Materialise each fold on disk as its own train/val dataset directory.

    For fold number n (1-based) this creates
    ``<base_output_dir>/fold_<n>/{train,val}/{images,labels}``, copies the
    listed images and their label files into it, and writes a ``data.yaml``
    with ``train``, ``val``, ``nc`` and ``names`` keys next to them.

    Files already present in the target directories are not removed, so
    re-running with a different split into the same ``base_output_dir`` leaves
    the earlier copies in place alongside the new ones.

    Args:
        folds: Iterable of ``(train_files, val_files)`` pairs of image file
            names, as returned by ``create_k_fold_splits``.
        base_output_dir: Directory under which the ``fold_<n>`` directories
            are created.
    """
    os.makedirs(base_output_dir, exist_ok=True)

    # The class metadata does not depend on the fold, and get_num_classes()
    # reads every label file, so it is computed once rather than per fold.
    data_yaml = {
        'train': 'train',
        'val': 'val',
        'nc': get_num_classes(),
        'names': get_class_names()
    }

    # Wrapping `folds` itself lets tqdm show the total when it has a length.
    for fold_no, (train_files, val_files) in enumerate(tqdm(folds), start=1):
        fold_dir = os.path.join(base_output_dir, f'fold_{fold_no}')

        # (file list, image target dir, label target dir) for each split.
        splits = [
            (
                file_list,
                os.path.join(fold_dir, split_name, 'images'),
                os.path.join(fold_dir, split_name, 'labels'),
            )
            for split_name, file_list in (('train', train_files), ('val', val_files))
        ]

        # Create all four directories first, then copy train followed by val.
        for _, images_dir, labels_dir in splits:
            os.makedirs(images_dir, exist_ok=True)
            os.makedirs(labels_dir, exist_ok=True)
        for file_list, images_dir, labels_dir in splits:
            _copy_images_with_labels(file_list, images_dir, labels_dir)

        with open(os.path.join(fold_dir, 'data.yaml'), 'w') as f:
            yaml.dump(data_yaml, f)

        print(f"Fold {fold_no} prepared at {fold_dir}")


def get_num_classes(labels_dir=None):
    """Return the class count implied by the label files in a directory.

    Every ``.txt`` file in the directory is read and the first
    whitespace-separated token of each non-blank line is parsed as an integer
    class id. The result is ``max(class id) + 1``: the ids are treated as
    zero-based indices, so a class that happens to be absent from the data
    still counts. For ids that are contiguous from 0 this equals the number
    of distinct ids.

    Args:
        labels_dir: Directory containing the label files. Defaults to the
            module-level LABELS_DIR.

    Returns:
        The class count, or 0 when no label line is found.

    Raises:
        ValueError: If the first token of a non-blank line is not an integer.
    """
    if labels_dir is None:
        labels_dir = LABELS_DIR

    max_class_id = -1
    for label_file in os.listdir(labels_dir):
        if not label_file.endswith('.txt'):
            continue
        with open(os.path.join(labels_dir, label_file), 'r') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Blank or whitespace-only line: nothing to parse.
                    continue
                max_class_id = max(max_class_id, int(fields[0]))
    return max_class_id + 1


def get_class_names():
    """Return the class names as a new list on every call.

    The names are hard-coded; replace them with your actual class names.
    """
    class_labels = ('depth', 'length', 'width')
    return list(class_labels)


# Build the 3-fold split (fixed seed), then write each fold under 'folds0'.
folds = create_k_fold_splits(seed=42, k=3)
prepare_folds(folds=folds, base_output_dir='folds0')


1it [00:42, 42.09s/it]

Fold 1 prepared at folds0/fold_1


2it [01:22, 41.09s/it]

Fold 2 prepared at folds0/fold_2


3it [02:00, 40.10s/it]

Fold 3 prepared at folds0/fold_3





# COCO: K-fold split preparation

In [2]:
import os
import json
import shutil
from sklearn.model_selection import KFold
from tqdm import tqdm

# Paths and settings for the COCO-format dataset
ROOT_DIR = 'Methodology.2.v5i.coco'
TRAIN_DIR = os.path.join(ROOT_DIR, 'train')
IMAGES_DIR = TRAIN_DIR  # images are read from the same directory as the annotation file
ANNOTATIONS_FILE = os.path.join(TRAIN_DIR, '_annotations.coco.json')
FOLDS_DIR = os.path.join(ROOT_DIR, 'folds')
K = 3  # Number of folds
class_names = {1: 'depth', 2: 'length', 3: 'width'}  # Adjust based on your dataset

# Make sure the output root exists before anything is read
os.makedirs(FOLDS_DIR, exist_ok=True)

# Read the full annotation file once
with open(ANNOTATIONS_FILE, 'r') as f:
    coco = json.load(f)

# Top-level COCO sections used when building each fold
images = coco['images']
annotations = coco['annotations']
categories = coco['categories']

# IDs in the same order as `images`, so split indices apply to both lists
image_ids = [entry['id'] for entry in images]

# Shuffled K-fold splitter with a fixed seed
kf = KFold(n_splits=K, shuffle=True, random_state=42)

# Group annotations by the image they belong to
from collections import defaultdict

image_id_to_annotations = defaultdict(list)
for ann in annotations:
    image_id_to_annotations[ann['image_id']].append(ann)

# Perform K-Fold splitting
def copy_images_and_annotations(images_subset, target_images_dir, target_annotations_file):
    """Copy a subset of images and write a COCO JSON describing just that subset.

    Args:
        images_subset: List of COCO image dicts; each must have 'id' and
            'file_name'.
        target_images_dir: Existing directory the image files are copied into.
        target_annotations_file: Path of the JSON file to write.

    The written JSON contains the subset's 'images', the annotations whose
    'image_id' matches one of them, and the full 'categories' list. Top-level
    'info' and 'licenses' sections are carried over from the source file when
    it has them, so the subset keeps the same metadata as the original.
    """
    subset_annotations = []

    for img in tqdm(images_subset, desc="Copying images"):
        img_filename = img['file_name']
        shutil.copy(
            os.path.join(IMAGES_DIR, img_filename),
            os.path.join(target_images_dir, img_filename),
        )

        # .get() rather than indexing: indexing the defaultdict would insert
        # an empty list for every image that has no annotations.
        subset_annotations.extend(image_id_to_annotations.get(img['id'], []))

    # Start from the optional metadata sections, then add the required ones.
    subset_coco = {key: coco[key] for key in ('info', 'licenses') if key in coco}
    subset_coco['images'] = images_subset
    subset_coco['annotations'] = subset_annotations
    subset_coco['categories'] = categories

    with open(target_annotations_file, 'w') as f:
        json.dump(subset_coco, f)


# Existing files in a fold directory are not removed before copying.
for fold, (train_idx, val_idx) in enumerate(kf.split(image_ids), 1):
    print(f"Preparing Fold {fold}...")
    fold_dir = os.path.join(FOLDS_DIR, f'fold_{fold}')
    train_fold_dir = os.path.join(fold_dir, 'train')
    val_fold_dir = os.path.join(fold_dir, 'val')

    # Create the images/ and annotations/ directories for both splits
    for split_dir in (train_fold_dir, val_fold_dir):
        os.makedirs(os.path.join(split_dir, 'images'), exist_ok=True)
        os.makedirs(os.path.join(split_dir, 'annotations'), exist_ok=True)

    # `image_ids` and `images` share ordering, so the split indices select
    # the matching image dicts directly.
    train_images = [images[i] for i in train_idx]
    val_images = [images[i] for i in val_idx]

    # Training set first, then validation set
    for split_dir, split_images in ((train_fold_dir, train_images), (val_fold_dir, val_images)):
        copy_images_and_annotations(
            split_images,
            os.path.join(split_dir, 'images'),
            os.path.join(split_dir, 'annotations', '_annotations.coco.json')
        )

    print(f"Fold {fold} prepared at {fold_dir}\n")


Preparing Fold 1...


Copying images: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 550/550 [00:11<00:00, 48.14it/s]
Copying images: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 276/276 [00:05<00:00, 50.49it/s]


Fold 1 prepared at raw_data/folds/fold_1

Preparing Fold 2...


Copying images: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 551/551 [00:11<00:00, 48.83it/s]
Copying images: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 275/275 [00:05<00:00, 47.04it/s]


Fold 2 prepared at raw_data/folds/fold_2

Preparing Fold 3...


Copying images: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 551/551 [00:11<00:00, 47.35it/s]
Copying images: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 275/275 [00:06<00:00, 44.88it/s]

Fold 3 prepared at raw_data/folds/fold_3




