In [None]:
from ultralytics import YOLO

# Load the YOLOv8 medium segmentation checkpoint
# ("yolov8l-seg.pt" is the larger alternative).
model = YOLO("yolov8m-seg.pt")

# All training settings gathered in one place so they are easy to tune.
# The dataset (noted by the author as ~100K images) is described by dataset.yaml.
train_settings = dict(
    data="C:/Users/PMLS/Desktop/xloop_project/deepfashion2/dataset.yaml",
    epochs=100,   # starting point; raise if needed
    batch=32,     # tune to the available GPU memory (e.g. 16, 32 or 64)
    imgsz=640,    # training image size (e.g. 640, 960 or 1280)
    workers=4,    # tune to the number of CPU cores
    device=0,     # first GPU; use 'cpu' to train on the CPU instead
    amp=True,     # mixed-precision training
    cache=True,   # cache images for faster training
)

# Kept as the cell's last expression so the value returned by train() is still displayed.
model.train(**train_settings)


In [1]:
import os

image_folder = r"C:\Users\PMLS\Desktop\xloop_project\deepfashion2\train\image"
label_folder = r"C:\Users\PMLS\Desktop\xloop_project\deepfashion2\train\labels"


def _stems_with_suffix(folder, suffix):
    """Return the extension-less names of the entries in `folder` whose name ends with `suffix`."""
    stems = set()
    for entry in os.listdir(folder):
        if entry.endswith(suffix):
            stems.add(os.path.splitext(entry)[0])
    return stems


image_files = _stems_with_suffix(image_folder, ".jpg")
label_files = _stems_with_suffix(label_folder, ".txt")

# A set difference leaves the names present on one side with no same-named partner on the other.
missing_labels = image_files.difference(label_files)  # images that have no label file
missing_images = label_files.difference(image_files)  # label files that have no image

print(f"Missing Labels: {len(missing_labels)}")
print(f"Missing Images: {len(missing_images)}")

# Show at most ten offending names per category.
if missing_labels:
    print("Images without labels:", list(missing_labels)[:10])
if missing_images:
    print("Labels without images:", list(missing_images)[:10])


Missing Labels: 0
Missing Images: 0


In [1]:
from PIL import Image
import os

# Folder holding the test-split images to check
image_folder = r'C:\Users\PMLS\Desktop\xloop_project\deepfashion2\images\test'

# Try every directory entry in turn; any entry for which opening or verifying
# raises IOError or SyntaxError is reported by file name.
for img_file in os.listdir(image_folder):
    img_path = os.path.join(image_folder, img_file)
    try:
        with Image.open(img_path) as img:
            img.verify()  # integrity check of the opened file
    except (IOError, SyntaxError):
        print(f"Corrupt image: {img_file}")


In [2]:
import os

# Define the root directory of the dataset
dataset_path = r'C:\Users\PMLS\Desktop\xloop_project\deepfashion2'  # Update this path if necessary

# Define the directories for train, validation, and test labels
label_dirs = [
    os.path.join(dataset_path, 'labels', 'train'),
    os.path.join(dataset_path, 'labels', 'val'),
    os.path.join(dataset_path, 'labels', 'test')
]


def shift_class_indices(lines):
    """Shift the class index of every label line down by one.

    Args:
        lines: Lines read from one label file. The first whitespace-separated
            field of each non-blank line is parsed as the integer class index.

    Returns:
        A list with one rewritten line per non-blank input line. Fields are
        joined by single spaces and every line ends with a newline, so that
        writing the list back keeps one annotation per line. An index greater
        than 0 is decremented; an index of 0 is left unchanged. Blank lines
        are dropped.

    Raises:
        ValueError: If the first field of a non-blank line is not an integer.
    """
    shifted = []
    for line in lines:
        parts = line.split()
        if not parts:
            # Blank line (e.g. a trailing empty line): nothing to shift.
            continue
        class_idx = int(parts[0])  # Extract the class index
        if class_idx > 0:  # Subtract 1 from class index (to shift to 0-based)
            parts[0] = str(class_idx - 1)
        # split() discarded the line ending; restore it so that lines are not
        # glued together when the file is written back.
        shifted.append(' '.join(parts) + '\n')
    return shifted


# WARNING: this cell rewrites the label files in place and is not idempotent.
# Every run lowers each class index greater than 0 by one more step, so run it
# exactly once per dataset.
for label_dir in label_dirs:
    # Ensure the directory exists
    if os.path.exists(label_dir):
        print(f"Processing directory: {label_dir}")

        # Iterate through each label file in the directory
        for filename in os.listdir(label_dir):
            if filename.endswith('.txt'):  # YOLO label files are typically .txt
                label_path = os.path.join(label_dir, filename)

                # Read the label file
                with open(label_path, 'r') as f:
                    lines = f.readlines()

                updated_lines = shift_class_indices(lines)

                # Write the updated labels back to the file
                with open(label_path, 'w') as f:
                    f.writelines(updated_lines)

        print(f"Finished processing directory: {label_dir}")
    else:
        print(f"Directory not found: {label_dir}")

print("Label files updated successfully!")


Processing directory: C:\Users\PMLS\Desktop\xloop_project\deepfashion2\labels\train
Finished processing directory: C:\Users\PMLS\Desktop\xloop_project\deepfashion2\labels\train
Processing directory: C:\Users\PMLS\Desktop\xloop_project\deepfashion2\labels\val
Finished processing directory: C:\Users\PMLS\Desktop\xloop_project\deepfashion2\labels\val
Processing directory: C:\Users\PMLS\Desktop\xloop_project\deepfashion2\labels\test
Finished processing directory: C:\Users\PMLS\Desktop\xloop_project\deepfashion2\labels\test
Label files updated successfully!


In [None]:
import os

# Dataset root; every split directory below is derived from it
dataset_path = r'C:\Users\PMLS\Desktop\xloop_project\deepfashion2'

# One labels/<split> and one images/<split> directory per split,
# listed in train, val, test order
label_dirs = [os.path.join(dataset_path, 'labels', split) for split in ('train', 'val', 'test')]
image_dirs = [os.path.join(dataset_path, 'images', split) for split in ('train', 'val', 'test')]

def _image_path_for_label(label_path):
    """Return the path of the image that pairs with the label file `label_path`.

    The nearest directory component named exactly 'labels' is swapped for
    'images' and the file extension is replaced with '.jpg' (images are
    assumed to be .jpg). Only that one component is touched, so other parts
    of the path or the file name that merely contain the text 'labels' are
    left alone. If no directory component is named 'labels', the plain
    textual substitution on the whole path is used as a fallback.
    """
    directory, filename = os.path.split(label_path)
    image_name = os.path.splitext(filename)[0] + '.jpg'

    below_labels = []  # directory components between 'labels' and the file
    head = directory
    while head:
        parent, component = os.path.split(head)
        if component == 'labels':
            return os.path.join(parent, 'images', *below_labels, image_name)
        if not component:
            # Reached a root (e.g. '/' or a drive root); nothing left to inspect.
            break
        below_labels.insert(0, component)
        head = parent

    return label_path.replace('labels', 'images').replace('.txt', '.jpg')


# Function to check and clean the labels and images
def clean_labels_with_segmentation(label_path):
    """Keep only the label lines judged to carry segmentation data.

    A line is kept when it has at least 5 whitespace-separated fields and at
    least one field from index 4 onward parses to a value greater than 0.
    If any line is kept, the label file is rewritten in place with just the
    kept lines. Otherwise the label file is deleted together with its paired
    image, when that image exists.

    Args:
        label_path: Path of the label (.txt) file to clean.

    Returns:
        None. The outcome is reported with print().

    Raises:
        ValueError: If an inspected field cannot be parsed as a float.
    """
    with open(label_path, 'r') as file:
        lines = file.readlines()

    valid_lines = []
    for line in lines:
        parts = line.strip().split()
        if len(parts) < 5:
            continue
        # NOTE(review): the check inspects fields from index 4 onward; whether
        # that is where the segmentation coordinates start depends on the
        # label layout -- confirm against the dataset format.
        if any(float(value) > 0 for value in parts[4:]):
            valid_lines.append(line)  # kept verbatim, including its line ending

    if valid_lines:
        # At least one usable line: rewrite the file with only those lines
        with open(label_path, 'w') as file:
            file.writelines(valid_lines)
        print(f"Kept valid label file: {label_path}")
        return

    # Nothing usable: remove the paired image (if present) and then the label
    image_path = _image_path_for_label(label_path)
    if os.path.exists(image_path):
        os.remove(image_path)
        print(f"Removed image file (no valid segmentation): {image_path}")

    os.remove(label_path)
    print(f"Removed invalid label file (no segmentation data): {label_path}")

# Run the cleaner over every .txt file found in each label directory
for label_dir in label_dirs:
    txt_names = [name for name in os.listdir(label_dir) if name.endswith('.txt')]
    for filename in txt_names:
        label_path = os.path.join(label_dir, filename)
        clean_labels_with_segmentation(label_path)


Kept valid label file: C:\Users\PMLS\Desktop\xloop_project\deepfashion2\labels\train\000001.txt
Kept valid label file: C:\Users\PMLS\Desktop\xloop_project\deepfashion2\labels\train\000002.txt
Kept valid label file: C:\Users\PMLS\Desktop\xloop_project\deepfashion2\labels\train\000003.txt
Kept valid label file: C:\Users\PMLS\Desktop\xloop_project\deepfashion2\labels\train\000004.txt
Kept valid label file: C:\Users\PMLS\Desktop\xloop_project\deepfashion2\labels\train\000005.txt
Kept valid label file: C:\Users\PMLS\Desktop\xloop_project\deepfashion2\labels\train\000006.txt
Kept valid label file: C:\Users\PMLS\Desktop\xloop_project\deepfashion2\labels\train\000007.txt
Kept valid label file: C:\Users\PMLS\Desktop\xloop_project\deepfashion2\labels\train\000008.txt
Kept valid label file: C:\Users\PMLS\Desktop\xloop_project\deepfashion2\labels\train\000009.txt
Kept valid label file: C:\Users\PMLS\Desktop\xloop_project\deepfashion2\labels\train\000010.txt
Kept valid label file: C:\Users\PMLS\Des