In [1]:
!pip install roboflow
# !pip install -U albumentations
# !pip install kaggle

import os
import glob as gb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import albumentations as A
import cv2
import shutil
import hashlib
import random
import shutil
import yaml
import json
import re
import zipfile
from roboflow import Roboflow
from tqdm import tqdm
from albumentations.pytorch import ToTensorV2

Collecting roboflow
  Downloading roboflow-1.1.47-py3-none-any.whl.metadata (9.7 kB)
Collecting filetype (from roboflow)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading roboflow-1.1.47-py3-none-any.whl (80 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m80.4/80.4 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)
Installing collected packages: filetype, roboflow
Successfully installed filetype-1.2.0 roboflow-1.1.47


In [2]:
def sizes_of_image(folder_path, num_of_classes = 20):
    """
    Tally the array shapes of the .jpg images found under a folder.

    The folder layout is guessed from its number of entries: more than
    `num_of_classes` entries means the images sit directly in `folder_path`;
    otherwise every entry is treated as a sub-folder and searched for .jpg files.

    Parameters:
    - folder_path (str): Folder to inspect.
    - num_of_classes (int): Entry-count threshold used to pick the layout. Defaults to 20.

    Returns:
    - pandas.Series: Value counts of the image shapes. The series is empty when
      the folder is empty or no .jpg file was found.
    """
    entries = os.listdir(folder_path)

    if len(entries) == 0:
        print('this folder is empty')
        image_groups = []
    elif len(entries) > num_of_classes:
        image_groups = [gb.glob(pathname=folder_path + '/*.jpg')]
    else:
        image_groups = [gb.glob(pathname=folder_path + '/' + folder + '/*.jpg') for folder in entries]

    size = []
    for images in image_groups:
        # One progress bar per searched folder
        for img in tqdm(images):
            size.append(plt.imread(img).shape)

    # Counted once after the scan instead of once per image; an empty list
    # still produces an (empty) Series, so the result is always defined.
    size_series = pd.Series(size, dtype=object).value_counts()

    print(f'\n{size_series}')
    return size_series


def unzip_and_delete(zip_file_path: str, extract_dir: str = '/kaggle/working/'):
    """
    Extract a zip archive and remove the archive once extraction succeeded.

    Any exception raised along the way is reported with print() and not re-raised,
    so a failed extraction leaves the archive in place.

    Parameters:
    - zip_file_path (str): Location of the archive to extract.
    - extract_dir (str): Folder that receives the archive contents. Defaults to '/kaggle/working/'.
    """
    try:
        archive = zipfile.ZipFile(zip_file_path, 'r')
        with archive:
            archive.extractall(extract_dir)
        print(f"Extracted contents to {extract_dir}")

        # Only reached when extraction did not raise
        os.remove(zip_file_path)
        print(f"Deleted zip file: {zip_file_path}")
    except Exception as error:
        print(f"An error occurred: {error}")


def rename_and_move_folder(src_folder_path: str, new_folder_name: str, dest_dir: str):
    """
    Move a folder into `dest_dir` under a new name.

    Failures of the move itself are reported with print() and not re-raised.

    Parameters:
    - src_folder_path (str): Folder to move.
    - new_folder_name (str): Name the folder gets inside `dest_dir`.
    - dest_dir (str): Parent directory of the moved folder; created when missing.
    """
    # The parent directory is created on demand
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)

    # Final location of the folder after the move
    target_path = os.path.join(dest_dir, new_folder_name)

    try:
        os.rename(src_folder_path, target_path)
    except Exception as error:
        print(f"An error occurred: {error}")
    else:
        print(f"Folder renamed to {new_folder_name} and moved to {dest_dir}")


# Function to resize image
def preprocess_image(image_path, target_size=(640, 640)):
    """
    Load an image with OpenCV and resize it.

    Parameters:
    - image_path (str): Path of the image file to read.
    - target_size (tuple): Size passed to cv2.resize, as (width, height). Defaults to (640, 640).

    Returns:
    - The resized image array as returned by cv2.resize.

    Raises:
    - FileNotFoundError: If OpenCV could not read the file (missing, unreadable or not an image).
    """
    img = cv2.imread(image_path)

    # cv2.imread signals failure by returning None instead of raising
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    return cv2.resize(img, target_size)


# Function to parse .txt label file with complex bounding box points
def parse_txt_annotation(label_path):
    """
    Read a label file whose lines look like `<class_id> <v1> <v2> ...`.

    Blank lines (for example a trailing empty line) are skipped.

    Parameters:
    - label_path (str): Path of the .txt label file.

    Returns:
    - list: One `(class_id, points)` tuple per annotation line, where `class_id`
      is an int and `points` is the list of remaining values as floats.
    """
    bboxes = []
    with open(label_path, 'r') as f:
        for line in f:
            data = [float(value) for value in line.split()]
            if not data:
                # Nothing on this line; indexing data[0] would fail
                continue
            class_id = int(data[0])
            points = data[1:]  # Rest are the coordinates of bounding box points
            bboxes.append((class_id, points))
    return bboxes


# Function to write annotations back to a .txt label file
def save_txt_annotation(label_path, bboxes):
    """
    Write annotations to a label file, one `<class_id> <values...>` line each.

    Parameters:
    - label_path (str): File to create or overwrite.
    - bboxes (iterable): `(class_id, points)` pairs; the values in `points` are
      written space-separated after the class id.
    """
    rows = []
    for class_id, points in bboxes:
        joined_points = ' '.join(str(value) for value in points)
        rows.append(f"{class_id} {joined_points}\n")

    with open(label_path, 'w') as out_file:
        out_file.writelines(rows)


# Function to reduce a flat point list to its enclosing box [x_min, y_min, x_max, y_max]
def points_to_bbox(points):
    """
    Compute the axis-aligned box that encloses a flat list of points.

    Parameters:
    - points (sequence): Coordinates laid out as x0, y0, x1, y1, ...

    Returns:
    - list: [x_min, y_min, x_max, y_max]

    NOTE(review): a 4-value YOLO box (x_center, y_center, width, height) would be
    read here as the two points (x_center, y_center) and (width, height) -
    confirm the label files really contain point lists.
    """
    xs = points[0::2]  # even positions hold x values
    ys = points[1::2]  # odd positions hold y values
    return [min(xs), min(ys), max(xs), max(ys)]


# Function to apply augmentations
def augment_image(image, bboxes, augment_type):
    """
    Apply one named flip/rotation recipe to an image and its annotations.

    Every annotation is first reduced to its enclosing box with points_to_bbox,
    then image and boxes go through a single albumentations pipeline so that the
    boxes follow the image and each box keeps its class label.

    Parameters:
    - image: Image array to augment.
    - bboxes (list): `(class_id, points)` tuples as produced by parse_txt_annotation.
    - augment_type (str): One of 'hflip', 'vflip', 'rot', 'hflip_vflip',
      'hflip_rot', 'vflip_rot', 'hflip_vflip_rot'.

    Returns:
    - tuple: `(augmented_image, augmented_bboxes)` where `augmented_bboxes` is a
      list of `(class_id, [x_min, y_min, x_max, y_max])` tuples.

    Raises:
    - ValueError: If `augment_type` is not one of the supported names.
    """
    # Factories rather than instances, so every call builds fresh transforms
    steps = {
        'hflip': lambda: A.HorizontalFlip(p=1.0),
        'vflip': lambda: A.VerticalFlip(p=1.0),
        'rot': lambda: A.Rotate(limit=15, p=1.0),
    }
    recipes = {
        'hflip': ('hflip',),
        'vflip': ('vflip',),
        'rot': ('rot',),
        'hflip_vflip': ('hflip', 'vflip'),
        'hflip_rot': ('hflip', 'rot'),
        'vflip_rot': ('vflip', 'rot'),
        'hflip_vflip_rot': ('hflip', 'vflip', 'rot'),
    }

    if augment_type not in recipes:
        raise ValueError(f"Unknown augment_type {augment_type!r}; expected one of {sorted(recipes)}")

    # bbox_params tells albumentations how to read the boxes and which extra
    # field carries their labels.
    # assumes the coordinates are normalized to [0, 1], which is what the
    # 'albumentations' box format expects - TODO confirm against the label files
    transform = A.Compose(
        [steps[step]() for step in recipes[augment_type]],
        bbox_params=A.BboxParams(format='albumentations', label_fields=['labels']),
    )

    # Convert points to bounding boxes [x_min, y_min, x_max, y_max] and split labels and bboxes
    simple_bboxes = [points_to_bbox(points) for _, points in bboxes]
    labels = [class_id for class_id, _ in bboxes]

    transformed = transform(image=image, bboxes=simple_bboxes, labels=labels)

    # Re-attach each label to its box, using plain Python types so the values
    # can be written straight to a label file
    augmented_bboxes = [
        (int(label), [float(value) for value in bbox])
        for bbox, label in zip(transformed['bboxes'], transformed['labels'])
    ]
    return transformed['image'], augmented_bboxes


# Main function to process and augment the dataset
def process_and_augment_data(image_folder, label_folder):
    """
    Write seven augmented copies of every labelled image in a folder.

    For each .jpg/.jpeg/.png image that has a same-named .txt label file, the
    image is resized to 640x640, every augmentation in the list below is
    applied, and the results are saved next to the originals as
    `<name>_<aug_type><ext>` and `<name>_<aug_type>.txt`.

    Parameters:
    - image_folder (str): Folder holding the images; augmented images are written here too.
    - label_folder (str): Folder holding the labels; augmented labels are written here too.

    NOTE(review): running this twice on the same folders also augments the
    files produced by the first run.
    NOTE(review): the saved labels hold the boxes returned by augment_image,
    i.e. [x_min, y_min, x_max, y_max] - confirm the consumer expects that layout.
    """
    augmentations = [
        ('hflip', 'Horizontal Flip'),
        ('vflip', 'Vertical Flip'),
        ('rot', 'Rotation between -15 and +15 degrees'),
        ('hflip_vflip', 'Horizontal and Vertical Flip'),
        ('hflip_rot', 'Horizontal Flip and Rotation'),
        ('vflip_rot', 'Vertical Flip and Rotation'),
        ('hflip_vflip_rot', 'Horizontal Flip, Vertical Flip, and Rotation')
    ]
    image_suffixes = ('.jpg', '.jpeg', '.png')

    # os.listdir returns a snapshot, so files written below are not revisited in this run
    for image_filename in os.listdir(image_folder):
        # Split off the real extension only; text such as '.jpg' in the middle
        # of a file name must not be rewritten
        stem, extension = os.path.splitext(image_filename)
        if extension not in image_suffixes:
            continue

        image_path = os.path.join(image_folder, image_filename)
        label_path = os.path.join(label_folder, stem + '.txt')
        if not os.path.exists(label_path):
            continue

        # Preprocess image (resize)
        processed_img = preprocess_image(image_path, target_size=(640, 640))

        # Parse bounding boxes from label
        bboxes = parse_txt_annotation(label_path)

        # Apply each augmentation to the image and save results
        for aug_type, _aug_desc in augmentations:
            augmented_img, augmented_bboxes = augment_image(processed_img, bboxes, aug_type)

            augmented_image_path = os.path.join(image_folder, f'{stem}_{aug_type}{extension}')
            cv2.imwrite(augmented_image_path, augmented_img)

            augmented_label_path = os.path.join(label_folder, f'{stem}_{aug_type}.txt')
            save_txt_annotation(augmented_label_path, augmented_bboxes)


# def remap_labels(dataset_path, label_mapping):
#     label_folders = ['train', 'valid', 'test']
    
#     for folder in label_folders:
#         label_path = os.path.join(dataset_path, folder, 'labels')
        
#         for label_file in os.listdir(label_path):
#             label_file_path = os.path.join(label_path, label_file)
            
#             # Open the label file and read all lines
#             with open(label_file_path, 'r') as f:
#                 lines = f.readlines()
            
#             # Open the same file in write mode to replace the old labels with the new ones
#             with open(label_file_path, 'w') as f:
#                 for line in lines:
#                     parts = line.split()
#                     old_label = int(parts[0])
#                     new_label = label_mapping[old_label]
                    
#                     # Replace the old label with the new label and write the line back
#                     f.write(f"{new_label} " + " ".join(parts[1:]) + "\n")


def remap_labels(dataset_path, label_mapping):
    """
    Rewrite the class id of every annotation in a dataset's label files.

    The files under `<dataset_path>/<split>/labels` are processed for the splits
    'train', 'valid' and 'test'. Each file is fully converted in memory before
    it is written, so a problem in a file leaves that file untouched.

    Parameters:
    - dataset_path (str): Root folder of the dataset.
    - label_mapping (dict): Maps an old class id (int) to the new class id, or to
      the string 'delete' to drop annotations of that class.

    Raises:
    - KeyError: If a label file uses a class id that has no entry in `label_mapping`.
    """
    label_folders = ['train', 'valid', 'test']

    for folder in label_folders:
        label_path = os.path.join(dataset_path, folder, 'labels')

        for label_file in os.listdir(label_path):
            label_file_path = os.path.join(label_path, label_file)

            with open(label_file_path, 'r') as f:
                lines = f.readlines()

            remapped_lines = []
            for line in lines:
                parts = line.split()
                if not parts:
                    # Blank line: nothing to remap
                    continue

                old_label = int(parts[0])
                new_label = label_mapping.get(old_label)

                # Without this check the literal text "None" would be written as class id
                if new_label is None:
                    raise KeyError(f"No mapping for class id {old_label} (found in {label_file_path})")

                # If the new label is 'delete', skip this line
                if new_label == 'delete':
                    continue

                remapped_lines.append(f"{new_label} " + " ".join(parts[1:]) + "\n")

            # The file is only opened for writing once its new content is complete
            with open(label_file_path, 'w') as f:
                f.writelines(remapped_lines)


# Lower-case file suffixes that copy_images treats as images
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')


def copy_images(src_folders, dest_folder):
    """
    Collect the image files of several folder trees in one flat folder.

    Each source folder is walked recursively. A file whose name is already
    present in `dest_folder` is skipped, so the first file found under a given
    name wins. The number of copied files is printed at the end.

    Parameters:
    - src_folders (iterable): Folders to search.
    - dest_folder (str): Folder receiving the copies; created when missing.
    """
    if not os.path.exists(dest_folder):
        os.makedirs(dest_folder)

    copied = 0
    for src_root in src_folders:
        for current_dir, _subdirs, names in os.walk(src_root):
            for name in names:
                if not name.lower().endswith(IMAGE_EXTENSIONS):
                    continue

                target = os.path.join(dest_folder, name)
                if os.path.exists(target):
                    # Same file name already collected earlier
                    continue

                shutil.copy2(os.path.join(current_dir, name), target)
                copied += 1

    print(f"Total number of images copied: {copied}")


# Define the text file extension
# The trailing comma matters: ('.txt') without it is just the string '.txt'
TEXT_EXTENSIONS = ('.txt',)

def copy_text_files(src_folders, dest_folder):
    """
    Collect the .txt files of several folder trees in one flat folder.

    Each source folder is walked recursively. A file whose name is already
    present in `dest_folder` is skipped, so the first file found under a given
    name wins. The number of copied files is printed at the end.

    Parameters:
    - src_folders (iterable): Folders to search.
    - dest_folder (str): Folder receiving the copies; created when missing.
    """
    if not os.path.exists(dest_folder):
        os.makedirs(dest_folder)

    text_file_count = 0

    for folder in src_folders:
        for root, dirs, files in os.walk(folder):
            for file in files:
                if file.lower().endswith(TEXT_EXTENSIONS):
                    src_file = os.path.join(root, file)
                    dest_file = os.path.join(dest_folder, file)

                    # If the file already exists in the destination, skip it
                    if not os.path.exists(dest_file):
                        shutil.copy2(src_file, dest_file)
                        text_file_count += 1

    print(f"Total number of text files copied: {text_file_count}")


def numbers_of_image(folder_path, num_of_classes = 20):
    """
    Print how many .jpg and .txt files a dataset folder contains.

    The layout is guessed from the number of entries in `folder_path`: more than
    `num_of_classes` entries means the files sit directly in the folder;
    otherwise every entry is treated as a sub-folder and counted separately.

    Parameters:
    - folder_path (str): Folder to inspect.
    - num_of_classes (int): Entry-count threshold used to pick the layout. Defaults to 20.

    Returns:
    - dict or None: For the sub-folder layout, a dict mapping each entry name to
      its position in the directory listing; None for the other two cases.
    """
    entry_total = len(os.listdir(folder_path))

    if entry_total == 0:
        print('this folder is empty')
        return None

    if entry_total > num_of_classes:
        images = gb.glob(pathname=folder_path + '/*.jpg')
        text = gb.glob(pathname=folder_path + '/*.txt')
        print(f'for that path of data, we found {len(images)} image and {len(text)} text')
        return None

    classes = {}
    for position, folder in enumerate(os.listdir(folder_path)):
        classes[folder] = position
        sub_folder = folder_path + '/' + folder
        images = gb.glob(pathname=sub_folder + '/*.jpg')
        text = gb.glob(pathname=sub_folder + '/*.txt')
        print(f'for that path of data, we found {len(images)} in folder {folder} and and {len(text)} text in folder {folder}\n')
    print(f'The classification classes is {classes}')
    return classes


def remove_empty_labels(label_folder: str, image_folder: str, image_extensions=['.jpg', '.png']):
    """
    Delete label files without content together with their image.

    A label file counts as empty when it holds nothing but whitespace. For each
    such file, the first existing `<name><ext>` in `image_folder` (extensions
    tried in the given order) is deleted as well.

    Parameters:
    - label_folder (str): Folder whose files are checked.
    - image_folder (str): Folder searched for the matching image.
    - image_extensions (list): Image extensions to try, in order.
    """
    for label_name in os.listdir(label_folder):
        label_file_path = os.path.join(label_folder, label_name)

        with open(label_file_path, 'r') as handle:
            has_annotations = bool(handle.read().strip())
        if has_annotations:
            continue

        os.remove(label_file_path)

        # Image shares the label's name, only the extension differs
        stem = os.path.splitext(label_name)[0]
        candidates = (os.path.join(image_folder, stem + ext) for ext in image_extensions)
        first_existing = next((path for path in candidates if os.path.exists(path)), None)
        if first_existing is not None:
            os.remove(first_existing)


def get_file_hash(file_path: str, chunk_size=8192) -> str:
    """
    Return the MD5 hex digest of a file's bytes.

    The file is read in pieces of `chunk_size` bytes, so it never has to fit in
    memory as a whole.
    """
    digest = hashlib.md5()
    with open(file_path, "rb") as stream:
        while True:
            block = stream.read(chunk_size)
            if block == b"":
                break
            digest.update(block)
    return digest.hexdigest()


def delete_duplicate_images_and_labels(images_folder: str, labels_folder: str, image_extensions=None):
    """
    Delete images whose bytes duplicate an earlier image, plus their label file.

    Images are compared through get_file_hash. The first file listed for a given
    hash is kept; every later one is removed together with `<name>.txt` in
    `labels_folder` when that file exists. The number of removed images is printed.

    Parameters:
    - images_folder (str): Folder holding the images.
    - labels_folder (str): Folder holding the label files.
    - image_extensions (list): Extensions that count as images.
      Defaults to ['.jpg', '.png', '.jpeg'].
    """
    if image_extensions is None:
        image_extensions = ['.jpg', '.png', '.jpeg']
    suffixes = tuple(image_extensions)

    kept_by_hash = {}  # hash -> name of the first image seen with it
    removed = 0

    for name in os.listdir(images_folder):
        path = os.path.join(images_folder, name)
        if not (os.path.isfile(path) and name.lower().endswith(suffixes)):
            continue

        digest = get_file_hash(path)
        if digest not in kept_by_hash:
            kept_by_hash[digest] = name
            continue

        # Same bytes as an image that was kept earlier
        os.remove(path)
        removed += 1

        twin_label = os.path.join(labels_folder, os.path.splitext(name)[0] + '.txt')
        if os.path.exists(twin_label):
            os.remove(twin_label)

    print(f"Total duplicates found and deleted: {removed}")


def remove_duplicate_labels(label_folder: str, image_folder: str, image_extensions=['.jpg', '.png']):
    """
    Delete label files whose content repeats an earlier label file, plus their image.

    Contents are compared after stripping surrounding whitespace. The first file
    listed for a given content is kept; for every later one the label file and
    the first existing `<name><ext>` in `image_folder` are deleted.

    Parameters:
    - label_folder (str): Folder whose files are compared.
    - image_folder (str): Folder searched for the matching image.
    - image_extensions (list): Image extensions to try, in order.
    """
    first_seen = {}  # content hash -> name of the first label file with that content

    for label_name in os.listdir(label_folder):
        label_file_path = os.path.join(label_folder, label_name)

        with open(label_file_path, 'r') as handle:
            normalized = handle.read().strip()
        digest = hashlib.md5(normalized.encode()).hexdigest()

        if digest not in first_seen:
            first_seen[digest] = label_name
            continue

        os.remove(label_file_path)

        stem = os.path.splitext(label_name)[0]
        for ext in image_extensions:
            candidate = os.path.join(image_folder, stem + ext)
            if os.path.exists(candidate):
                os.remove(candidate)
                break  # only the first matching image is removed


def rename_images_and_labels(image_folder, label_folder, output_image_folder, output_label_folder):
    """
    Move every image and its label file to the output folders as image1, image2, ...

    Images are numbered in sorted file-name order. Each image is paired with the
    label file that has the same name without extension, and the pairing is
    validated before any file is moved.

    Parameters:
    - image_folder (str): Folder holding the images (.jpg, .jpeg, .png, .bmp).
    - label_folder (str): Folder holding the .txt label files.
    - output_image_folder (str): Folder receiving the renamed images; created when missing.
    - output_label_folder (str): Folder receiving the renamed labels; created when missing.

    Raises:
    - AssertionError: If the number of images differs from the number of labels.
    - ValueError: If an image has no same-named label, or two images share a name.
    """
    os.makedirs(output_image_folder, exist_ok=True)
    os.makedirs(output_label_folder, exist_ok=True)

    images = sorted([f for f in os.listdir(image_folder) if f.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp'))])
    labels = sorted([f for f in os.listdir(label_folder) if f.lower().endswith('.txt')])

    assert len(images) == len(labels), "The number of images and labels must be the same"

    # Pair by name, not by list position: the two sorted lists can disagree
    # (e.g. 'x.jpg' sorts before 'x.k.jpg' but 'x.txt' sorts after 'x.k.txt')
    stems = [os.path.splitext(image)[0] for image in images]
    labels_by_stem = {os.path.splitext(label)[0]: label for label in labels}

    unmatched = [image for image, stem in zip(images, stems) if stem not in labels_by_stem]
    if unmatched:
        raise ValueError(f"No label file found for: {unmatched}")
    if len(set(stems)) != len(stems):
        raise ValueError("Several images share the same name without extension")

    for i, (image, stem) in enumerate(zip(images, stems)):
        new_name = f'image{i+1}'
        label = labels_by_stem[stem]

        # Move the image under its new name
        src_image_path = os.path.join(image_folder, image)
        dst_image_path = os.path.join(output_image_folder, new_name + os.path.splitext(image)[1])
        os.rename(src_image_path, dst_image_path)

        # Move the matching label under the same new name
        src_label_path = os.path.join(label_folder, label)
        dst_label_path = os.path.join(output_label_folder, new_name + os.path.splitext(label)[1])
        os.rename(src_label_path, dst_label_path)

    print(f"Renamed {len(images)} images and labels.")


def split_dataset(image_folder, label_folder, output_folder, train_pct, test_pct, val_pct, seed=None):
    """
    Copy a labelled image set into shuffled train / test / val subsets.

    Each image is paired with the label file that has the same name without
    extension. The pairs are shuffled and copied to
    `<output_folder>/<subset>/images` and `<output_folder>/<subset>/labels` for
    the subsets 'train', 'test' and 'val'. The validation subset receives
    whatever remains after the train and test shares are taken.

    Parameters:
    - image_folder (str): Folder holding the images (.jpg, .jpeg, .png, .bmp).
    - label_folder (str): Folder holding the .txt label files.
    - output_folder (str): Root folder of the split dataset.
    - train_pct, test_pct, val_pct (number): Subset shares in percent; must sum to 100.
    - seed (int, optional): Seed for a reproducible shuffle. When None, the
      module-level random generator is used.

    Raises:
    - AssertionError: If the percentages do not sum to 100, or the number of
      images differs from the number of labels.
    - ValueError: If an image has no same-named label file.
    """
    # Tolerance keeps fractional percentages such as 33.3 + 33.3 + 33.4 valid
    assert abs(train_pct + test_pct + val_pct - 100) < 1e-6, "The percentages must sum up to 100"

    images = sorted([f for f in os.listdir(image_folder) if f.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp'))])
    labels = sorted([f for f in os.listdir(label_folder) if f.lower().endswith('.txt')])

    assert len(images) == len(labels), "The number of images and labels must be the same"

    # Pair by name, not by list position: the two sorted lists can disagree
    # (e.g. 'x.jpg' sorts before 'x.k.jpg' but 'x.txt' sorts after 'x.k.txt')
    labels_by_stem = {os.path.splitext(label)[0]: label for label in labels}
    combined = []
    for image in images:
        stem = os.path.splitext(image)[0]
        if stem not in labels_by_stem:
            raise ValueError(f"No label file found for image {image}")
        combined.append((image, labels_by_stem[stem]))

    if seed is None:
        random.shuffle(combined)
    else:
        random.Random(seed).shuffle(combined)

    # Multiply before dividing so whole-number shares are not lost to float rounding
    train_split = int(len(combined) * train_pct / 100)
    test_split = int(len(combined) * test_pct / 100)

    train_data = combined[:train_split]
    test_data = combined[train_split:train_split + test_split]
    val_data = combined[train_split + test_split:]

    def copy_data(data, subset_name):
        """Copy the (image, label) pairs of one subset into its folders."""
        subset_image_folder = os.path.join(output_folder, subset_name, 'images')
        subset_label_folder = os.path.join(output_folder, subset_name, 'labels')

        os.makedirs(subset_image_folder, exist_ok=True)
        os.makedirs(subset_label_folder, exist_ok=True)

        for image, label in data:
            shutil.copy2(os.path.join(image_folder, image), os.path.join(subset_image_folder, image))
            shutil.copy2(os.path.join(label_folder, label), os.path.join(subset_label_folder, label))

    copy_data(train_data, 'train')
    copy_data(test_data, 'test')
    copy_data(val_data, 'val')

    print(f"Training set: {len(train_data)} images and labels")
    print(f"Test set: {len(test_data)} images and labels")
    print(f"Validation set: {len(val_data)} images and labels")


def create_dataset_yaml(output_path, train_path, val_path, test_path, labels):
    """
    Write a dataset description file in YAML.

    The file holds the keys 'train', 'val' and 'test' (the given paths),
    'names' (the class names) and 'nc' (the number of class names).

    Parameters:
    - output_path (str): File to create or overwrite.
    - train_path (str): Path stored under 'train'.
    - val_path (str): Path stored under 'val'.
    - test_path (str): Path stored under 'test'.
    - labels (list): Class names stored under 'names'.
    """
    dataset_config = dict(
        train=train_path,
        val=val_path,
        test=test_path,
        names=labels,
        nc=len(labels),
    )

    with open(output_path, 'w') as yaml_file:
        yaml.dump(dataset_config, yaml_file, default_flow_style=False)

    print(f"YAML file created at {output_path}")


def get_random_txt_file(folder_path, file_name=None, rand=True):
    """
    Return the path of a .txt file inside a folder.

    Parameters:
    - folder_path (str): Folder to look in.
    - file_name (str, optional): File to use when `rand` is False.
    - rand (bool): When True (default) a random .txt file of the folder is
      picked and `file_name` is ignored.

    Returns:
    - str: `folder_path` joined with the chosen file name.

    Raises:
    - FileNotFoundError: If the folder holds no .txt file.
    - ValueError: If `rand` is False and no `file_name` was given.
    """
    # List all .txt files in the folder
    txt_files = [f for f in os.listdir(folder_path) if f.endswith('.txt')]

    if not txt_files:
        raise FileNotFoundError("No .txt files found in the folder.")

    if rand:
        chosen_file = random.choice(txt_files)
    elif file_name is None:
        # Joining the folder with None would fail with an unhelpful TypeError
        raise ValueError("file_name must be provided when rand is False.")
    else:
        chosen_file = file_name

    return os.path.join(folder_path, chosen_file)

def read_file_with_dashes(file_path):
    """
    Print a text file two lines at a time, joined by ' ------- '.

    Every line is stripped of surrounding whitespace first. When the file has an
    odd number of lines, the last one is printed on its own.
    """
    with open(file_path, 'r') as handle:
        stripped = [raw.strip() for raw in handle]

    full_pairs, leftover = divmod(len(stripped), 2)

    for pair_index in range(full_pairs):
        left = stripped[2 * pair_index]
        right = stripped[2 * pair_index + 1]
        print(f"{left} ------- {right}")

    if leftover:
        print(stripped[-1])


def draw_bounding_boxes(image_path, label_path, class_names=None):
    """
    Show an image with the boxes of its label file drawn on top.

    Each label line is `<class_id>` followed by normalized coordinates: either
    four values (`x_center y_center width height`) or a flat list of at least
    three points (`x1 y1 x2 y2 ...`), for which the enclosing box is drawn.
    Blank lines are ignored; lines with any other number of values are
    reported and skipped.

    Parameters:
    - image_path (str): Image to display.
    - label_path (str): Label file belonging to the image.
    - class_names (list or dict, optional): Lookup from class id to the text
      drawn above a box. When None, a global `label_names` is used if one
      exists; otherwise the class id itself is drawn.

    Raises:
    - FileNotFoundError: If OpenCV could not read the image.
    """
    # Load the image (cv2.imread signals failure by returning None)
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    height, width = image.shape[:2]

    if class_names is None:
        class_names = globals().get('label_names')

    color = (0, 255, 0)  # Green color for bounding box

    with open(label_path, 'r') as f:
        for line in f:
            values = line.split()
            if not values:
                continue

            class_id = int(float(values[0]))
            coords = [float(value) for value in values[1:]]

            if len(coords) == 4:
                # Center/size box: convert normalized values to pixel corners
                x_center = int(coords[0] * width)
                y_center = int(coords[1] * height)
                bbox_width = int(coords[2] * width)
                bbox_height = int(coords[3] * height)
                x1 = int(x_center - bbox_width / 2)
                y1 = int(y_center - bbox_height / 2)
                x2 = int(x_center + bbox_width / 2)
                y2 = int(y_center + bbox_height / 2)
            elif len(coords) >= 6 and len(coords) % 2 == 0:
                # Point list: draw the box that encloses all points
                xs, ys = coords[0::2], coords[1::2]
                x1, x2 = int(min(xs) * width), int(max(xs) * width)
                y1, y2 = int(min(ys) * height), int(max(ys) * height)
            else:
                print(f"Skipping malformed label line in {label_path}: {line.strip()}")
                continue

            # Fall back to the numeric id when no usable name is available
            try:
                caption = str(class_names[class_id])
            except (TypeError, IndexError, KeyError):
                caption = str(class_id)

            cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
            cv2.putText(image, caption, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

    # Convert BGR to RGB for displaying via matplotlib
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Display the image
    plt.figure(figsize=(10, 10))
    plt.imshow(image_rgb)
    plt.axis('off')  # Hide axes
    plt.show()


def show_random_image_with_bbox(image_dir, label_dir):
    """
    Pick a random .jpg image and display it with its bounding boxes.

    The label file is expected in `label_dir` under the image's name with a
    .txt extension. A message is printed when the folder holds no .jpg image
    or the label file is missing.

    Parameters:
    - image_dir (str): Folder holding the images.
    - label_dir (str): Folder holding the label files.
    """
    images = [f for f in os.listdir(image_dir) if f.endswith('.jpg')]

    if not images:
        # random.choice would raise IndexError on an empty list
        print(f"No .jpg images found in {image_dir}")
        return

    # Pick a random image
    random_image = random.choice(images)

    # Swap only the final extension; '.jpg' may also occur inside the name
    label_file = os.path.splitext(random_image)[0] + '.txt'
    image_path = os.path.join(image_dir, random_image)
    label_path = os.path.join(label_dir, label_file)

    if os.path.exists(label_path):
        draw_bounding_boxes(image_path, label_path)
    else:
        print(f"Label file not found for {random_image}")


def rename_directory(current_name, new_name):
    """
    Rename `current_name` to `new_name` when it exists.

    The outcome is printed either way; a missing path is only reported.
    """
    if not os.path.exists(current_name):
        print(f"Directory {current_name} does not exist.")
        return

    os.rename(current_name, new_name)
    print(f"Directory renamed from {current_name} to {new_name}")


def get_folder_size(folder_path):
    """
    Return the summed size in bytes of all files below a folder.

    The folder is walked recursively. Paths that os.path.exists reports as
    missing are left out of the sum.
    """
    every_file = (
        os.path.join(current_dir, name)
        for current_dir, _subdirs, names in os.walk(folder_path)
        for name in names
    )
    return sum(os.path.getsize(path) for path in every_file if os.path.exists(path))


def zip_folder(folder_path, output_zip):
    """
    Pack a folder into `<output_zip>.zip`.

    Parameters:
    - folder_path (str): Folder whose contents are archived.
    - output_zip (str): Archive path without the '.zip' extension.

    Returns:
    - str: `output_zip` exactly as passed in (without the extension).
    """
    shutil.make_archive(base_name=output_zip, format='zip', root_dir=folder_path)
    print(f"{output_zip}.zip has been created successfully.")
    return output_zip



# Dataset 1

In [3]:
# # 1184 image after augmentation 18670 image
# rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])
# project = rf.workspace("weld-1-mba6s").project("final-dataset-rc2vm")
# version = project.version(3)
# dataset1 = version.download("yolov5")

# print('\n')

# dataset1.location

In [4]:
# Dataset 1: 1184 images, 3172 images after augmentation
# The Roboflow API key is read from the environment so that it is not stored in the notebook
rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])
project = rf.workspace("test-6g9pr").project("final-dataset-rc2vm-6nb7m")
version = project.version(1)
dataset1 = version.download("yolov5")

print('\n')

dataset1.location

loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in Final-Dataset-1 to yolov5pytorch:: 100%|██████████| 131306/131306 [00:02<00:00, 44532.78it/s]





Extracting Dataset Version Zip to Final-Dataset-1 in yolov5pytorch:: 100%|██████████| 6356/6356 [00:00<00:00, 7573.61it/s]








'/kaggle/working/Final-Dataset-1'

In [5]:
# Image-shape census for each split of dataset 1 (train, valid, test - in that order)
size_series_dataset1_train, size_series_dataset1_valid, size_series_dataset1_test = (
    sizes_of_image(f'{dataset1.location}/{split}')
    for split in ('train', 'valid', 'test')
)

0it [00:00, ?it/s]
100%|██████████| 2988/2988 [00:10<00:00, 290.01it/s]



(640, 640, 3)    2988
Name: count, dtype: int64


0it [00:00, ?it/s]
100%|██████████| 124/124 [00:00<00:00, 344.82it/s]



(640, 640, 3)    124
Name: count, dtype: int64


0it [00:00, ?it/s]
100%|██████████| 60/60 [00:00<00:00, 346.72it/s]


(640, 640, 3)    60
Name: count, dtype: int64





# Dataset 2

In [6]:
# Dataset 2
# The Roboflow API key is read from the environment so that it is not stored in the notebook
rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])
project = rf.workspace("test-ulmj1").project("defect-detection-renamed-classes")
version = project.version(4)
dataset2 = version.download("yolov5")

print('\n')

dataset2.location

loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in Defect-Detection-(renamed-classes)-4 to yolov5pytorch:: 100%|██████████| 446882/446882 [00:09<00:00, 46215.59it/s]





Extracting Dataset Version Zip to Defect-Detection-(renamed-classes)-4 in yolov5pytorch:: 100%|██████████| 18420/18420 [00:02<00:00, 7056.82it/s]






'/kaggle/working/Defect-Detection-(renamed-classes)-4'

In [7]:
# Image-shape census for each split of dataset 2 (train, valid, test - in that order)
size_series_dataset2_train, size_series_dataset2_valid, size_series_dataset2_test = (
    sizes_of_image(f'{dataset2.location}/{split}')
    for split in ('train', 'valid', 'test')
)

0it [00:00, ?it/s]
100%|██████████| 8343/8343 [00:37<00:00, 223.82it/s]



(640, 640, 3)    8343
Name: count, dtype: int64


0it [00:00, ?it/s]
100%|██████████| 520/520 [00:01<00:00, 323.73it/s]



(640, 640, 3)    520
Name: count, dtype: int64


0it [00:00, ?it/s]
100%|██████████| 341/341 [00:00<00:00, 341.89it/s]


(640, 640, 3)    341
Name: count, dtype: int64





# Dataset 3

In [8]:
# # 3664 image without augmentation
# rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])
# project = rf.workspace("welding-2bplp").project("weld-quality-inspection-rei9l")
# version = project.version(9)
# dataset3 = version.download("yolov5")

# print('\n')

# dataset3.location

In [9]:
# Dataset 3: 3664 images, 9720 images after augmentation
# This download used a different API key than the other datasets, so it gets
# its own environment variable; neither key is stored in the notebook
rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY_DATASET3"])
project = rf.workspace("z-gtntm").project("weld-quality-inspection-rei9l-epfj5")
version = project.version(1)
dataset3 = version.download("yolov5")

print('\n')

dataset3.location

loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in Weld-quality-inspection-1 to yolov5pytorch:: 100%|██████████| 624162/624162 [00:13<00:00, 46727.47it/s]





Extracting Dataset Version Zip to Weld-quality-inspection-1 in yolov5pytorch:: 100%|██████████| 19446/19446 [00:03<00:00, 6013.71it/s]






'/kaggle/working/Weld-quality-inspection-1'

In [10]:
# Image-shape census for each split of dataset 3 (train, valid, test - in that order)
size_series_dataset3_train, size_series_dataset3_valid, size_series_dataset3_test = (
    sizes_of_image(f'{dataset3.location}/{split}')
    for split in ('train', 'valid', 'test')
)

0it [00:00, ?it/s]
100%|██████████| 9090/9090 [00:44<00:00, 203.09it/s]



(640, 640, 3)    9090
Name: count, dtype: int64


0it [00:00, ?it/s]
100%|██████████| 422/422 [00:01<00:00, 324.41it/s]



(640, 640, 3)    422
Name: count, dtype: int64


0it [00:00, ?it/s]
100%|██████████| 205/205 [00:00<00:00, 330.42it/s]


(640, 640, 3)    205
Name: count, dtype: int64





# Dataset 4

In [11]:
# Dataset 4
# The Roboflow API key is read from the environment so that it is not stored in the notebook
rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])
project = rf.workspace("celebal").project("welding-aiu6l")
version = project.version(1)
dataset4 = version.download("yolov5")

print('\n')

dataset4.location

loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in Welding-1 to yolov5pytorch:: 100%|██████████| 35475/35475 [00:00<00:00, 37601.38it/s]





Extracting Dataset Version Zip to Welding-1 in yolov5pytorch:: 100%|██████████| 1456/1456 [00:00<00:00, 6813.01it/s]








'/kaggle/working/Welding-1'

In [12]:
# Image-shape census for each split of dataset 4 (train, valid, test - in that order)
size_series_dataset4_train, size_series_dataset4_valid, size_series_dataset4_test = (
    sizes_of_image(f'{dataset4.location}/{split}')
    for split in ('train', 'valid', 'test')
)

0it [00:00, ?it/s]
100%|██████████| 566/566 [00:01<00:00, 338.32it/s]



(640, 640, 3)    566
Name: count, dtype: int64


0it [00:00, ?it/s]
100%|██████████| 103/103 [00:00<00:00, 352.81it/s]



(640, 640, 3)    103
Name: count, dtype: int64


0it [00:00, ?it/s]
100%|██████████| 53/53 [00:00<00:00, 363.59it/s]


(640, 640, 3)    53
Name: count, dtype: int64





# Dataset 5

In [13]:
# Dataset 5
# The Roboflow API key is read from the environment so that it is not stored in the notebook
rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])
project = rf.workspace("welding-ds6wr").project("welding-all6y")
version = project.version(1)
dataset5 = version.download("yolov5")

print('\n')

dataset5.location

loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in welding-1 to yolov5pytorch:: 100%|██████████| 85720/85720 [00:02<00:00, 31878.79it/s]





Extracting Dataset Version Zip to welding-1 in yolov5pytorch:: 100%|██████████| 2226/2226 [00:00<00:00, 5575.25it/s]






'/kaggle/working/welding-1'

In [14]:
# Image-shape census for each split of dataset 5 (train, valid, test - in that order)
size_series_dataset5_train, size_series_dataset5_valid, size_series_dataset5_test = (
    sizes_of_image(f'{dataset5.location}/{split}')
    for split in ('train', 'valid', 'test')
)

0it [00:00, ?it/s]
100%|██████████| 774/774 [00:02<00:00, 301.63it/s]



(640, 640, 3)    774
Name: count, dtype: int64


0it [00:00, ?it/s]
100%|██████████| 222/222 [00:00<00:00, 309.50it/s]



(640, 640, 3)    222
Name: count, dtype: int64


0it [00:00, ?it/s]
100%|██████████| 111/111 [00:00<00:00, 319.01it/s]


(640, 640, 3)    111
Name: count, dtype: int64





# Dataset 6

In [15]:
# Fetch dataset 6 with the kaggle CLI; it arrives as a zip archive that the
# next cell unpacks.
!kaggle datasets download -d sukmaadhiwijaya/welding-defect-object-detection

Dataset URL: https://www.kaggle.com/datasets/sukmaadhiwijaya/welding-defect-object-detection
License(s): CC0-1.0
Downloading welding-defect-object-detection.zip to /kaggle/working
 87%|██████████████████████████████████▉     | 133M/152M [00:01<00:00, 51.7MB/s]
100%|████████████████████████████████████████| 152M/152M [00:01<00:00, 88.5MB/s]


In [16]:
# Unpack the Kaggle archive into the working directory; unzip_and_delete
# removes the zip file once extraction is done.
zip_file_path = '/kaggle/working/welding-defect-object-detection.zip'
extract_dir = '/kaggle/working/'

# Pass extract_dir explicitly: it used to be assigned but never handed to the
# helper, so editing it had no effect and the helper's default was used.
unzip_and_delete(zip_file_path, extract_dir)

Extracted contents to /kaggle/working/
Deleted zip file: /kaggle/working/welding-defect-object-detection.zip


In [17]:
# The extracted data sits in a directory nested inside another directory of
# the same name; move the inner one into the working directory under a
# shorter name.
extracted_name = 'The Welding Defect Dataset - v2'
destination_folder = '/kaggle/working'  # Destination folder
source_folder = destination_folder + '/' + extracted_name + '/' + extracted_name
new_folder_name = 'Welding Defect Dataset'

rename_and_move_folder(source_folder, new_folder_name, destination_folder)

Folder renamed to Welding Defect Dataset and moved to /kaggle/working


In [18]:
# Delete the leftover extraction directories, reporting what happened to each.
remove_folders = [
    '/kaggle/working/The Welding Defect Dataset - v2',
    '/kaggle/working/The Welding Defect Dataset',
]

for folder in remove_folders:
    if not os.path.exists(folder):
        print(f"Folder '{folder}' does not exist.")
        continue
    shutil.rmtree(folder)
    print(f"Folder '{folder}' has been removed.")

Folder '/kaggle/working/The Welding Defect Dataset - v2' has been removed.
Folder '/kaggle/working/The Welding Defect Dataset' has been removed.


In [19]:
# Image-shape summary for dataset 6 (the Kaggle "Welding Defect Dataset").
#
# Results are stored under dataset6 names: this cell previously reused the
# size_series_dataset5_* variables and so overwrote the dataset 5 summaries.
# The stray double slashes in the valid/test paths are removed as well.
dataset6_path = '/kaggle/working/Welding Defect Dataset'

size_series_dataset6_train = sizes_of_image(dataset6_path + '/train')

size_series_dataset6_valid = sizes_of_image(dataset6_path + '/valid')

size_series_dataset6_test = sizes_of_image(dataset6_path + '/test')

0it [00:00, ?it/s]
100%|██████████| 1619/1619 [00:05<00:00, 316.29it/s]



(640, 640, 3)    1619
Name: count, dtype: int64


0it [00:00, ?it/s]
100%|██████████| 283/283 [00:00<00:00, 347.10it/s]



(640, 640, 3)    283
Name: count, dtype: int64


0it [00:00, ?it/s]
100%|██████████| 126/126 [00:00<00:00, 328.02it/s]


(640, 640, 3)    126
Name: count, dtype: int64





# Preprocessing and Augmentation

In [20]:
# numbers_of_image('/kaggle/working/Weld-quality-inspection-9/train/images')

In [21]:
# numbers_of_image('/kaggle/working/Weld-quality-inspection-9/train/labels')

In [22]:
# # Folder paths (same for both original and augmented)
# image_folder = '/kaggle/working/Weld-quality-inspection-9/train/images'
# label_folder = '/kaggle/working/Weld-quality-inspection-9/train/labels'

# # Process and augment dataset, save generated files in the same folder
# process_and_augment_data(image_folder, label_folder)

In [23]:
# numbers_of_image('/kaggle/working/Weld-quality-inspection-9/train/images')

In [24]:
# numbers_of_image('/kaggle/working/Weld-quality-inspection-9/train/labels')

# New labels

In [25]:
# Class names of each source dataset, listed by original class id
# (as recorded by the notebook author):
#   dataset 1: ['Defect', 'Welding Line', 'Workpiece', 'porosity']
#   dataset 2: ['Burn-through', 'Crack', 'Crater', 'Incomplete penetration', 'Overflow', 'Porosity', 'Spatter', 'Undercut', 'Welding line', 'irregular_weld_Line']
#   dataset 3: ['Bad Welding', 'Crack', 'Excess Reinforcement', 'Good Welding', 'Porosity', 'Spatters']
#   dataset 4: ['bad_welding', 'crack', 'good_welding', 'pores']
#   dataset 5: ['Bad_Welding', 'Crack', 'Excess_Reinforcement', 'Good_Welding', 'Porosity', 'Spatters']
#   dataset 6: ['bad_welding', 'crack', 'Defect']
#
# Unified classes, listed by new class id:
#   ['Defect', 'Good Welding', 'Porosity', 'Bad Welding']
#
# Each dict below maps an original class id to a unified class id. The value
# 'delete' marks classes with no unified counterpart; presumably remap_labels
# drops those annotations — confirm in remap_labels.
# NOTE(review): the dataset 6 list above names id 1 'crack', yet it is mapped
# to 1 ('Good Welding') while datasets 2-5 map their crack class to 0
# ('Defect'). Verify the list against that dataset's own class file.
# NOTE(review): remap_labels presumably rewrites the label files in place, so
# running this cell twice would remap already-remapped ids — confirm.
label_mappings_1 = {0: 0, 1: 'delete', 2: 'delete', 3: 2}
label_mappings_2 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 2, 6: 0, 7: 0, 8: 'delete', 9: 0}
label_mappings_3 = {0: 3, 1: 0, 2: 3, 3: 1, 4: 2, 5: 0}
label_mappings_4 = {0: 3, 1: 0, 2: 1, 3: 2}
label_mappings_5 = {0: 3, 1: 0, 2: 3, 3: 1, 4: 2, 5: 0}
label_mappings_6 = {0: 3, 1: 1, 2: 0}

# Root directory of every source dataset.
dataset1_path = dataset1.location
dataset2_path = dataset2.location
dataset3_path = dataset3.location
dataset4_path = dataset4.location
dataset5_path = dataset5.location
dataset6_path = '/kaggle/working/Welding Defect Dataset'

# Apply each mapping to its dataset, in dataset order.
for _dataset_root, _id_map in (
    (dataset1_path, label_mappings_1),
    (dataset2_path, label_mappings_2),
    (dataset3_path, label_mappings_3),
    (dataset4_path, label_mappings_4),
    (dataset5_path, label_mappings_5),
    (dataset6_path, label_mappings_6),
):
    remap_labels(_dataset_root, _id_map)

# merge the datasets

In [26]:
# Create the images/ and labels/ folders of the merged dataset
# (no error if they already exist).
output_path = '/kaggle/working/final_dataset'
image_output = os.path.join(output_path, 'images')
label_output = os.path.join(output_path, 'labels')

for target_dir in (image_output, label_output):
    os.makedirs(target_dir, exist_ok=True)

In [27]:
# Roots of the six source datasets, in dataset order.
dataset_roots = [
    dataset1.location,
    dataset2.location,
    dataset3.location,
    dataset4.location,
    dataset5.location,
    '/kaggle/working/Welding Defect Dataset',
]

# Every split's images folder: train, valid, test for each dataset in turn.
source_folders = [
    root + '/' + split + '/images'
    for root in dataset_roots
    for split in ('train', 'valid', 'test')
]

destination_folder = '/kaggle/working/final_dataset/images'

copy_images(source_folders, destination_folder)

Total number of images copied: 25950


In [28]:
# Roots of the six source datasets, in dataset order.
dataset_roots = [
    dataset1.location,
    dataset2.location,
    dataset3.location,
    dataset4.location,
    dataset5.location,
    '/kaggle/working/Welding Defect Dataset',
]

# Every split's labels folder: train, valid, test for each dataset in turn.
source_folders = [
    root + '/' + split + '/labels'
    for root in dataset_roots
    for split in ('train', 'valid', 'test')
]

destination_folder = '/kaggle/working/final_dataset/labels'

copy_text_files(source_folders, destination_folder)

Total number of text files copied: 25950


# Remove the old datasets

In [29]:
# The files have been copied into final_dataset, so delete the per-source
# dataset directories, reporting what happened to each.
remove_folders = [
    dataset1.location,
    dataset2.location,
    dataset3.location,
    dataset4.location,
    dataset5.location,
    '/kaggle/working/Welding Defect Dataset',
]

for folder in remove_folders:
    if not os.path.exists(folder):
        print(f"Folder '{folder}' does not exist.")
        continue
    shutil.rmtree(folder)
    print(f"Folder '{folder}' has been removed.")

Folder '/kaggle/working/Final-Dataset-1' has been removed.
Folder '/kaggle/working/Defect-Detection-(renamed-classes)-4' has been removed.
Folder '/kaggle/working/Weld-quality-inspection-1' has been removed.
Folder '/kaggle/working/Welding-1' has been removed.
Folder '/kaggle/working/welding-1' has been removed.
Folder '/kaggle/working/Welding Defect Dataset' has been removed.


# Check the number of images and labels

In [30]:
# Report the file counts of the merged images and labels folders.
final_dataset_dir = '/kaggle/working/final_dataset'

numbers_of_image(final_dataset_dir + '/images')
print('\n')
numbers_of_image(final_dataset_dir + '/labels')

for that path of data, we found 25950 image and 0 text


for that path of data, we found 0 image and 25950 text


# Remove empty labels

In [31]:
# Run remove_empty_labels over the merged dataset.
# Note the argument order: labels folder first, then images folder.
final_dataset_dir = '/kaggle/working/final_dataset'
label_folder_path = final_dataset_dir + '/labels'
image_foler_path = final_dataset_dir + '/images'

remove_empty_labels(label_folder_path, image_foler_path)

In [32]:
# Report the file counts again after the empty-label cleanup.
final_dataset_dir = '/kaggle/working/final_dataset'

numbers_of_image(final_dataset_dir + '/images')
print('\n')
numbers_of_image(final_dataset_dir + '/labels')

for that path of data, we found 19727 image and 0 text


for that path of data, we found 0 image and 19727 text


# Remove duplicate images

In [33]:
# Run delete_duplicate_images_and_labels over the merged dataset.
# Note the argument order: images folder first, then labels folder.
final_dataset_dir = '/kaggle/working/final_dataset'
label_folder_path = final_dataset_dir + '/labels'
image_foler_path = final_dataset_dir + '/images'

delete_duplicate_images_and_labels(image_foler_path, label_folder_path)

Total duplicates found and deleted: 2147


In [34]:
# Report the file counts again after the duplicate-image cleanup.
final_dataset_dir = '/kaggle/working/final_dataset'

numbers_of_image(final_dataset_dir + '/images')
print('\n')
numbers_of_image(final_dataset_dir + '/labels')

for that path of data, we found 17580 image and 0 text


for that path of data, we found 0 image and 17580 text


# Remove duplicate labels

In [35]:
# Run remove_duplicate_labels over the merged dataset.
# Note the argument order: labels folder first, then images folder.
final_dataset_dir = '/kaggle/working/final_dataset'
label_folder_path = final_dataset_dir + '/labels'
image_foler_path = final_dataset_dir + '/images'

remove_duplicate_labels(label_folder_path, image_foler_path)

In [36]:
# Report the file counts again after the duplicate-label cleanup.
final_dataset_dir = '/kaggle/working/final_dataset'

numbers_of_image(final_dataset_dir + '/images')
print('\n')
numbers_of_image(final_dataset_dir + '/labels')

for that path of data, we found 15835 image and 0 text


for that path of data, we found 0 image and 15835 text


# rename the images

In [37]:
# Rename the cleaned images and labels; the inputs come from final_dataset
# and the output folders are under final_dataset_2.
cleaned_root = '/kaggle/working/final_dataset'
renamed_root = '/kaggle/working/final_dataset_2'

image_folder = cleaned_root + '/images'
label_folder = cleaned_root + '/labels'
output_image_folder = renamed_root + '/images'
output_label_folder = renamed_root + '/labels'

rename_images_and_labels(image_folder, label_folder, output_image_folder, output_label_folder)

Renamed 15835 images and labels.


In [38]:
# Report the file counts of the renamed images and labels folders.
renamed_dataset_dir = '/kaggle/working/final_dataset_2'

numbers_of_image(renamed_dataset_dir + '/images')
print('\n')
numbers_of_image(renamed_dataset_dir + '/labels')

for that path of data, we found 15835 image and 0 text


for that path of data, we found 0 image and 15835 text


# Remove the old datasets

In [39]:
# final_dataset has been superseded by final_dataset_2; delete it and report
# the outcome.
remove_folders = ['/kaggle/working/final_dataset']

for folder in remove_folders:
    if not os.path.exists(folder):
        print(f"Folder '{folder}' does not exist.")
        continue
    shutil.rmtree(folder)
    print(f"Folder '{folder}' has been removed.")

Folder '/kaggle/working/final_dataset' has been removed.


# split_dataset

In [40]:
# Inputs are the renamed images/labels; the split dataset is written under
# output_folder.
renamed_root = '/kaggle/working/final_dataset_2'
image_folder = renamed_root + '/images'
label_folder = renamed_root + '/labels'
output_folder = '/kaggle/working/final-Weld-Dataset'

# Percentages for the train / test / validation splits (they add up to 100).
train_pct, test_pct, val_pct = 90, 2, 8

split_dataset(image_folder, label_folder, output_folder, train_pct, test_pct, val_pct)


Training set: 14251 images and labels
Test set: 316 images and labels
Validation set: 1268 images and labels


In [41]:
# Report per-split file counts of the final dataset (train, val, test).
# The last call is left as a bare expression so its return value is displayed.
weld_dataset_dir = '/kaggle/working/final-Weld-Dataset'

numbers_of_image(weld_dataset_dir + '/train')
print('\n')
numbers_of_image(weld_dataset_dir + '/val')
print('\n')
numbers_of_image(weld_dataset_dir + '/test')

for that path of data, we found 0 in folder labels and and 14251 text in folder labels

for that path of data, we found 14251 in folder images and and 0 text in folder images

The classification classes is {'labels': 0, 'images': 1}


for that path of data, we found 0 in folder labels and and 1268 text in folder labels

for that path of data, we found 1268 in folder images and and 0 text in folder images

The classification classes is {'labels': 0, 'images': 1}


for that path of data, we found 0 in folder labels and and 316 text in folder labels

for that path of data, we found 316 in folder images and and 0 text in folder images

The classification classes is {'labels': 0, 'images': 1}


{'labels': 0, 'images': 1}

# Remove the old datasets

In [42]:
# The split has been written to final-Weld-Dataset; delete the intermediate
# final_dataset_2 folder and report the outcome.
remove_folders = ['/kaggle/working/final_dataset_2']

for folder in remove_folders:
    if not os.path.exists(folder):
        print(f"Folder '{folder}' does not exist.")
        continue
    shutil.rmtree(folder)
    print(f"Folder '{folder}' has been removed.")

Folder '/kaggle/working/final_dataset_2' has been removed.


# Create YAML

In [43]:
# Write the dataset config file next to the split folders.
weld_dataset_dir = '/kaggle/working/final-Weld-Dataset'
output_yaml_path = weld_dataset_dir + '/dataset_config.yaml'

# Split directories; each one holds an images/ and a labels/ subfolder.
train_dataset_path = weld_dataset_dir + '/train'
val_dataset_path = weld_dataset_dir + '/val'
test_dataset_path = weld_dataset_dir + '/test'

# Unified class names, listed in class-id order.
labels = ['Defect', 'Good Welding', 'Porosity', 'Bad Welding']

create_dataset_yaml(output_yaml_path, train_dataset_path, val_dataset_path, test_dataset_path, labels)


YAML file created at /kaggle/working/final-Weld-Dataset/dataset_config.yaml


# check the text lines

In [44]:
# folder_path = "/kaggle/working/final-Weld-Dataset/train/labels"  # Replace with your folder path

# random_txt_file = get_random_txt_file(folder_path)
# print(f"Reading from random file: {random_txt_file}")

# read_file_with_dashes(random_txt_file)

# Show some images

In [45]:
# # Path to the merged dataset (replace with your actual path)
# image_dir = "/kaggle/working/final-Weld-Dataset/train/images"
# label_dir = "/kaggle/working/final-Weld-Dataset/train/labels"

# # Label names (after merging)
# label_names = ['Defect', 'Good Welding', 'Workpiece', 'Porosity', 'Bad Welding', 'Spatter']

# # Show random image with bounding boxes
# show_random_image_with_bbox(image_dir, label_dir)

# Rename directory

In [46]:
# current_directory = '/kaggle/working/final-Weld-Dataset'
# dataset_location = '/kaggle/working/final-Weld-Dataset'

# rename_directory(current_directory, dataset_location)

# Upload to Kaggle

In [47]:
# # Save the dataset output to the /kaggle/working directory
# dataset_path = '/kaggle/working/final-Weld-Dataset'

# # Create dataset-metadata.json file with a valid id (alphanumeric + hyphen)
# metadata = {
#     "title": "Weld Dataset",  # Ensure the title is simple and clean
#     "id": "zyadashrafamar/weld-dataset",  # Use only alphanumeric characters and hyphens
#     "licenses": [
#         {
#             "name": "CC0-1.0"
#         }
#     ]
# }

# metadata_path = '/kaggle/working/dataset-metadata.json'
# with open(metadata_path, 'w') as f:
#     json.dump(metadata, f)

# # Ensure the kaggle.json API token is in the correct location
# !mkdir -p ~/.kaggle
# !cp /kaggle/input/kaggle-json/kaggle.json ~/.kaggle/
# !chmod 600 ~/.kaggle/kaggle.json

# # Upload the dataset to Kaggle using --dir-mode to upload directories
# !kaggle datasets create -p {dataset_path} --dir-mode zip

# Train the model

In [48]:
# Clone YOLOv5 and install its requirements.
# NOTE(review): the clone is not pinned to a tag or commit, so the code used
# depends on when the cell is run (the detect logs below report
# v7.0-369-g907bef2f for this run).
!git clone https://github.com/ultralytics/yolov5  # clone repo
# %cd switches the kernel's working directory to the clone, so the relative
# paths in later cells (train.py, detect.py, runs/...) resolve inside it.
%cd yolov5
%pip install -qr requirements.txt # install dependencies

import torch
from IPython.display import Image, clear_output  # to display images

# Print the torch version and the GPU name when CUDA is available, else 'CPU'.
print(f"Setup complete. Using torch {torch.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})")

Cloning into 'yolov5'...
remote: Enumerating objects: 16965, done.[K
remote: Counting objects: 100% (160/160), done.[K
remote: Compressing objects: 100% (111/111), done.[K
remote: Total 16965 (delta 81), reused 99 (delta 49), pack-reused 16805 (from 1)[K
Receiving objects: 100% (16965/16965), 15.70 MiB | 31.10 MiB/s, done.
Resolving deltas: 100% (11610/11610), done.
/kaggle/working/yolov5
Note: you may need to restart the kernel to use updated packages.
Setup complete. Using torch 2.4.0 (Tesla T4)


In [49]:
# Train starting from the yolov5s.pt weights on the merged dataset config:
# 640 px images, batch size 16, 101 epochs, with --cache enabled.
# WANDB_MODE="disabled" is set in the environment for this command only.
!WANDB_MODE="disabled" python train.py --img 640 --batch 16 --epochs 101 --data /kaggle/working/final-Weld-Dataset/dataset_config.yaml --weights yolov5s.pt --cache

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
[34m[1mtrain: [0mweights=yolov5s.pt, cfg=, data=/kaggle/working/final-Weld-Dataset/dataset_config.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=101, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, evolve_population=data/hyps, resume_evolve=None, bucket=, cache=ram, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=late

# Evaluate Custom YOLOv5 Detector Performance

In [50]:
# # Start tensorboard
# # Launch after you have started training
# # logs save in the folder "runs"
# %load_ext tensorboard
# %tensorboard --logdir runs

In [51]:
# Run detection on the test split with the best.pt weights from
# runs/train/exp, using a confidence threshold of 0.1.
# NOTE(review): the training log shows name=exp with exist_ok=False, so a
# repeated training run presumably writes to a different runs/train/exp*
# folder and this path would then point at older weights — confirm.
!python detect.py --weights runs/train/exp/weights/best.pt --img 640 --conf 0.1 --source /kaggle/working/final-Weld-Dataset/test/images

[34m[1mdetect: [0mweights=['runs/train/exp/weights/best.pt'], source=/kaggle/working/final-Weld-Dataset/test/images, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.1, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_format=0, save_csv=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
YOLOv5 🚀 v7.0-369-g907bef2f Python-3.10.14 torch-2.4.0 CUDA:0 (Tesla T4, 15095MiB)

Fusing layers... 
Model summary: 157 layers, 7020913 parameters, 0 gradients, 15.8 GFLOPs
image 1/316 /kaggle/working/final-Weld-Dataset/test/images/image10021.jpg: 640x640 1 Bad Welding, 11.4ms
image 2/316 /kaggle/working/final-Weld-Dataset/test/images/image10048.jpg: 640x640 1 Bad Welding, 11.5ms
image 3/316 /kaggle/working/final-Weld-Dataset/test/images/image10060.j

In [52]:
# Run detection with the same weights and confidence threshold (0.1) on the
# images under /kaggle/input/testimg.
!python detect.py --weights runs/train/exp/weights/best.pt --img 640 --conf 0.1 --source /kaggle/input/testimg

[34m[1mdetect: [0mweights=['runs/train/exp/weights/best.pt'], source=/kaggle/input/testimg, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.1, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_format=0, save_csv=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
YOLOv5 🚀 v7.0-369-g907bef2f Python-3.10.14 torch-2.4.0 CUDA:0 (Tesla T4, 15095MiB)

Fusing layers... 
Model summary: 157 layers, 7020913 parameters, 0 gradients, 15.8 GFLOPs
image 1/2 /kaggle/input/testimg/6581752_orig.png: 352x640 2 Defects, 2 Good Weldings, 6 Bad Weldings, 30.8ms
image 2/2 /kaggle/input/testimg/Screenshot 2024-10-05 004049.png: 416x640 1 Good Welding, 1 Bad Welding, 31.7ms
Speed: 0.4ms pre-process, 31.2ms inference, 74.5ms NMS per image at shape (1, 3, 640,

# Download the new weights with YOLO

In [53]:
# Print the size of the yolov5 folder in MB (1 MB = 1024*1024 bytes).
folder_path = '/kaggle/working/yolov5'
size_in_bytes = get_folder_size(folder_path)
size_in_mb = size_in_bytes / (1024*1024)
print(f"Folder size: {size_in_mb:.2f} MB")

Folder size: 111.44 MB


In [54]:
# Zip the whole yolov5 folder, which also contains the runs/ outputs written
# by the training and detection cells.
# output_zip is given without an extension; the helper reports creating
# '/kaggle/working/yolov5s_trained_321312.zip'.
output_zip = '/kaggle/working/yolov5s_trained_321312'
folder_path = '/kaggle/working/yolov5'
zip_file_path = zip_folder(folder_path, output_zip)

/kaggle/working/yolov5s_trained_321312.zip has been created successfully.
