In [4]:
import sys
import platform

# Report which interpreter binary and Python version this kernel runs on,
# so the environment that produced the labels is recorded in the output.
print(f"Executable: {sys.executable}")
print(f"Version: {platform.python_version()}")


Executable: /opt/conda/envs/myenv/bin/python
Version: 3.10.12


In [12]:
import os
import json
import glob
import torch
import logging

# Send conversion problems to a log file instead of the cell output.
# `force=True` replaces any handlers already attached to the root logger;
# without it basicConfig() is a silent no-op when a handler exists (for
# example after an earlier logging call in the same kernel session), and
# conversion_errors.log would never be written.
logging.basicConfig(
    filename="conversion_errors.log",
    level=logging.INFO,
    encoding="utf-8",  # logged class names may contain non-ASCII characters
    force=True,
)


In [13]:
# Select the compute backend: Apple MPS is checked first, then CUDA,
# and the CPU is the fallback when neither accelerator is available.
device = (
    "mps" if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)
print(f"Using device: {device}")

Using device: cuda


In [14]:
# Read the dataset's meta description from disk.
# NOTE(review): `meta_data` is not referenced by any other cell visible in
# this notebook; the class list below is written out by hand instead.
META_JSON_FILE = 'meta.json'
with open(META_JSON_FILE) as meta_handle:
    meta_data = json.loads(meta_handle.read())

# Class titles in the order that defines their numeric ids: the position of
# a title in this list is the class id written into the label files.
class_names = [
    "longitudinal crack",
    "transverse crack",
    "alligator crack",
    "block crack",
    "pothole",
    "manhole cover",
    "other corruption",
]

# Title -> id lookup used while converting annotations.
class_map = dict(zip(class_names, range(len(class_names))))
print("Loaded class mapping:", class_map)

Loaded class mapping: {'longitudinal crack': 0, 'transverse crack': 1, 'alligator crack': 2, 'block crack': 3, 'pothole': 4, 'manhole cover': 5, 'other corruption': 6}


In [15]:
# Locations of the JSON annotations to read and of the label files to write,
# for the train and validation splits.
INPUT_JSON_DIR_TRAIN = "datasets/yolo_dataset/train/annotations/"
INPUT_JSON_DIR_VAL = "datasets/yolo_dataset/val/annotations/"

OUTPUT_DIR_TRAIN = "datasets/yolo_dataset/train/labels/"
OUTPUT_DIR_VAL = "datasets/yolo_dataset/val/labels/"

# Collect every *.json annotation file of each split.
# Note: `json_files_test` holds the files of the *validation* directory.
json_files_train, json_files_test = (
    glob.glob(os.path.join(annotation_dir, "*.json"))
    for annotation_dir in (INPUT_JSON_DIR_TRAIN, INPUT_JSON_DIR_VAL)
)


In [16]:
def convert_json_to_yolo(json_file, output_dir, class_mapping=None):
    """Convert one JSON annotation file into a YOLO-style label file.

    The JSON document must provide ``size`` (``width`` / ``height``) and a
    list of ``objects``; every object carries a ``classTitle`` and the pixel
    coordinates of its box in ``points["exterior"]``. For each object whose
    class is known, one line
    ``<class_id> <x_center> <y_center> <width> <height>`` is written, with
    all four values divided by the image size and printed with six decimals.

    Args:
        json_file: Path of the annotation file to read.
        output_dir: Directory for the label file; created if it is missing.
        class_mapping: Optional ``{class title: class id}`` dict. When
            omitted, the notebook-level ``class_map`` is used.

    Returns:
        A human-readable ``"Converted: <json> → <txt>"`` message.

    Raises:
        KeyError: If ``size``, ``objects``, ``classTitle`` or ``points`` is
            missing from the document.
        ZeroDivisionError: If the recorded image width or height is 0 and
            the file contains at least one convertible object.
    """
    if class_mapping is None:
        # Fall back to the mapping built earlier in the notebook.
        class_mapping = class_map

    with open(json_file, "r", encoding="utf-8") as json_f:
        data = json.load(json_f)

    image_width, image_height = data["size"]["width"], data["size"]["height"]
    yolo_annotations = []

    for obj in data["objects"]:
        class_name = obj["classTitle"]
        if class_name not in class_mapping:
            logging.error(f"Class '{class_name}' not found in class mapping. Skipping object.")
            continue
        class_id = class_mapping[class_name]

        points = obj["points"]["exterior"]
        if len(points) < 2:
            # A box needs two corners; skip the object rather than abort the file.
            logging.error(
                f"Object of class '{class_name}' in '{json_file}' has fewer than "
                f"two exterior points. Skipping object."
            )
            continue

        # Use the min/max extent instead of trusting that the first point is
        # the top-left corner: reversed corners would otherwise produce a
        # negative width or height.
        xs = [point[0] for point in points]
        ys = [point[1] for point in points]
        # Clip the box to the image so the normalised values stay in [0, 1].
        x_min = _clamp(min(xs), 0, image_width)
        x_max = _clamp(max(xs), 0, image_width)
        y_min = _clamp(min(ys), 0, image_height)
        y_max = _clamp(max(ys), 0, image_height)

        x_center = (x_min + x_max) / 2.0 / image_width
        y_center = (y_min + y_max) / 2.0 / image_height
        width = (x_max - x_min) / image_width
        height = (y_max - y_min) / image_height

        yolo_annotations.append(
            f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"
        )

    output_txt = os.path.join(output_dir, _label_filename(json_file))
    os.makedirs(output_dir, exist_ok=True)
    with open(output_txt, "w", encoding="utf-8") as f:
        f.write("\n".join(yolo_annotations))

    return f"Converted: {json_file} → {output_txt}"


# Image extensions that may sit between the base name and ".json"
# (e.g. "frame.jpg.json") and must not end up in the label file name.
_IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp")


def _clamp(value, low, high):
    """Return ``value`` limited to the closed interval [low, high]."""
    return max(low, min(value, high))


def _label_filename(json_file):
    """Return the ``.txt`` label name for an annotation path (``a.jpg.json`` -> ``a.txt``)."""
    name = os.path.basename(json_file)
    root, ext = os.path.splitext(name)
    if ext.lower() == ".json":
        name = root
    root, ext = os.path.splitext(name)
    if ext.lower() in _IMAGE_EXTENSIONS:
        name = root
    return name + ".txt"

In [17]:
# Write one label file into OUTPUT_DIR_TRAIN for every training annotation.
print("--------------------------------", "Processing Train Annotations...", sep="\n")
for json_file in json_files_train:
    convert_json_to_yolo(json_file, OUTPUT_DIR_TRAIN)
print("--------------------------------", "Processed Train Annotations...", sep="\n")

--------------------------------
Processing Train Annotations...
--------------------------------
Processed Train Annotations...


In [18]:
# Write one label file into OUTPUT_DIR_VAL for every validation annotation
# (the validation file list is stored in `json_files_test`).
print("--------------------------------", "Processing Validation Annotations...", sep="\n")
for json_file in json_files_test:
    convert_json_to_yolo(json_file, OUTPUT_DIR_VAL)
print("--------------------------------", "Processed Validation Annotations...", sep="\n")

--------------------------------
Processing Validation Annotations...
--------------------------------
Processed Validation Annotations...


In [19]:
import os

folder_path = "datasets/yolo_dataset/train/labels"

# Sanity check: count the regular files (sub-directories are excluded)
# that now exist in the training label folder.
file_count = sum(
    os.path.isfile(os.path.join(folder_path, entry_name))
    for entry_name in os.listdir(folder_path)
)

print("Total number of files:", file_count)


Total number of files: 45708


In [20]:
# Template for the repository's .gitignore: keeps caches, datasets, model
# outputs, logs and editor/OS files out of version control.
gitignore_content = """
# Python cache and environments
__pycache__/
*.pyc
*.pyo
*.pyd
*.pdb
venv/
env/
*.env
*.venv

# Jupyter Notebook checkpoints
.ipynb_checkpoints

# Dataset folders (images + annotations)
datasets/
__MACOSX/
*.zip
*.tar.gz

# YOLO / model outputs
runs/
weights/
*.pt
*.pth
*.ckpt

# Logs and temporary files
*.log
*.tmp
*.bak

# OS specific files
.DS_Store
Thumbs.db

# IDE files
.vscode/
.idea/
*.sublime-project
*.sublime-workspace

# Checkpoints / progress files
processed_images.txt
"""

# Mode "w" replaces any .gitignore already present in the working directory.
with open(".gitignore", mode="w") as gitignore_file:
    gitignore_file.write(gitignore_content)

print(".gitignore file created successfully!")


.gitignore file created successfully!
