In [None]:
import os
import json
import pandas as pd

# Folder holding the annotation JSON files to scan
folder_path = '1_broken_tiles'

# One {"url", "broken"} record per labelled attachment
rows = []

# sorted() makes the row order reproducible; os.listdir() returns entries in
# arbitrary, filesystem-dependent order.
for file in sorted(os.listdir(folder_path)):
    if not file.endswith('.json'):
        continue

    json_path = os.path.join(folder_path, file)

    # Read with an explicit encoding instead of the platform default.
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Navigate data -> responses -> attachments. The `or {}` / `or []` fallbacks
    # also cover keys that are present but null.
    responses = (data.get("data") or {}).get("responses") or []
    for response in responses:
        attachments = response.get("attachments") or []
        for att in attachments:
            url = att.get("url")  # e.g. "images/filename.jpg"
            broken = att.get("broken")  # e.g. "yes" or "no"

            # Keep only attachments that carry both an image reference and a label.
            if url and broken:
                rows.append({"url": url, "broken": broken})

# Explicit columns keep the schema stable when no rows were collected, so the
# later `df['broken']` cells do not fail with a KeyError on an empty result.
df = pd.DataFrame(rows, columns=["url", "broken"])

# Show preview
display(df.head())

In [None]:
# Print a summary of the extracted labels (columns, dtypes, non-null counts).
df.info()

In [None]:
# Distinct label values present in the 'broken' column.
df.loc[:, 'broken'].unique()

In [None]:
# Rows whose label is the literal string 'na'.
df.loc[df['broken'].eq('na')]

In [None]:
# Rows labelled 'yes'.
df.loc[df['broken'].eq('yes')]

In [None]:
# Number of rows per label value.
df.loc[:, 'broken'].value_counts()

In [None]:
import os
import shutil
import pandas as pd

def copy_images_by_label(df, label_value, label_column, source_folder='images', target_folder='output'):
    """
    Copy images from a DataFrame where label_column == label_value into a target folder.

    Only the basename of each 'url' is used: the file is looked up directly inside
    `source_folder` and copied under the same name into `target_folder`, which is
    created if it does not exist. Entries that do not resolve to a regular file are
    reported and skipped, so one bad row does not abort the whole copy.

    Parameters:
    - df: pandas DataFrame with at least a 'url' and label column
    - label_value: e.g., 'yes' or 'no'
    - label_column: column to filter on (e.g., 'broken')
    - source_folder: folder where the images are located
    - target_folder: destination folder where images will be copied

    Returns:
    - None; progress and the final count are reported via print().
    """
    matching_urls = df.loc[df[label_column] == label_value, 'url']
    os.makedirs(target_folder, exist_ok=True)

    copied = 0
    # Iterate the 'url' column directly; iterrows() would build a Series per row
    # only to read a single field from it.
    for url in matching_urls:
        filename = os.path.basename(url)
        src_path = os.path.join(source_folder, filename)
        dst_path = os.path.join(target_folder, filename)

        # isfile() rather than exists(): a url with an empty basename (e.g. "images/")
        # resolves to the source folder itself, which exists but cannot be copied.
        if os.path.isfile(src_path):
            shutil.copy(src_path, dst_path)
            copied += 1
        else:
            print(f"❌ File not found: {src_path}")

    print(f"✅ Copied {copied} files to '{target_folder}'")

In [None]:
# Export each label into its own folder: 'yes' -> broken/, 'no' -> no_broken/.
for label_value, target_folder in (('yes', 'broken'), ('no', 'no_broken')):
    copy_images_by_label(df, label_value, 'broken', target_folder=target_folder)

In [None]:
import os
import shutil
import random
from pathlib import Path

# 🗂️ Set your base dataset path
base_path = Path("tile_dataset")

# 🔍 Source folders (all images & labels currently in train)
img_dir = base_path / "images/train"
lbl_dir = base_path / "labels/train"

# 🎯 Target folders
val_img_dir = base_path / "images/val"
val_lbl_dir = base_path / "labels/val"

# ✅ Create folders
val_img_dir.mkdir(parents=True, exist_ok=True)
val_lbl_dir.mkdir(parents=True, exist_ok=True)

# 🎲 Split logic
# glob() yields files in arbitrary, filesystem-dependent order; sorting first makes
# the seeded sample pick the same images on every run and machine.
image_paths = sorted(list(img_dir.glob("*.jpg")) + list(img_dir.glob("*.png")))
random.seed(42)
val_ratio = 0.2

# Re-running this cell would otherwise move a further 20% of the remaining training
# images into val/ every time, so the split is skipped once val/ already holds images.
existing_val_images = list(val_img_dir.glob("*.jpg")) + list(val_img_dir.glob("*.png"))
if existing_val_images:
    val_images = []
    print(f"⚠️ {val_img_dir} already contains {len(existing_val_images)} images; skipping split.")
else:
    val_images = random.sample(image_paths, int(len(image_paths) * val_ratio))

# 🚚 Move images + labels to val/
for img_path in val_images:
    # The label file shares the image's stem and lives in the parallel labels folder.
    label_path = lbl_dir / img_path.with_suffix(".txt").name

    shutil.move(img_path, val_img_dir / img_path.name)

    # An image without a label file is still moved, on its own.
    if label_path.exists():
        shutil.move(label_path, val_lbl_dir / label_path.name)

print(f"✅ Moved {len(val_images)} images to validation set.")

In [None]:
import yaml
from pathlib import Path

# Dataset root and the data.yaml file saved inside it
dataset_root = Path("tile_dataset")
data_yaml_path = dataset_root / "data.yaml"

# Define the structure
data_yaml = {
    # Write the absolute dataset root. Ultralytics resolves a relative "path" against
    # its configured datasets directory rather than this notebook's working directory,
    # which can make training fail with "images not found".
    # NOTE(review): confirm against the installed ultralytics version.
    "path": str(dataset_root.resolve()),
    # Image folders, given relative to "path"
    "train": "images/train",
    "val": "images/val",
    # Single class; its index is its position in this list
    "names": ["broken_tile"]
}

# Save it (explicit encoding instead of the platform default)
with open(data_yaml_path, "w", encoding="utf-8") as f:
    yaml.dump(data_yaml, f)

print(f"✅ data.yaml created at: {data_yaml_path.resolve()}")

In [None]:
pip install ultralytics

In [None]:
from ultralytics import YOLO

# Start from the pre-trained yolov8n.pt weights and train on the tile dataset config.
model = YOLO("yolov8n.pt")
train_settings = {"data": "tile_dataset/data.yaml", "epochs": 50, "imgsz": 640}
model.train(**train_settings)