In [1]:
# Remove preinstalled packages that conflict with the versions pinned below.
# `|| true` forces a zero exit status for this shell line whatever pip returns.
!pip uninstall -y tsfresh umap-learn bigframes dopamine-rl cesium preprocessing || true

# Reinstall a safe scikit-learn + numpy + scipy compatible with ultralytics.
# --force-reinstall replaces whatever versions are already present;
# --no-cache-dir makes pip skip its local cache.
!pip install --force-reinstall --no-cache-dir \
    numpy==1.26.4 \
    scipy==1.11.4 \
    scikit-learn==1.3.2

# --no-deps installs ultralytics alone, without installing its dependencies.
!pip install ultralytics==8.3.226 --no-deps

Found existing installation: tsfresh 0.21.0
Uninstalling tsfresh-0.21.0:
  Successfully uninstalled tsfresh-0.21.0
Found existing installation: umap-learn 0.5.9.post2
Uninstalling umap-learn-0.5.9.post2:
  Successfully uninstalled umap-learn-0.5.9.post2
Found existing installation: bigframes 2.12.0
Uninstalling bigframes-2.12.0:
  Successfully uninstalled bigframes-2.12.0
Found existing installation: dopamine_rl 4.1.2
Uninstalling dopamine_rl-4.1.2:
  Successfully uninstalled dopamine_rl-4.1.2
Found existing installation: cesium 0.12.4
Uninstalling cesium-0.12.4:
  Successfully uninstalled cesium-0.12.4
Found existing installation: preprocessing 0.1.13
Uninstalling preprocessing-0.1.13:
  Successfully uninstalled preprocessing-0.1.13
Collecting numpy==1.26.4
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ

In [2]:
import os

# Root folder for everything this notebook writes.
PROJECT_DIR = "/kaggle/working/Pothole_detection_project"

# Make sure the folder exists, then switch into it so that relative paths
# used afterwards resolve inside the project folder.
os.makedirs(PROJECT_DIR, exist_ok=True)
os.chdir(PROJECT_DIR)

current_dir = os.getcwd()
print("Current working directory : {}".format(current_dir))

Current working directory : /kaggle/working/Pothole_detection_project


### Create k folds

In [3]:
from sklearn.model_selection import StratifiedKFold
from tqdm import tqdm
import os
import pandas as pd
import numpy as np

# Training images.
IMGS_DIR = "/kaggle/input/pothole-dataset/pothole_dataset/images/train"
# Label (.txt) files are expected under labels/train, parallel to images/train.
# Pointing this at the image folder would make every image look like it has
# zero boxes and silently disable the stratification.
# NOTE(review): confirm the dataset layout on disk.
LABELS_DIR = "/kaggle/input/pothole-dataset/pothole_dataset/labels/train"

K = 5  # number of cross-validation folds

def count_boxes(ann_path):
    """Return how many non-blank lines the annotation file ``ann_path`` holds.

    Every non-blank line is counted as one box. A path that does not exist
    yields 0.
    """
    total = 0
    if os.path.exists(ann_path):
        with open(ann_path) as handle:
            for row in handle:
                # Whitespace-only rows are not annotations.
                if row.strip():
                    total += 1
    return total

# Build a DataFrame with one row per image and its number of annotated boxes.
# os.listdir returns names in arbitrary order; sorting fixes the row order so
# the seeded split below assigns the same images to the same folds on every run.
IMAGE_EXTS = ('.jpg', '.jpeg', '.png', '.bmp')
imgs = sorted(f for f in os.listdir(IMGS_DIR) if f.lower().endswith(IMAGE_EXTS))
if not imgs:
    raise FileNotFoundError(f"No images found in {IMGS_DIR}")

data = []
for img in tqdm(imgs, desc="Counting boxes"):
    ann = os.path.join(LABELS_DIR, os.path.splitext(img)[0] + '.txt')
    data.append({'filename': img, 'num_boxes': count_boxes(ann)})

df = pd.DataFrame(data, columns=['filename', 'num_boxes'])

# If no image has any box, stratification cannot do anything useful; this
# usually means LABELS_DIR does not point at the label files.
if (df['num_boxes'] == 0).all():
    print(f"WARNING: no boxes found for any image - check LABELS_DIR ({LABELS_DIR})")

# Stratification key: 0 = no boxes, 1 = 1-2 boxes, 2 = 3-5, 3 = 6-998, 4 = 999+.
bins = [1, 3, 6, 999]
df['stratify'] = np.where(df['num_boxes'] == 0, 0, np.digitize(df['num_boxes'], bins))

# K-fold CV, stratified on the box-count bucket.
skf = StratifiedKFold(n_splits=K, shuffle=True, random_state=42)

OUT_ROOT = f"{PROJECT_DIR}/pothole_dataset/folds"
os.makedirs(OUT_ROOT, exist_ok=True)

for fold, (train_idx, val_idx) in enumerate(skf.split(df, df['stratify'])):

    fold_num = fold + 1  # fold files are numbered from 1

    train_files = df.iloc[train_idx]['filename'].tolist()
    val_files = df.iloc[val_idx]['filename'].tolist()

    # --- Write .txt path lists (one absolute image path per line) ---
    train_txt = os.path.join(OUT_ROOT, f"fold_{fold_num}_train.txt")
    val_txt = os.path.join(OUT_ROOT, f"fold_{fold_num}_val.txt")

    with open(train_txt, 'w') as f:
        f.writelines(os.path.join(IMGS_DIR, name) + '\n' for name in train_files)
    with open(val_txt, 'w') as f:
        f.writelines(os.path.join(IMGS_DIR, name) + '\n' for name in val_files)

    # --- Write data.yaml (train/val entries are relative to `path`) ---
    yaml_content = "\n".join([
        f"path: {PROJECT_DIR}/pothole_dataset",
        f"train: folds/fold_{fold_num}_train.txt",
        f"val:   folds/fold_{fold_num}_val.txt",
        "nc: 1",
        "names: ['pothole']",
    ])

    yaml_path = os.path.join(OUT_ROOT, f"fold_{fold_num}_data.yaml")
    with open(yaml_path, 'w') as f:
        f.write(yaml_content)

print(f"Created {K}-fold splits. Each fold: ~{len(df)//K} val images.")

Counting boxes: 100%|██████████| 3765/3765 [00:03<00:00, 973.84it/s]

Created 5-fold splits. Each fold: ~753 val images.





# Train all folds

In [8]:
import os
import torch
from ultralytics import YOLO
from contextlib import redirect_stdout, redirect_stderr
import logging

def train_fold(fold_num, PROJECT_DIR="."):
    """Train a YOLO model on one cross-validation fold and save its mAP50-95.

    Loads ``yolov8n.pt``, trains it with ``fold_<fold_num>_data.yaml`` and
    writes two files under ``<PROJECT_DIR>/runs``:

    * ``pothole_fold_<fold_num>_log.txt`` - Python stdout/stderr captured
      while the model was created and trained;
    * ``pothole_fold_<fold_num>_map.txt`` - the ``metrics/mAP50-95(B)`` value
      taken from the training results.

    Args:
        fold_num: Fold number used to build the YAML, run and file names.
        PROJECT_DIR: Directory that contains ``pothole_dataset/folds``; the
            log and mAP files are written to its ``runs`` sub-directory.

    Returns:
        None. Exceptions raised during training are not re-raised: the
        message is appended to the fold's log file and printed.
    """
    dataset_dir = f"{PROJECT_DIR}/pothole_dataset"
    yaml_path = f"{dataset_dir}/folds/fold_{fold_num}_data.yaml"

    # --- Log & map paths ---
    log_dir = os.path.join(PROJECT_DIR, "runs")
    os.makedirs(log_dir, exist_ok=True)
    log_file_path = os.path.join(log_dir, f"pothole_fold_{fold_num}_log.txt")
    map_file_path = os.path.join(log_dir, f"pothole_fold_{fold_num}_map.txt")

    # --- scale LR ---
    # NOTE(review): lr0 is scaled for a batch of 32 (16 x 2 GPUs) but `batch`
    # below is passed as 16. If Ultralytics treats `batch` as the total across
    # devices, the effective batch is 16 and this scaling is off - confirm.
    per_gpu_batch = 16
    effective_batch = per_gpu_batch * 2
    lr0 = 0.01 * (effective_batch / 16)  # Linear scaling

    # --- Suppress Ultralytics console spam ---
    ul_logger = logging.getLogger("ultralytics")
    ul_logger.handlers = []
    ul_logger.propagate = False

    try:
        # Everything printed inside this block goes to the log file.
        with open(log_file_path, "w") as log_file, \
                redirect_stdout(log_file), redirect_stderr(log_file):

            model = YOLO("yolov8n.pt")

            # NOTE(review): `project` is the literal "runs" while the log files
            # use <PROJECT_DIR>/runs; the two presumably coincide only when
            # PROJECT_DIR is the current working directory - confirm.
            results = model.train(
                data=yaml_path,
                epochs=100,
                imgsz=640,
                batch=per_gpu_batch,
                device="0,1",
                name=f"pothole_fold_{fold_num}",
                project="runs",
                patience=20,
                augment=True,
                exist_ok=True,
                verbose=False,
                plots=False,
                lr0=lr0,
                lrf=0.1,
                optimizer="AdamW"
            )

            # NOTE(review): multi-GPU runs have been reported to return None
            # from train(); fail with a readable message in that case - confirm.
            if results is None:
                raise RuntimeError(
                    "model.train() returned no metrics; mAP file was not written"
                )

            # Save mAP
            map_score = results.results_dict['metrics/mAP50-95(B)']
            with open(map_file_path, "w") as f:
                f.write(str(map_score))

    except Exception as e:
        error_msg = str(e)

        # Write error to log (the log file is closed again at this point).
        with open(log_file_path, "a") as f:
            f.write(f"\n\n TRAINING FAILED: {error_msg}\n")

        print(f"ERROR in fold {fold_num}: {error_msg}")

In [9]:
import subprocess

# Train every fold in order. The fold count comes from K (set in the
# fold-creation cell) so this loop always matches the fold files written there.
for i in range(1, K + 1):
    print(f"====== Training Fold {i} ======")
    try:
        train_fold(i)
    except Exception as e:
        # Report which fold failed, then let the error stop the cell.
        print(f"ERROR in fold {i}: {e}")
        raise

print("\nFolds training completed")

Ultralytics 8.3.226 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
                                                       CUDA:1 (Tesla T4, 15095MiB)
Overriding model.yaml nc=80 with nc=1
Transferred 319/355 items from pretrained weights
Freezing layer 'model.22.dfl.conv.weight'
[34m[1mAMP: [0mrunning Automatic Mixed Precision (AMP) checks...
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt to 'yolo11n.pt': 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 5.4MB 69.1MB/s 0.1s
[34m[1mAMP: [0mchecks passed ‚úÖ
[34m[1mtrain: [0mFast image access ‚úÖ (ping: 3.7¬±1.8 ms, read: 38.8¬±26.0 MB/s, size: 425.9 KB)
[K[34m[1mtrain: [0mScanning /kaggle/input/pothole-dataset/pothole_dataset/labels/train... 3010 images, 279 backgrounds, 2 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 3012/3012 160.8it/s 18.7s<0.1s
[34m[1mtrain: [0m/kaggle/input/pothole-dataset/pothole_dataset/images/train/andrewmvd_3.png: ignoring corrupt image/l

In [None]:
import glob
import numpy as np

import re


def load_fold_maps(paths):
    """Read per-fold mAP files and return ``{fold_number: mAP}``.

    The fold number is parsed from a file name ending in
    ``fold_<N>_map.txt``; paths that do not match are ignored. The returned
    dict is ordered by fold number, so it does not depend on the order of
    ``paths``.
    """
    fold_maps = {}
    for path in paths:
        match = re.search(r"fold_(\d+)_map\.txt$", path)
        if match is None:
            continue
        with open(path) as fh:
            fold_maps[int(match.group(1))] = float(fh.read().strip())
    return dict(sorted(fold_maps.items()))


map_files = glob.glob("runs/pothole_fold_*_map.txt")
fold_maps = load_fold_maps(map_files)
maps = list(fold_maps.values())  # mAP values in fold order

if not maps:
    print("No fold mAP files found in runs/ - train the folds first.")
else:
    # glob order is arbitrary, so the best fold is looked up by its real
    # fold number rather than by position in the file list.
    cv_best_fold = max(fold_maps, key=fold_maps.get)
    print("Cross-Validation Results (mAP@0.5:0.95):")
    print(f"  Mean: {np.mean(maps):.4f}")
    print(f"  Std:  {np.std(maps):.4f}")
    print(f"  Best Fold: {cv_best_fold} (mAP: {fold_maps[cv_best_fold]:.4f})")

In [17]:
import os
import torch
from ultralytics import YOLO
from contextlib import redirect_stdout, redirect_stderr
import logging

DATASET_DIR = f"{PROJECT_DIR}/pothole_dataset"

# Starting weights: the best checkpoint of one cross-validation fold.
# NOTE(review): the fold is hard-coded; update it to match the best fold
# reported by the cross-validation summary.
best_fold = 1
best_weights = f"runs/pothole_fold_{best_fold}/weights/best.pt"

# --- Log & map paths ---
log_dir = os.path.join(PROJECT_DIR, "runs")
os.makedirs(log_dir, exist_ok=True)
log_file_path = os.path.join(log_dir, "pothole_full_log.txt")
map_file_path = os.path.join(log_dir, "pothole_full_map.txt")

# --- scale LR ---
# NOTE(review): lr0 is scaled for a batch of 32 (16 x 2 GPUs) but `batch`
# below is passed as 16. If Ultralytics treats `batch` as the total across
# devices, the effective batch is 16 and this scaling is off - confirm.
per_gpu_batch = 16
effective_batch = per_gpu_batch * 2
lr0 = 0.01 * (effective_batch / 16)  # Linear scaling

# --- data.yaml ---
# train and val both point at images/train, so the metrics reported for this
# run are measured on the same images the model is trained on.
yaml_content = """
path: /kaggle/input/pothole-dataset/pothole_dataset
train: images/train
val: images/train
nc: 1
names: ['pothole']
"""

yaml_path = "data.yaml"
with open(yaml_path, 'w') as f:
    f.write(yaml_content.strip())

# --- Capture all output ---
results = None
error_msg = None

try:
    # Everything printed inside this block goes to the log file.
    with open(log_file_path, "w") as log_file, \
            redirect_stdout(log_file), redirect_stderr(log_file):

        model = YOLO(best_weights)

        results = model.train(
            data=yaml_path,
            epochs=100,
            imgsz=640,
            batch=per_gpu_batch,
            device="0,1",
            name="pothole_full",
            project="runs",
            patience=20,
            augment=True,
            exist_ok=True,
            verbose=False,
            plots=False,
            lr0=lr0,
            lrf=0.1,
            optimizer="AdamW"
        )

        # NOTE(review): multi-GPU runs have been reported to return None from
        # train(); fail with a readable message in that case - confirm.
        if results is None:
            raise RuntimeError(
                "model.train() returned no metrics; mAP file was not written"
            )

        # Save mAP
        map_score = results.results_dict['metrics/mAP50-95(B)']
        with open(map_file_path, "w") as f:
            f.write(str(map_score))

except Exception as e:
    error_msg = str(e)

    # Write error to log (the log file is closed again at this point).
    with open(log_file_path, "a") as f:
        f.write(f"\n\n TRAINING FAILED: {error_msg}\n")

    print(f"ERROR in full training : {error_msg}")

Ultralytics 8.3.226 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
                                                       CUDA:1 (Tesla T4, 15095MiB)
Transferred 355/355 items from pretrained weights
Freezing layer 'model.22.dfl.conv.weight'
[34m[1mAMP: [0mrunning Automatic Mixed Precision (AMP) checks...
[34m[1mAMP: [0mchecks passed ‚úÖ
[34m[1mtrain: [0mFast image access ‚úÖ (ping: 0.0¬±0.0 ms, read: 238.1¬±123.8 MB/s, size: 123.4 KB)
[K[34m[1mtrain: [0mScanning /kaggle/input/pothole-dataset/pothole_dataset/labels/train... 3763 images, 351 backgrounds, 2 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 3765/3765 704.7it/s 5.3s0.1s
[34m[1mtrain: [0m/kaggle/input/pothole-dataset/pothole_dataset/images/train/andrewmvd_3.png: ignoring corrupt image/label: cannot identify image file '/kaggle/input/pothole-dataset/pothole_dataset/images/train/andrewmvd_3.png'
[34m[1mtrain: [0m/kaggle/input/pothole-dataset/pothole_dataset/images/train/atulyakumar98_355

In [18]:
# Recursively (-r) archive everything under /kaggle/working into working.zip;
# the archive path is relative, so it is created in the current directory.
!zip -r working.zip /kaggle/working

  adding: kaggle/working/ (stored 0%)
  adding: kaggle/working/Pothole_detection_project/ (stored 0%)
  adding: kaggle/working/Pothole_detection_project/runs/ (stored 0%)
  adding: kaggle/working/Pothole_detection_project/runs/pothole_fold_3/ (stored 0%)
  adding: kaggle/working/Pothole_detection_project/runs/pothole_fold_3/results.csv (deflated 59%)
  adding: kaggle/working/Pothole_detection_project/runs/pothole_fold_3/args.yaml (deflated 51%)
  adding: kaggle/working/Pothole_detection_project/runs/pothole_fold_3/weights/ (stored 0%)
  adding: kaggle/working/Pothole_detection_project/runs/pothole_fold_3/weights/last.pt (deflated 9%)
  adding: kaggle/working/Pothole_detection_project/runs/pothole_fold_3/weights/best.pt (deflated 9%)
  adding: kaggle/working/Pothole_detection_project/runs/pothole_fold_1/ (stored 0%)
  adding: kaggle/working/Pothole_detection_project/runs/pothole_fold_1/results.csv (deflated 60%)
  adding: kaggle/working/Pothole_detection_project/runs/pothole_fold_1/args