In [1]:
import sys; 
!{sys.executable} -m pip install ultralytics torch torchvision torchaudio opencv-contrib-python-headless numpy matplotlib scikit-learn tensorflow pillow

Collecting ultralytics
  Downloading ultralytics-8.3.82-py3-none-any.whl.metadata (35 kB)
Collecting torch
  Downloading torch-2.6.0-cp312-cp312-win_amd64.whl.metadata (28 kB)
Collecting torchvision
  Downloading torchvision-0.21.0-cp312-cp312-win_amd64.whl.metadata (6.3 kB)
Collecting torchaudio
  Downloading torchaudio-2.6.0-cp312-cp312-win_amd64.whl.metadata (6.7 kB)
Collecting opencv-contrib-python-headless
  Downloading opencv_contrib_python_headless-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting numpy
  Downloading numpy-2.2.3-cp312-cp312-win_amd64.whl.metadata (60 kB)
Collecting matplotlib
  Downloading matplotlib-3.10.1-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp312-cp312-win_amd64.whl.metadata (15 kB)
Collecting tensorflow
  Downloading tensorflow-2.18.0-cp312-cp312-win_amd64.whl.metadata (3.3 kB)
Collecting pillow
  Downloading pillow-11.1.0-cp312-cp312-win_amd64.whl.metadata (9.3 kB)
Collecting numpy
 

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from ultralytics import YOLO
from sklearn.model_selection import train_test_split
import shutil
import torch

In [None]:
# Root folder of the dataset; it is expected to contain the raw class folders
# 'normal' and 'potholes'.
base_path = 'C:\\Users\\Srikrishna\\Documents\\GitHub\\Sem_4\\Robotics-MFC-S4-D12\\pothole_image_data'
normal_path, potholes_path = (
    os.path.join(base_path, class_dir) for class_dir in ('normal', 'potholes')
)

# YOLO-style layout under the dataset root: <split>/images and <split>/labels
# for the 'train' and 'val' splits (unpacked in that nesting order).
train_images_path, train_labels_path, val_images_path, val_labels_path = (
    os.path.join(base_path, split_dir, kind_dir)
    for split_dir in ('train', 'val')
    for kind_dir in ('images', 'labels')
)

# Make sure all four output folders exist (no error if they already do).
for path in (train_images_path, train_labels_path, val_images_path, val_labels_path):
    os.makedirs(path, exist_ok=True)

# Helpers and entry point for splitting the raw images into train/val folders
def _list_images(folder, extensions=('.jpg', '.jpeg')):
    """Return the full paths of the image files found directly inside ``folder``.

    The extension check is case-insensitive (so ``.JPG`` is accepted), and the
    result is sorted because ``os.listdir`` returns names in arbitrary order;
    a stable input order is what makes the seeded split reproducible.
    """
    return sorted(
        os.path.join(folder, name)
        for name in os.listdir(folder)
        if name.lower().endswith(extensions)
    )


def _copy_with_labels(image_paths, images_dir, labels_dir, prefix, label_text):
    """Copy images into ``images_dir`` and write one label file each into ``labels_dir``.

    Every copied file is renamed to ``<prefix>_<original name>`` so that two
    source folders containing the same file name (e.g. ``1.jpg`` in both) do
    not overwrite each other's image and label in the shared output folders.
    """
    for src in image_paths:
        dst_name = f'{prefix}_{os.path.basename(src)}'
        shutil.copy(src, os.path.join(images_dir, dst_name))
        label_file = os.path.join(labels_dir, os.path.splitext(dst_name)[0] + '.txt')
        with open(label_file, 'w') as f:
            f.write(label_text)


def split_dataset(normal_path, potholes_path, train_ratio=0.8):
    """Split the normal and pothole images into train/val sets and write labels.

    Each class folder is split independently with ``train_test_split``
    (``random_state=42``). Images are copied into the module-level
    ``train_images_path`` / ``val_images_path`` folders and a matching ``.txt``
    label file is written into ``train_labels_path`` / ``val_labels_path``.

    Args:
        normal_path: Folder containing images without potholes.
        potholes_path: Folder containing images with potholes.
        train_ratio: Fraction of each class that goes to the training split.

    Notes:
        * Normal images get an empty label file (no objects).
        * Pothole images get a PLACEHOLDER box ('0 0.5 0.5 0.2 0.2'); it must be
          replaced with real annotations (e.g. from LabelImg/Roboflow) before
          the trained model can be meaningful.
        * Files already present in the output folders are not removed.
    """
    normal_train, normal_val = train_test_split(
        _list_images(normal_path), train_size=train_ratio, random_state=42
    )
    pothole_train, pothole_val = train_test_split(
        _list_images(potholes_path), train_size=train_ratio, random_state=42
    )

    no_objects = ''  # empty label file: image contains no potholes
    placeholder_box = '0 0.5 0.5 0.2 0.2'  # Example, replace with real annotations

    _copy_with_labels(normal_train, train_images_path, train_labels_path, 'normal', no_objects)
    _copy_with_labels(normal_val, val_images_path, val_labels_path, 'normal', no_objects)
    _copy_with_labels(pothole_train, train_images_path, train_labels_path, 'pothole', placeholder_box)
    _copy_with_labels(pothole_val, val_images_path, val_labels_path, 'pothole', placeholder_box)

# Split the dataset into train/val folders (copies images, writes label files)
split_dataset(normal_path, potholes_path)

# Template for dataset.yaml; the image folders are filled in as absolute paths
dataset_yaml = '''
train: {train_images_path}
val: {val_images_path}
nc: 1  # Number of classes (1 for potholes)
names: ['pothole']  # Class names
'''

# Write dataset.yaml to the current working directory, using forward slashes
# so the Windows paths are written without backslashes
with open('dataset.yaml', 'w') as f:
    f.write(dataset_yaml.format(train_images_path=train_images_path.replace('\\', '/'),
                               val_images_path=val_images_path.replace('\\', '/')))

# Point the Ultralytics 'datasets_dir' setting at the dataset root (optional).
# The library's settings manager is used instead of editing settings.json by
# hand at a Windows-specific location, so this also works on other platforms.
from ultralytics import settings as ultralytics_settings
ultralytics_settings.update({'datasets_dir': base_path})

# Start from the pre-trained YOLOv8 nano checkpoint
model = YOLO('yolov8n.pt')

# Output location of the training run; defined once so that the weights path
# below cannot drift from the arguments passed to train()
train_project = 'runs/train'
train_run_name = 'pothole_detection'

# Train the model on the pothole dataset
model.train(
    data='dataset.yaml',
    epochs=100,  # Increase for better accuracy, adjust as needed
    imgsz=640,   # Image size, adjust based on your images (YOLOv8 default is 640)
    batch=16,    # Batch size, adjust based on your GPU/CPU memory
    project=train_project,  # Output directory for training results
    name=train_run_name,    # Experiment name
    exist_ok=True  # Allow overwriting existing runs
)

# Load the best weights written by the training run above
best_model = YOLO(os.path.join(train_project, train_run_name, 'weights', 'best.pt'))

In [None]:
def preprocess_image(image_path):
    """Read an image from disk and build a cleaned-up edge map from it.

    Pipeline: grayscale -> bilateral filter -> CLAHE -> Canny (fixed thresholds
    50/150) -> one dilation followed by one erosion with a 3x3 kernel.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        A tuple ``(image, edges)``: the image as loaded by OpenCV (BGR) and the
        edge map produced by the pipeline above.

    Raises:
        ValueError: If ``cv2.imread`` could not load the file.
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        raise ValueError(f"Error: Could not load image at {image_path}")

    # Single-channel copy, smoothed while keeping edges (d=9, sigmaColor=75, sigmaSpace=75)
    denoised = cv2.bilateralFilter(cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY), 9, 75, 75)

    # Local contrast boost before edge detection
    contrast = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(denoised)

    # Canny with fixed low/high thresholds; tune them for other image sets
    edge_map = cv2.Canny(contrast, 50, 150)

    # Dilate then erode once each with the same 3x3 kernel to tidy the edges
    structuring = np.ones((3, 3), np.uint8)
    for morph_op in (cv2.dilate, cv2.erode):
        edge_map = morph_op(edge_map, structuring, iterations=1)

    return bgr, edge_map

def get_grid_position(width, height, center_x, center_y, grid_size=10):
    """Map a pixel coordinate to its (row, col) cell in a grid_size x grid_size grid.

    The cell index is computed as ``coordinate * grid_size // dimension``
    instead of dividing by a truncated cell size. This avoids a
    ZeroDivisionError when the image is smaller than ``grid_size`` pixels and
    keeps cells evenly sized when the dimension is not a multiple of
    ``grid_size``.

    Args:
        width: Image width in pixels (must be > 0).
        height: Image height in pixels (must be > 0).
        center_x: X coordinate of the point to locate.
        center_y: Y coordinate of the point to locate.
        grid_size: Number of cells along each axis (must be > 0).

    Returns:
        ``(row, col)`` as plain ints, each clamped to ``[0, grid_size - 1]`` so
        points on or beyond the image border fall into the nearest edge cell.

    Raises:
        ValueError: If ``width``, ``height`` or ``grid_size`` is not positive.
    """
    if width <= 0 or height <= 0 or grid_size <= 0:
        raise ValueError(
            f"width, height and grid_size must be positive, got "
            f"width={width}, height={height}, grid_size={grid_size}"
        )

    # Scale first, then floor-divide, so no precision is lost to a rounded cell size
    row = int(center_y * grid_size // height)
    col = int(center_x * grid_size // width)

    # Clamp out-of-range coordinates into the grid
    row = min(max(0, row), grid_size - 1)
    col = min(max(0, col), grid_size - 1)

    return row, col

def _draw_label_lines(image, lines, x, box_top, color, font_scale=0.9, thickness=2):
    """Draw ``lines`` as stacked text rows just above a box whose top edge is ``box_top``.

    ``cv2.putText`` renders a single line only, so each entry of ``lines`` is
    drawn with its own call. If the stack would start above the image, it is
    drawn just inside the top of the box instead.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    (_, text_h), baseline = cv2.getTextSize(lines[0], font, font_scale, thickness)
    line_step = text_h + baseline + 4

    # The last row sits 10 px above the box; earlier rows are stacked above it
    first_baseline = box_top - 10 - line_step * (len(lines) - 1)
    if first_baseline - text_h < 0:
        first_baseline = box_top + text_h + 10

    for i, line in enumerate(lines):
        cv2.putText(image, line, (max(x, 0), first_baseline + i * line_step),
                    font, font_scale, color, thickness)


def detect_pothole_in_image(model, image_path, scale_factor=0.1, grid_size=10,
                            conf_threshold=0.5, output_path='pothole_detection_result.jpg'):
    """Run the detector on one image, display the annotated result and save it.

    Args:
        model: Callable detector (as returned by ``YOLO(...)``); it is invoked as
            ``model(image_path, conf=...)``.
        image_path: Path of the image to analyse.
        scale_factor: Multiplier that converts box width/height from pixels to
            the "cm" values shown in the label (depends on camera setup).
        grid_size: Number of grid cells per axis used for the reported position.
        conf_threshold: Minimum confidence for a box to be reported and drawn.
        output_path: File the annotated image is written to.

    Raises:
        ValueError: Propagated from ``preprocess_image`` when the image cannot
            be loaded.
    """
    # Only the loaded image is needed here; the edge map is not used
    original_image, _ = preprocess_image(image_path)

    results = model(image_path, conf=conf_threshold)

    # Work on an RGB copy so matplotlib shows correct colours. Colour tuples
    # below are therefore applied in RGB order.
    rgb_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    image_height, image_width = rgb_image.shape[:2]

    pothole_detected = False

    for result in results:
        for box in result.boxes:
            conf = float(box.conf.cpu().numpy()[0])
            if conf < conf_threshold:
                continue

            # Plain Python ints for the OpenCV drawing calls
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy().astype(int).tolist()

            pothole_detected = True
            print(f"Pothole Detected with confidence {conf:.2f}!")

            # Box size converted from pixels via scale_factor
            width_cm = (x2 - x1) * scale_factor
            height_cm = (y2 - y1) * scale_factor

            # Grid cell containing the centre of the box
            center_x = (x1 + x2) // 2
            center_y = (y1 + y2) // 2
            grid_row, grid_col = get_grid_position(image_width, image_height, center_x, center_y, grid_size)

            cv2.rectangle(rgb_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
            _draw_label_lines(
                rgb_image,
                [
                    f'Pothole: {width_cm:.2f}cm x {height_cm:.2f}cm',
                    f'Grid Position: Row {grid_row}, Col {grid_col}',
                ],
                x1, y1, (0, 255, 0),
            )

    if not pothole_detected:
        print("No Pothole Detected.")
        # NOTE(review): (0, 0, 255) is blue on this RGB image; confirm whether red was intended
        cv2.putText(rgb_image, 'No Pothole', (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    # Display the result
    plt.figure(figsize=(10, 8))
    plt.imshow(rgb_image)
    plt.title('Pothole Detection Result')
    plt.axis('off')
    plt.show()

    # Save the annotated image; imwrite returns False when nothing was written
    if cv2.imwrite(output_path, cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)):
        print(f"Annotated image saved as {output_path}")
    else:
        print(f"Error: Could not save annotated image to {output_path}")

In [None]:
def _annotate_video_frame(model, frame, frame_count, scale_factor, grid_size, conf_threshold):
    """Run the detector on one BGR frame and return an annotated BGR frame."""
    results = model(frame, conf=conf_threshold)

    # Drawing happens on an RGB copy, so colour tuples below are in RGB order
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_height, frame_width = rgb_frame.shape[:2]
    font = cv2.FONT_HERSHEY_SIMPLEX

    pothole_detected = False

    for result in results:
        for box in result.boxes:
            conf = float(box.conf.cpu().numpy()[0])
            if conf < conf_threshold:
                continue

            # Plain Python ints for the OpenCV drawing calls
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy().astype(int).tolist()

            pothole_detected = True
            print(f"Frame {frame_count}: Pothole Detected with confidence {conf:.2f}!")

            # Box size converted from pixels via scale_factor
            width_cm = (x2 - x1) * scale_factor
            height_cm = (y2 - y1) * scale_factor

            # Grid cell containing the centre of the box
            center_x = (x1 + x2) // 2
            center_y = (y1 + y2) // 2
            grid_row, grid_col = get_grid_position(frame_width, frame_height, center_x, center_y, grid_size)

            cv2.rectangle(rgb_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # cv2.putText draws a single line only, so each label row gets its
            # own call; rows are stacked above the box, or inside it when there
            # is no room above.
            lines = [
                f'Pothole: {width_cm:.2f}cm x {height_cm:.2f}cm',
                f'Grid Position: Row {grid_row}, Col {grid_col}',
            ]
            (_, text_h), baseline = cv2.getTextSize(lines[0], font, 0.9, 2)
            line_step = text_h + baseline + 4
            first_baseline = y1 - 10 - line_step * (len(lines) - 1)
            if first_baseline - text_h < 0:
                first_baseline = y1 + text_h + 10
            for i, line in enumerate(lines):
                cv2.putText(rgb_frame, line, (max(x1, 0), first_baseline + i * line_step),
                            font, 0.9, (0, 255, 0), 2)

    if not pothole_detected:
        cv2.putText(rgb_frame, 'No Pothole', (50, 50),
                    font, 1, (0, 0, 255), 2)

    # Back to BGR, which is what the video writer expects
    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)


def detect_potholes_in_video(model, video_path, output_path='pothole_detection_output.avi',
                             scale_factor=0.1, grid_size=10, conf_threshold=0.5):
    """Annotate every frame of a video with pothole detections and save the result.

    Args:
        model: Callable detector (as returned by ``YOLO(...)``); it is invoked as
            ``model(frame, conf=...)`` on each BGR frame.
        video_path: Path of the input video.
        output_path: Path of the annotated output video (XVID codec).
        scale_factor: Multiplier that converts box width/height from pixels to
            the "cm" values shown in the label.
        grid_size: Number of grid cells per axis used for the reported position.
        conf_threshold: Minimum confidence for a box to be reported and drawn.

    Returns:
        None. Prints an error and returns early if the input video or the
        output writer cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Could not open video file at {video_path}")
        return

    out = None
    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Keep the fractional frame rate (e.g. 29.97) so the output duration
        # matches the input; fall back to 30 when the source reports none.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            print("Warning: source FPS unavailable, writing output at 30 FPS.")
            fps = 30.0

        fourcc = cv2.VideoWriter_fourcc(*'XVID')  # You can use 'MJPG' or other codecs
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
        if not out.isOpened():
            print(f"Error: Could not open video writer for {output_path}")
            return

        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break  # End of video

            frame_count += 1
            print(f"Processing frame {frame_count}")

            out.write(_annotate_video_frame(model, frame, frame_count,
                                            scale_factor, grid_size, conf_threshold))
    finally:
        # Always release the capture and writer, even if inference raised
        cap.release()
        if out is not None:
            out.release()

    print(f"Annotated video saved as {output_path}")

In [None]:
# Run the trained detector on one still image (path relative to the working directory)
image_path = 'pothole.jpg'
detect_pothole_in_image(
    best_model,
    image_path,
    scale_factor=0.1,
    grid_size=10,
)

In [None]:
# Run the trained detector on every frame of a video (path relative to the working directory)
video_path = 'demo.mp4'
detect_potholes_in_video(
    best_model,
    video_path,
    scale_factor=0.1,
    grid_size=10,
)